namespace arith {

/// Returns the single-precision square root of `x`.
float sqrt(float x) {
    return std::sqrt(x);
}

/// Returns the arithmetic mean of the elements of `x`.
///
/// The sum is accumulated in `float`: the accumulator type of
/// `std::accumulate` is the type of its initial value, so seeding it with the
/// integer literal `0` would truncate every partial sum and make the final
/// division an integer division.
///
/// @param x Values to average.
/// @return  The mean of `x`, or 0.0f when `x` is empty (instead of dividing
///          by zero).
float mean(const std::vector<float>& x) {
    if (x.empty()) {
        return 0.0f;
    }
    const float total = std::accumulate(x.begin(), x.end(), 0.0f);
    return total / static_cast<float>(x.size());
}

}  // namespace arith
namespace autodiff {

/// Estimates the partial derivative of `func` with respect to `vec[arg]`
/// using the symmetric quotient (f(x + h) - f(x - h)) / (2h).
///
/// @param vec     Point at which to differentiate; it is copied, not modified.
/// @param func    Callable taking a `std::vector<T>` and returning a scalar.
/// @param arg     Index of the coordinate to perturb.
/// @param epsilon Requested perturbation h.
/// @return The estimated derivative. If `epsilon` is too small to change
///         `vec[arg]` in type `T`, the applied step is zero and the result is
///         not finite.
/// @throws std::out_of_range if `arg` is not a valid index into `vec`.
template <typename T, typename F>
auto central_difference(std::vector<T>& vec, F func, std::size_t arg, float epsilon = 1e-6) {
    std::vector<T> upper = vec;
    std::vector<T> lower = vec;
    upper.at(arg) += epsilon;
    lower.at(arg) -= epsilon;
    // Divide by the step that was actually applied, not by 2 * epsilon: after
    // rounding to T the two can differ by several percent for small epsilon.
    const T step = upper[arg] - lower[arg];
    return (func(upper) - func(lower)) / step;
}

/// Base record for a scalar node: its value, its gradient slot and the number
/// of inputs it was built from.
class ScalarFunction {
public:
    float data = 0.0f;
    float grad = 0.0f;
    int degree = 0;
public:
    ScalarFunction() {}
};  // class ScalarFunction

/// Leaf node holding a fixed value.
class ConstantScalar: public ScalarFunction {
public:
    ConstantScalar(float data): ScalarFunction() {
        this->data = data;
    }
};  // class ConstantScalar

/// Node computing a + b.
class Add: public ScalarFunction {
public:
    std::shared_ptr<ScalarFunction> a;
    std::shared_ptr<ScalarFunction> b;
public:
    // The members are initialised from the same-named parameters in the
    // initialiser list; forward() then reads the members.
    Add(std::shared_ptr<ScalarFunction> a, std::shared_ptr<ScalarFunction> b): a(a), b(b) {
        this->data = this->forward();
        this->degree = 2;
    }
    /// Returns a + b from the inputs' current values.
    float forward() {
        return a->data + b->data;
    }
    /// d(a+b)/da = d(a+b)/db = 1, so both inputs receive `d_input` unchanged.
    std::vector<float> backward(float d_input) {
        return {d_input, d_input};
    }
};  // class Add

/// Node computing the natural logarithm of its input.
class Log: public ScalarFunction {
public:
    std::shared_ptr<ScalarFunction> a;
public:
    Log(std::shared_ptr<ScalarFunction> a): a(a) {
        this->data = this->forward();
        this->degree = 1;
    }
    /// Returns log(a).
    float forward() {
        return std::log(a->data);
    }
    /// d log(x)/dx = 1/x.
    std::vector<float> backward(float d_input) {
        return {(1.0f * d_input / a->data)};
    }
};  // class Log

/// Node computing a * b.
class Mul: public ScalarFunction {
public:
    std::shared_ptr<ScalarFunction> a;
    std::shared_ptr<ScalarFunction> b;
public:
    Mul(std::shared_ptr<ScalarFunction> a, std::shared_ptr<ScalarFunction> b) : a(a), b(b) {
        this->data = this->forward();
        this->degree = 2;
    }
    /// Returns a * b from the inputs' current values.
    float forward() {
        return a->data * b->data;
    }
    /// d(ab)/da = b and d(ab)/db = a; returned in the order {a, b}.
    std::vector<float> backward(float d_input) {
        return {b->data * d_input, a->data * d_input};
    }
};  // class Mul

/// Node computing 1 / a.
class Inv: public ScalarFunction {
public:
    std::shared_ptr<ScalarFunction> a;
public:
    Inv(std::shared_ptr<ScalarFunction> a): a(a) {
        this->data = this->forward();
        this->degree = 1;
    }
    /// Returns 1 / a.
    float forward() {
        return 1.0f / a->data;
    }
    /// d(1/x)/dx = -1/x^2.
    std::vector<float> backward(float d_input) {
        const float x = a->data;
        return {-d_input / (x * x)};
    }
};  // class Inv

/// Node computing the logistic sigmoid of its input.
class Sigmoid: public ScalarFunction {
public:
    std::shared_ptr<ScalarFunction> a;
public:
    Sigmoid(std::shared_ptr<ScalarFunction> a): a(a) {
        this->data = this->forward();
        this->degree = 1;
    }
    /// Returns sigmoid(a). The two branches are algebraically equal; each one
    /// only ever exponentiates a non-positive number, so exp cannot overflow.
    float forward() {
        const float x = this->a->data;
        if (x >= 0.0) {
            return 1.0 / (1.0 + std::exp(-x));
        }
        return std::exp(x) / (1.0 + std::exp(x));
    }
    /// d sigmoid(x)/dx = sigmoid(x) * (1 - sigmoid(x)).
    std::vector<float> backward(float d_input) {
        const float s = this->forward();
        return {d_input * s * (1.0f - s)};
    }
};  // class Sigmoid

// ---------------------------------------------------------------------------
// Self-tests. They are `inline` because this header defines them and may be
// included from more than one translation unit. Magnitudes are taken with
// std::abs so the floating-point overload is always the one selected.
// ---------------------------------------------------------------------------

/// Checks central_difference on f(x) = x0 + x1 + x2 + x3 + x4 (df/dx2 = 1).
inline bool test_central_difference() {
    std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
    auto func = [](const std::vector<float>& x) -> float {
        return x[0] + x[1] + x[2] + x[3] + x[4];
    };
    auto grad = central_difference(x, func, 2);
    return std::abs(grad - 1.0f) <= 1e-4;
}

/// Checks Add: 1 + 2 = 3 and both gradients equal the upstream gradient.
inline bool test_addscalar() {
    auto a = std::make_shared<ConstantScalar>(1.0f);
    auto b = std::make_shared<ConstantScalar>(2.0f);
    auto c = std::make_shared<Add>(a, b);
    if (c->data != 3.0f) {
        return false;
    }
    auto res = c->backward(2.0f);
    return res[0] == 2.0f && res[1] == 2.0f;
}

/// Checks Mul: 2 * 3 = 6, gradients are {b * d, a * d}.
inline bool test_mulscalar() {
    auto a = std::make_shared<ConstantScalar>(2.0f);
    auto b = std::make_shared<ConstantScalar>(3.0f);
    auto c = std::make_shared<Mul>(a, b);
    if (c->data != 6.0f) {
        return false;
    }
    auto res = c->backward(2.0f);
    return res[0] == 6.0f && res[1] == 4.0f;
}

/// Checks Log at x = 2: value log(2), gradient d / x.
inline bool test_logscalar() {
    auto a = std::make_shared<ConstantScalar>(2.0f);
    auto b = std::make_shared<Log>(a);
    if (std::abs(b->data - std::log(2.0f)) > 1e-4) {
        return false;
    }
    auto res = b->backward(2.0f);
    return std::abs(res[0] - 1.0f) <= 1e-4;
}

/// Checks Inv at x = 2: value 0.5, gradient -d / x^2.
inline bool test_invscalar() {
    auto a = std::make_shared<ConstantScalar>(2.0f);
    auto b = std::make_shared<Inv>(a);
    if (std::abs(b->data - 0.5f) > 1e-4) {
        return false;
    }
    auto res = b->backward(2.0f);
    return std::abs(res[0] + 0.5f) <= 1e-4;
}

/// Checks Sigmoid on both branches of forward() and at the midpoint, against
/// independently computed reference values.
inline bool test_sigmoidscalar() {
    // x = 2 (non-negative branch): sigmoid = 0.8807971, s * (1 - s) = 0.1049936.
    auto a = std::make_shared<ConstantScalar>(2.0f);
    auto b = std::make_shared<Sigmoid>(a);
    if (std::abs(b->data - 0.8807971f) > 1e-4) {
        return false;
    }
    auto res = b->backward(2.0f);
    if (std::abs(res[0] - 0.2099872f) > 1e-4) {
        return false;
    }
    // x = -2 (negative branch): sigmoid(-x) = 1 - sigmoid(x), same derivative.
    auto neg = std::make_shared<Sigmoid>(std::make_shared<ConstantScalar>(-2.0f));
    if (std::abs(neg->data - 0.1192029f) > 1e-4) {
        return false;
    }
    if (std::abs(neg->backward(1.0f)[0] - 0.1049936f) > 1e-4) {
        return false;
    }
    // x = 0: sigmoid = 0.5, derivative = 0.25.
    auto mid = std::make_shared<Sigmoid>(std::make_shared<ConstantScalar>(0.0f));
    if (std::abs(mid->data - 0.5f) > 1e-4) {
        return false;
    }
    return std::abs(mid->backward(4.0f)[0] - 1.0f) <= 1e-4;
}

}  // namespace autodiff
max_logit : logits->data[i * num_classes + j]; + } + + auto sum_exp = 0.0; + for (auto j = 0; j < num_classes; j++) { + log_probs->data[i * num_classes + j] = logits->data[i * num_classes + j] - max_logit; + sum_exp += exp(log_probs->data[i * num_classes + j]); + } + + // calculate log(softmax) + auto log_sum_exp = log(sum_exp); + for (auto j = 0; j < num_classes; j++) { + log_probs->data[i * num_classes + j] -= log_sum_exp; + } + } + + return log_probs; +} + +std::vector> gradients(std::shared_ptr loss, std::vector> parameters) { + loss->used = true; + + std::unordered_set> nodes; + std::vector> tape; + + // 递归遍历图并构建计算图 + std::function)> visit = [&](std::shared_ptr node) { + if (nodes.find(node) == nodes.end()) { + for (const auto& parent : node->get_parents()) { + visit(parent); + } + nodes.insert(node); + tape.push_back(node); + } + }; + + visit(loss); + for (const auto& param : parameters) { + nodes.insert(param); + } + + std::unordered_map, std::shared_ptr> grads; + for (const auto& node : nodes) { + grads[node] = std::make_shared(node->data->shape); + } + grads[loss] = std::make_shared(loss->data->shape); + grads[loss]->data[0] = 1.0; + + for (auto it = tape.rbegin(); it != tape.rend(); it++) { + // std::cout << "tape it: " << std::endl; + auto node = *it; + // if (node->data->shape[0] == 1) { + // std::cout << "coming to squareloss" << std::endl; + // } + auto parent_grads = node->backward(grads[node]); + auto parents = node->get_parents(); + for (size_t i = 0; i < parents.size(); i++) { + // std::cout << "this grad shape: " << grads[parents[i]]->data.size() << std::endl; + for (auto ind = 0; ind < parents[i]->data->size; ind++) { + grads[parents[i]]->data[ind] += parent_grads[i]->data[ind]; + } + } + } + + std::vector> result; + for (const auto& param : parameters) { + result.emplace_back(grads[param]); + } + + // std::cout << "len(result): " << result.size() << std::endl; + return result; +} + +} \ No newline at end of file diff --git a/cc/operators/nn.h 
b/cc/operators/nn.h new file mode 100644 index 0000000..e4824e8 --- /dev/null +++ b/cc/operators/nn.h @@ -0,0 +1,274 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "../tensor/tensor.h" +#include "../math/arith.h" + +namespace py = pybind11; + +namespace nn { + +class Node { +public: + std::shared_ptr data; + std::vector> objects; + std::vector> gradient; +public: + Node() {} + virtual std::shared_ptr forward() = 0; + virtual std::vector> backward(std::shared_ptr gradient) = 0; + std::vector> get_parents() { + return this->objects; + } + std::vector get_data() { + return this->data->data; + } + std::shared_ptr get_tensor() { + return this->data; + } + // virtual void update(std::shared_ptr grad, float lr) = 0; + // virtual void zero_grad() = 0; + virtual ~Node() {} +}; + +class DataNode: public Node { +public: + DataNode() {} +}; // class DataNode + +class Parameter: public DataNode { +public: + // Parameter(const std::vector& shape) { + // this->data = std::make_shared(shape, true); + // } + Parameter(py::array_t array) { + py::buffer_info info = array.request(); + float* dataPtr = static_cast(info.ptr); + std::vector shape = {}; + for (auto &it: info.shape) { + shape.push_back(it); + } + auto tensor = std::make_shared(shape); + std::vector result(dataPtr, dataPtr + info.size); + tensor->data = result; + this->data = tensor; + } + std::shared_ptr forward() { + return this->data; + }; + std::vector> backward(std::shared_ptr gradient) { + return {gradient}; + }; + void update(std::shared_ptr grad, double lr) { + for (auto i = 0; i < this->data->size; i++) { + this->data->data[i] -= lr * grad->data[i]; + } + } +}; // class Parameter + +class Constant: public DataNode { +public: + Constant(std::shared_ptr data) { + this->data = data; + } + Constant(py::array_t array) { + this->data = tensor::pyarray_to_tensor(array); + } + std::shared_ptr forward() { + return this->data; + }; + std::vector> 
backward(std::shared_ptr gradient) { + return {gradient}; + }; + // void update(std::shared_ptr grad, float lr) {} +}; // class Constant + +class FunctionNode: public Node { +public: + FunctionNode(std::shared_ptr a, std::shared_ptr b) { + this->objects.emplace_back(a); + this->objects.emplace_back(b); + } + FunctionNode(std::shared_ptr a) { + this->objects.emplace_back(a); + } + + std::shared_ptr forward() override { + return nullptr; + } +}; //class FunctionNode + +class Add: public FunctionNode { +public: + Add(std::shared_ptr a, std::shared_ptr b) : FunctionNode(a, b) { + this->data = this->forward(); + } + std::shared_ptr forward() override { + auto a = this->objects[0]; + auto b = this->objects[1]; + auto outNode = std::make_shared(a->data->shape); + for (auto i = 0; i < a->data->size; i++) { + outNode->data[i] = a->data->data[i] + b->data->data[i]; + } + return outNode; + } + std::vector> backward(std::shared_ptr gradient) override { + // assertion needed + return {gradient, gradient}; + } +}; + +class AddBias: public FunctionNode { +public: + AddBias(std::shared_ptr a, std::shared_ptr b) : FunctionNode(a, b) { + this->data = this->forward(); + } + std::shared_ptr forward() override { + // features: a Node with shape (batch_size x num_features) + // bias: a Node with shape (1 x num_features) + auto features = this->objects[0]; + auto bias = this->objects[1]; + auto outNode = std::make_shared(features->data->shape); + // for循环写加法总会写吧🤔 + // 补全这里的代码 + return outNode; + } + std::vector> backward(std::shared_ptr gradient) override { + // assertion needed + auto g_bias = std::make_shared(this->objects[1]->data->shape); + // 补全这里的代码 + + return {gradient, g_bias}; + } + std::vector get_data() { + return this->data->data; + } +}; // class AddBias + + +class Linear: public FunctionNode { +public: + Linear(std::shared_ptr a, std::shared_ptr b) : FunctionNode(a, b) { + // 这段代码就一行,参考下别的类是怎么写的呢? 
+ // 在这里补全 + } + std::shared_ptr forward() override { + // features: (batch_size x input_features) + auto features = this->objects[0]; + // weights: (input_features x output_features) + auto weights = this->objects[1]; + auto m = features->data->shape[0]; + auto k = features->data->shape[1]; + auto n = weights->data->shape[1]; + // std::cout << m << " " << n << " " << k << std::endl; + // output: (batch_size x output_features) + auto shape = {m, n}; + auto outNode = std::make_shared(shape); + // 实际上你需要补全的是arith::mm函数,快去找找它在哪里 + // 其余部分不需要动 + arith::mm(features->data->data, weights->data->data, outNode->data, m, k, n); + return outNode; + } + + std::vector> backward(std::shared_ptr gradient) override { + auto features = this->objects[0]; + auto weights = this->objects[1]; + // gradient.shape[0] == features.shape[0] + // gradient.shape[1] == weights.shape[1] + auto grad_features_shape = {gradient->shape[0], weights->data->shape[0]}; + auto grad_features = std::make_shared(grad_features_shape); + auto grad_weights_shape = {features->data->shape[1], gradient->shape[1]}; + auto grad_weights = std::make_shared(grad_weights_shape); + // 这里要调用两次arith:mm,是分别把哪两个矩阵相乘呢? 
+ return {grad_features, grad_weights}; + } +}; //class Linear + +class ReLU: public FunctionNode { +public: + ReLU(std::shared_ptr a) : FunctionNode(a) { + // 补全这里 + } + std::shared_ptr forward() override { + // x: a Node with shape (batch_size x num_features) + auto outNode = std::make_shared(this->objects[0]->data->shape); + // 补全这里,调用arith::vector_scalar_max + return outNode; + } + std::vector> backward(std::shared_ptr gradient) override { + auto grads = std::make_shared(this->objects[0]->data->shape); + // 补全这里,一个for循环 + + return {grads}; + } +}; // class ReLU + +class Loss: public FunctionNode { +public: + bool used = false; +public: + Loss(std::shared_ptr a, std::shared_ptr b) : FunctionNode(a, b) {} +}; + +class SquareLoss: public Loss { +public: + SquareLoss(std::shared_ptr a, std::shared_ptr b): Loss(a, b) { + // 补全这里的代码 + } + std::shared_ptr forward() { + // a: a Node with shape (batch_size x dim) + // b: a Node with shape (batch_size x dim) + // 这个简单,就是要注意返回的res需要是一个tensor就行 + // 修改下面的代码 + std::vector res_shape = {1}; + auto res = std::make_shared(res_shape); + return res; + } + std::vector> backward(std::shared_ptr gradient) override { + float g = gradient->data[0]; + auto a = this->objects[0]; + auto b = this->objects[1]; + auto grad_a = std::make_shared(a->data->shape); + auto grad_b = std::make_shared(b->data->shape); + // 补全下面的代码 + return {grad_a, grad_b}; + } +}; // class SquareLoss + +std::shared_ptr log_softmax(std::shared_ptr logits); + +class SoftmaxLoss: public Loss { +public: + SoftmaxLoss(std::shared_ptr logits, std::shared_ptr labels): Loss(logits, labels) { + this->data = this->forward(); + } + + std::shared_ptr forward() { + // 我们已经帮你写好log_softmax + auto log_probs = log_softmax(this->objects[0]->data); + // 补全下面的代码,计算softmax loss + std::vector res_shape = {1}; + auto res = std::make_shared(res_shape); + return res; + } + std::vector> backward(std::shared_ptr gradient) override { + auto log_probs = log_softmax(this->objects[0]->data); + 
#include <cmath>  // std::exp, std::fabs

namespace operators {
// Absolute tolerance used by is_close.
static float epsilon = 1e-6;

/// Returns 1.0 when |x - y| is below the tolerance, 0.0 otherwise.
float is_close(float x, float y) {
    return std::fabs(x - y) < epsilon ? 1.0f : 0.0f;
}

/// Logistic sigmoid. The two branches are algebraically equal; each one only
/// exponentiates a non-positive number, so exp cannot overflow.
float sigmoid(float x) {
    if (x >= 0.0f) {
        return 1.0f / (1.0f + std::exp(-x));
    }
    const float e = std::exp(x);
    return e / (1.0f + e);
}

/// Rectified linear unit: x when positive, otherwise 0.
float relu(float x) {
    return x > 0.0f ? x : 0.0f;
}

/// Reciprocal 1 / x.
float inv(float x) {
    return 1.0f / x;
}

/// Derivative of 1 / x times the upstream gradient d: -d / x^2.
float inv_back(float x, float d) {
    return -d / (x * x);
}

/// Derivative of relu times the upstream gradient d: d when x > 0, else 0.
float relu_back(float x, float d) {
    return x > 0.0f ? d : 0.0f;
}

// The list helpers below are written directly against the standard library so
// that their results do not depend on the generic helpers declared in ops.h.

/// Sum of all elements; 0 for an empty list.
auto sumList(const std::vector<float>& vec) -> float {
    return std::accumulate(vec.begin(), vec.end(), 0.0f);
}

/// Product of all elements; 1 for an empty list.
auto prodList(const std::vector<float>& vec) -> float {
    return std::accumulate(vec.begin(), vec.end(), 1.0f,
                           [](float acc, float value) { return acc * value; });
}

/// Element-wise sum of two lists of equal length.
/// @throws std::invalid_argument if the lengths differ.
auto addLists(const std::vector<float>& vec1, const std::vector<float>& vec2) -> std::vector<float> {
    if (vec1.size() != vec2.size()) {
        throw std::invalid_argument("Vectors must have the same size");
    }
    std::vector<float> result;
    result.reserve(vec1.size());
    for (std::size_t i = 0; i < vec1.size(); ++i) {
        result.push_back(vec1[i] + vec2[i]);
    }
    return result;
}

/// Element-wise negation.
auto negList(const std::vector<float>& vec) -> std::vector<float> {
    std::vector<float> result;
    result.reserve(vec.size());
    for (const float value : vec) {
        result.push_back(-value);
    }
    return result;
}
}  // namespace operators
/// Combines two equally long vectors element by element.
///
/// @param vec1 Left operands.
/// @param vec2 Right operands; must have the same length as `vec1`.
/// @param func Binary callable applied as `func(vec1[i], vec2[i])`.
/// @return A vector whose i-th element is `func(vec1[i], vec2[i])`; its element
///         type is whatever `func` returns.
/// @throws std::invalid_argument if the two vectors differ in length.
template <typename T, typename U, typename F>
auto zipWith(const std::vector<T>& vec1, const std::vector<U>& vec2, F func)
    -> std::vector<decltype(func(std::declval<T>(), std::declval<U>()))> {

    if (vec1.size() != vec2.size()) {
        throw std::invalid_argument("Vectors must have the same size");
    }

    std::vector<decltype(func(std::declval<T>(), std::declval<U>()))> result;
    result.reserve(vec1.size());

    // Binary form of std::transform: walks both ranges in lock-step.
    std::transform(vec1.begin(), vec1.end(), vec2.begin(), std::back_inserter(result), func);

    return result;
}
+#include + +namespace py = pybind11; + +namespace pyarr { + +std::vector ndarray_to_vector(py::array_t array); + +} \ No newline at end of file diff --git a/cc/tensor/tensor.cc b/cc/tensor/tensor.cc new file mode 100644 index 0000000..bedf5ff --- /dev/null +++ b/cc/tensor/tensor.cc @@ -0,0 +1,76 @@ +#include "tensor.h" + +namespace tensor { + +std::shared_ptr Tensor::transpose() { + // 放心,下面的代码暂时不会被触发,我们假定所有的tensor都是2维的 + // if (shape.size() != 2) { + // throw std::runtime_error("Transpose is only supported for 2D tensors."); + // } + + // 这里能够获得矩阵的行数和列数,但是我们是使用一个一维的vector来存储数据的。该如何实现“转置”呢? + std::size_t rows = shape[0]; + std::size_t cols = shape[1]; + std::vector new_shape = {cols, rows}; + // 你知道这里的size变量在哪里定义的吗?在VSCode里面安装C/C++ Extension Pack后,按下ctrl键并单击变量size,VSCode就会把你导向定义这个变量的地方! + std::vector transposed_data(size); + + // 请在这里写转置的代码 + + // 请阅读关于Tensor的定义,在这里创建一个新的Tensor + // 注意,要使用shared_ptr哦! + + return std::make_shared(new_shape); +} + + +std::shared_ptr pyarray_to_tensor(py::array_t array) { + py::buffer_info info = array.request(); + float* dataPtr = static_cast(info.ptr); + std::vector shape = {}; + for (auto &it: info.shape) { + shape.push_back(it); + } + auto tensor = std::make_shared(shape); + std::vector result(dataPtr, dataPtr + info.size); + tensor->data = result; + return tensor; +} + +std::shared_ptr argmax(const std::shared_ptr& tensor, int axis) { + // you only need to handle the two dimensional tensor, and the axis can be either 0 or 1 + // the tensor's shape is (batch_size, features) + // if the axis is 0, it outputs a tensor (1, features) + // if the axis is 1, it outputs a tensor (batch_size, 1) + + // compute the output's shape + std::vector output_shape = tensor->shape; + output_shape.erase(output_shape.begin() + axis); + + auto result = std::make_shared(output_shape); + // 这个问题似乎有点难,所以我们决定给你送点分。一个简单的办法是分axis为0还是为1来进行讨论,反正我们已经把问题简化为了,在一个二维的tensor里面,找到每一行或者每一列的最大值,并输出一个一维的tensor。 + // 补全这里的代码。 + return result; +} + +std::shared_ptr 
/// Dense float tensor stored row-major in a single flat vector.
class Tensor {
public:
    std::vector<float> data;         // flat element storage, `size` entries
    std::vector<std::size_t> shape;  // extent of each dimension
    std::size_t size;                // product of all extents

public:
    /// Creates a tensor of the given shape.
    ///
    /// @param shape     Extent of each dimension.
    /// @param rand_init When false the elements are zero. When true they are
    ///                  drawn uniformly from [-limit, limit] with
    ///                  limit = sqrt(3 / ((shape[0] + shape[1]) / 2)), using a
    ///                  generator seeded with the constant 42 (so two tensors
    ///                  of the same shape receive the same values).
    /// @throws std::invalid_argument if `rand_init` is requested for a shape
    ///         with fewer than two dimensions.
    Tensor(const std::vector<std::size_t>& shape, bool rand_init = false) {
        // Seed the product with a size_t: the accumulator takes the type of the
        // initial value, and an int seed would overflow for large tensors.
        this->size = std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                                     std::multiplies<std::size_t>());
        this->data.resize(this->size);
        this->shape = shape;
        if (rand_init) {
            if (shape.size() < 2) {
                throw std::invalid_argument("rand_init requires at least two dimensions");
            }
            const double limit = std::sqrt(3.0 / ((shape[0] + shape[1]) / 2.0));
            std::mt19937 gen(42);
            std::uniform_real_distribution<float> dis(-limit, limit);
            for (std::size_t i = 0; i < this->size; ++i) {
                this->data[i] = dis(gen);
            }
        }
    }

    /// Returns the transpose of a 2-D tensor (defined in tensor.cc).
    std::shared_ptr<Tensor> transpose();

    /// Element-wise sum.
    /// @throws std::runtime_error if the shapes differ.
    Tensor operator+(const Tensor& other) const {
        if (this->shape != other.shape) {
            throw std::runtime_error("Shapes do not match");
        }
        Tensor result(this->shape);
        for (std::size_t i = 0; i < this->size; ++i) {
            result.data[i] = this->data[i] + other.data[i];
        }
        return result;
    }

    /// Element-wise equality: the result holds 1 where the two tensors agree
    /// and 0 where they differ.
    ///
    /// This comparison must be spelled `operator==`. Declared as `operator=`
    /// taking `const Tensor&` it would be the class's copy-assignment
    /// operator, and `a = b` would build a mask and leave `a` untouched.
    /// Copy construction, assignment and destruction are left to the
    /// compiler-generated defaults.
    ///
    /// @throws std::runtime_error if the shapes differ.
    Tensor operator==(const Tensor& other) const {
        if (this->shape != other.shape) {
            throw std::runtime_error("Shapes do not match");
        }
        Tensor result(this->shape);
        for (std::size_t i = 0; i < this->size; ++i) {
            result.data[i] = (this->data[i] == other.data[i]);
        }
        return result;
    }

    /// Returns a copy of the shape.
    std::vector<std::size_t> get_shape() const {
        return this->shape;
    }

    /// Returns a copy of the flat element storage.
    std::vector<float> get_data() const {
        return this->data;
    }

    /// Reads the element at the given multi-dimensional index.
    /// @throws std::invalid_argument if the number of indices differs from the rank.
    /// @throws std::out_of_range if any index is outside its dimension.
    float get(const std::vector<std::size_t>& indices) const {
        return data[flat_index(indices)];
    }

    /// Writes `value` at the given multi-dimensional index.
    /// @throws std::invalid_argument if the number of indices differs from the rank.
    /// @throws std::out_of_range if any index is outside its dimension.
    void set(const std::vector<std::size_t>& indices, float value) {
        data[flat_index(indices)] = value;
    }

private:
    /// Converts a multi-dimensional index into a row-major flat offset,
    /// validating it against the shape.
    std::size_t flat_index(const std::vector<std::size_t>& indices) const {
        if (indices.size() != shape.size()) {
            throw std::invalid_argument("Index rank does not match tensor rank");
        }
        std::size_t index = 0;
        std::size_t stride = 1;
        // Walk from the last (fastest-varying) dimension to the first.
        for (std::size_t axis = shape.size(); axis-- > 0;) {
            if (indices[axis] >= shape[axis]) {
                throw std::out_of_range("Tensor index out of range");
            }
            index += indices[axis] * stride;
            stride *= shape[axis];
        }
        return index;
    }
};  // class Tensor
&nn::Node::get_tensor, "Get the tensor of the node", pybind11::return_value_policy::automatic_reference); + + py::class_>(nn, "DataNode"); + + py::class_>(nn, "Parameter") + .def(pybind11::init>(), "Create a parameter from an array.") + .def("update", &nn::Parameter::update, "Update the parameter node", py::arg("grad") = nullptr, py::arg("learning_rate") = 0.001); + + py::class_>(nn, "Constant") + .def(pybind11::init>(), "Create a constant node from a numpy array"); + + py::class_>(nn, "FunctionNode"); + + py::class_>(nn, "Add") + .def(py::init, std::shared_ptr>(), "Create an add function node") + .def("forward", &nn::Add::forward, "Forward function"); + + py::class_>(nn, "AddBias") + .def(py::init, std::shared_ptr>(), "Create an add bias function node") + .def("forward", &nn::AddBias::forward, "Forward function") + .def("data", &nn::AddBias::get_data, "Get the data of the node", pybind11::return_value_policy::automatic_reference); + + py::class_>(nn, "Linear") + .def(py::init, std::shared_ptr>(), "Create a linear function node") + .def("forward", &nn::Linear::forward, "Forward function"); + + py::class_>(nn, "ReLU") + .def(py::init>(), "Create a ReLU function node"); + + py::class_>(nn, "Loss"); + + py::class_>(nn, "SquareLoss") + .def(py::init, std::shared_ptr>(), "Create a square loss function node"); + py::class_>(nn, "SoftmaxLoss") + .def(py::init, std::shared_ptr>(), "Create a softmax loss function node"); + + nn.def("log_softmax", &nn::log_softmax, "Log softmax function", py::arg("logits")); + + nn.def("gradients", &nn::gradients, "Calculate the gradients", py::arg("loss") = nullptr, py::arg("nodes") = std::vector>{}); + nn.def("pyarray_to_tensor", &tensor::pyarray_to_tensor, "Convert a numpy array to a tensor", py::arg("arr")); + nn.def("argmax", &tensor::argmax, "Get a tensor's argmax", py::arg("tensor"), py::arg("axis")); + nn.def("mean", &tensor::mean, "Get a tensor element's mean value", py::arg("tensor")); + nn.def("exp", &tensor::exp, "Get exp of a 
tensor", py::arg("tensor")); + + // framework test + py::module framework = m.def_submodule("framework", "Framework module"); + py::module basis = framework.def_submodule("basis", "Basic modules"); + + // task 1 + basis.def("mul", &operators::mul, "Multiply two integers", py::arg("a"), py::arg("b")); + basis.def("id", &operators::id, "Identity function", py::arg("a")); + basis.def("add", &operators::add, "Add two integers", py::arg("a"), py::arg("b")); + basis.def("neg", &operators::neg, "Negate an integer", py::arg("a")); + basis.def("lt", &operators::lt, "Less than operator", py::arg("a"), py::arg("b")); + basis.def("eq", &operators::eq, "Equal operator", py::arg("a"), py::arg("b")); + basis.def("max", &operators::max, "Max operator", py::arg("a"), py::arg("b")); + + // task 2 + basis.def("is_close", &operators::is_close, "Check if two floats are close", py::arg("x"), py::arg("y")); + basis.def("sigmoid", &operators::sigmoid, "Sigmoid function", py::arg("x")); + basis.def("relu", &operators::relu, "ReLU function", py::arg("x")); + basis.def("inv", &operators::inv, "Inverse function", py::arg("x")); + basis.def("inv_back", &operators::inv_back, "Inv back function", py::arg("x"), py::arg("d")); + basis.def("relu_back", &operators::relu_back, "ReLU back function", py::arg("x"), py::arg("d")); + + // task 3 + basis.def("negList", &operators::negList, "Negate a list of integers", py::arg("lst")); + + // task 4, 5 + basis.def("addLists", &operators::addLists, "Add two lists of integers", py::arg("lst1"), py::arg("lst2")); + + // task 6 + basis.def("sumList", &operators::sumList, "Sum a list of integers", py::arg("lst")); + + // task 7 + basis.def("prodList", &operators::prodList, "Multiply a list of integers", py::arg("lst")); + + py::module autodiff = framework.def_submodule("autodiff", "Autodiff modules"); + autodiff.def("test_central_difference", &autodiff::test_central_difference, "Test central difference"); + + autodiff.def("test_addscalar", 
&autodiff::test_addscalar, "Test add scalar"); + + autodiff.def("test_mulscalar", &autodiff::test_mulscalar, "Test mul scalar"); + + autodiff.def("test_logscalar", &autodiff::test_logscalar, "Test log scalar"); + + autodiff.def("test_invscalar", &autodiff::test_invscalar, "Test inv scalar"); + + autodiff.def("test_sigmoidscalar", &autodiff::test_sigmoidscalar, "Test sigmoid scalar"); +} + diff --git a/frontend/framework/autodiff/test_task7.py b/frontend/framework/autodiff/test_task7.py new file mode 100644 index 0000000..01f1447 --- /dev/null +++ b/frontend/framework/autodiff/test_task7.py @@ -0,0 +1,16 @@ +from uctc.framework import autodiff +import numpy as np +from functools import reduce +import random + +lst = [autodiff.test_central_difference, autodiff.test_addscalar, autodiff.test_mulscalar, autodiff.test_logscalar, autodiff.test_invscalar, autodiff.test_sigmoidscalar] +for e in lst: + if e(): + print(f"\033[1;34mPassed: {e.__name__} passed all tests\033[0m") + else: + print(f"\033[1;31mError: {e.__name__} failed test... 
expects true but gets false\033[0m") + exit(0) + + + +print(f"\033[1;32m[PASSED] Task 3 finished!\033[0m") \ No newline at end of file diff --git a/frontend/framework/basis/config.py b/frontend/framework/basis/config.py new file mode 100644 index 0000000..4c1313e --- /dev/null +++ b/frontend/framework/basis/config.py @@ -0,0 +1,2 @@ +# change this +lib_path = "/home/hexu/learn/uc-modern-cpp-student/cc/build/" \ No newline at end of file diff --git a/frontend/framework/basis/test_task1.py b/frontend/framework/basis/test_task1.py new file mode 100644 index 0000000..dc150ac --- /dev/null +++ b/frontend/framework/basis/test_task1.py @@ -0,0 +1,46 @@ +import numpy as np +import math +from uctc.framework import basis +binary_arguments = [ + (1, 2), + (-2, 1), + (1, 1), + (2, -2), + (1, 3), + (3, 1), + (-3, 3), + (4, 5), + (5, 4), + (4, 4), + (5, 5) +] + +singular_arguments = [ + 1, 2, 4, -32, 42, 28, 0, 100, -1000, 10000, -100000 +] + +def iterate_binary_arguments(func, std_func): + for argument in binary_arguments: + if func(*argument) != std_func(*argument): + print(f"\033[1;31mError: {func.__name__}({argument}) = {func(*argument)} != {std_func.__name__}({argument}) = {std_func(*argument)}\033[0m") + exit(0) + print(f"\033[1;34mPassed: {func.__name__} passed all tests\033[0m") + return True + +def iterate_singular_arguments(func, std_func): + for argument in singular_arguments: + if func(argument) != std_func(argument): + print(f"\033[1;31mError: {func.__name__}({argument}) = {func(argument)} != {std_func.__name__}({argument}) = {std_func(argument)}\033[0m") + exit(0) + print(f"\033[1;34mPassed: {func.__name__} passed all tests\033[0m") + return True + +# Test task 1 +iterate_binary_arguments(basis.mul, lambda x, y: x * y) +iterate_singular_arguments(basis.id, lambda x: x) +iterate_binary_arguments(basis.add, lambda x, y: x + y) +iterate_singular_arguments(basis.neg, lambda x: -x) +iterate_binary_arguments(basis.lt, lambda x, y: int(x < y)) 
"""Task 2 check: compare float helpers in ``uctc.framework.basis`` with
pure-Python reference implementations over fixed argument tables."""
from uctc.framework import basis
import numpy as np
import math
import sys

binary_arguments = [
    (1.0, 2.0),
    (2.0, 1.0),
    (-1.0, 1.0),
    (2.0, -2.0),
    (1.0, 3.0),
    (3.0, -1.0),
    (3.0, 3.0),
    (-4.0, -5.0),
    (5.0, 4.0),
    (4.0, 4.0),
    (5.0, 5.0)
]

singular_arguments = [
    1.0, -3.2, 4.3, 5.5, -6.7, 4.8, 3.33, 2.22, 1.11
]

def is_close(x, y):
    """Return True when x and y differ by less than 1e-5."""
    return abs(x - y) < 1e-5

def sigmoid(x):
    """Reference logistic function; the branch keeps math.exp's argument non-positive."""
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    else:
        return math.exp(x) / (1 + math.exp(x))

def iterate_binary_arguments(func, std_func):
    """Check func against std_func on every pair in binary_arguments.

    Prints a diagnostic and exits with status 1 on the first mismatch;
    returns True when every pair agrees.
    """
    for argument in binary_arguments:
        # Evaluate each side once and reuse the values in the message.
        actual = func(*argument)
        expected = std_func(*argument)
        if not is_close(actual, expected):
            print(f"\033[1;31mError: {func.__name__}({argument}) = {actual} != {std_func.__name__}({argument}) = {expected}\033[0m")
            # Non-zero status: a failed check must not look like success.
            sys.exit(1)
    print(f"\033[1;34mPassed: {func.__name__} passed all tests\033[0m")
    return True

def iterate_singular_arguments(func, std_func):
    """Check func against std_func on every value in singular_arguments.

    Prints a diagnostic and exits with status 1 on the first mismatch;
    returns True when every value agrees.
    """
    for argument in singular_arguments:
        actual = func(argument)
        expected = std_func(argument)
        if not is_close(actual, expected):
            print(f"\033[1;31mError: {func.__name__}({argument}) = {actual} != {std_func.__name__}({argument}) = {expected}\033[0m")
            sys.exit(1)
    print(f"\033[1;34mPassed: {func.__name__} passed all tests\033[0m")
    return True

# Test task 2
iterate_binary_arguments(basis.is_close, lambda x, y: 1.0*int(is_close(x, y)))
iterate_singular_arguments(basis.sigmoid, lambda x: sigmoid(x))
iterate_singular_arguments(basis.relu, lambda x: x if x > 0.0 else 0.0)
iterate_singular_arguments(basis.inv, lambda x: 1.0/x)
iterate_binary_arguments(basis.inv_back, lambda x, d: -d/(x*x))
iterate_binary_arguments(basis.relu_back, lambda x, d: d * 1.0 if x > 0.0 else 0.0)
print(f"\033[1;32m[PASSED] Task 2 finished!\033[0m")
"""Tasks 13-14 check: compare the tensor transpose and argmax results from
``uctc.nn`` with NumPy on a random 42x48 array."""
import sys

import numpy as np

import uctc.nn as nn

tensor1 = np.random.rand(42, 48)

tensor2 = nn.pyarray_to_tensor(tensor1)

t_tensor1 = tensor1.transpose()

t_tensor2 = tensor2.transpose()

t_2data = t_tensor2.data()

t_1data = t_tensor1.flatten().tolist()

def is_close(x, y):
    """Return True when x and y differ by less than 1e-5."""
    return abs(x - y) < 1e-5

# Task 13: the transposed data must match NumPy element by element.
for i in range(len(t_1data)):
    if not is_close(t_1data[i], t_2data[i]):
        print(f"\033[1;31mTask 13 Error: t1 data[{i}] != t2 data[{i}]\033[0m")
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

# Task 14, axis 0.
at2 = nn.argmax(tensor2, 0).data()
at1 = np.argmax(tensor1, 0).flatten().tolist()

for i in range(len(at1)):
    if not is_close(at1[i], at2[i]):
        print(f"\033[1;31mTask 14 Error: at1 data[{i}] != at2 data[{i}]\033[0m")
        sys.exit(1)

# Task 14, axis 1.
at4 = nn.argmax(tensor2, 1).data()
at3 = np.argmax(tensor1, 1).flatten().tolist()

# The original loop re-tested at1/at2 over len(at1), so the axis-1 result was
# never checked.  Compare at3 with at4 over at3's own length.
for i in range(len(at3)):
    if not is_close(at3[i], at4[i]):
        print(f"\033[1;31mTask 14 Error: at3 data[{i}] != at4 data[{i}]\033[0m")
        sys.exit(1)

print(f"\033[1;32m[PASSED] Task 13-14 finished!\033[0m")
def end_test(self, pts):
    """Close out the test that is currently running.

    Restores stdout when output muting is enabled, subtracts ``pts`` from the
    points still available for the current question, and reports the result:
    PASS if the question's score rose by exactly ``pts`` since the test
    started, FAIL if it did not move, and nothing for any other change.
    Finally clears the per-test bookkeeping fields.
    """
    if self.mute_output:
        self.unmute()

    self.possible_points_remaining -= pts

    score_now = self.points[self.current_question]
    score_before = self.points_at_test_start
    if score_now == score_before + pts:
        print("*** PASS: {}".format(self.current_test))
    elif score_now == score_before:
        print("*** FAIL")

    self.current_test = self.points_at_test_start = None
def parse_options(argv):
    """Parse the autograder command line and return the optparse options object.

    ``argv`` is the full argument list (the caller passes ``sys.argv``);
    positional arguments are parsed but discarded.  All boolean flags default
    to False and ``grade_question`` defaults to None.
    """
    # (flag spellings, add_option keyword arguments), in help-output order.
    option_table = [
        (('--edx-output',),
         dict(dest='edx_output', action='store_true',
              help='Ignored, present for compatibility only')),
        (('--gradescope-output',),
         dict(dest='gs_output', action='store_true',
              help='Ignored, present for compatibility only')),
        (('--question', '-q'),
         dict(dest='grade_question', default=None,
              help='Grade only one question (e.g. `-q q1`)')),
        (('--no-graphics',),
         dict(dest='no_graphics', action='store_true',
              help='Do not display graphics (visualizing your implementation is highly recommended for debugging).')),
        (('--mute',),
         dict(dest='mute_output', action='store_true',
              help='Mute output from executing tests')),
        (('--check-dependencies',),
         dict(dest='check_dependencies', action='store_true',
              help='check that numpy and matplotlib are installed')),
    ]

    parser = optparse.OptionParser(description='Run public tests on student code')
    parser.set_defaults(
        edx_output=False,
        gs_output=False,
        no_graphics=False,
        mute_output=False,
        check_dependencies=False,
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, _positional = parser.parse_args(argv)
    return options
@contextlib.contextmanager
def no_graphics():
    """Context manager that sets ``backend.use_graphics`` to False for the
    duration of the ``with`` block and then puts the previous value back."""
    old_use_graphics = backend.use_graphics
    backend.use_graphics = False
    try:
        yield
    finally:
        # Restore the flag even when the managed block raises; without the
        # finally, an exception left graphics disabled for the rest of the run.
        backend.use_graphics = old_use_graphics
@test('q1', points=6)
def check_perceptron(tracker):
    """Grade the perceptron question.

    Awards 2 points once the sanity checks on ``get_weights()``, ``run()``,
    ``get_prediction()`` and single-step weight updates all pass, and 4 more
    points if the trained model classifies every training example correctly.
    Failed sanity checks raise AssertionError.
    """
    import models

    print("Sanity checking perceptron...")
    np_random = np.random.RandomState(0)
    # Check that the perceptron weights are initialized to a vector with `dimensions` entries.
    for dimensions in range(1, 10):
        p = models.PerceptronModel(dimensions)
        p_weights = p.get_weights()
        verify_node(p_weights, 'parameter', (1, dimensions), "PerceptronModel.get_weights()")

    # Check that run returns a node, and that the score in the node is correct
    for dimensions in range(1, 10):
        p = models.PerceptronModel(dimensions)
        p_weights = p.get_weights()
        verify_node(p_weights, 'parameter', (1, dimensions), "PerceptronModel.get_weights()")
        point = np_random.uniform(-10, 10, (1, dimensions))
        score = p.run(nn.Constant(point))
        verify_node(score, 'node', (1, 1), "PerceptronModel.run()")
        calculated_score = nn.as_scalar(score)
        expected_score = float(np.dot(point.flatten(), p_weights.data.flatten()))
        assert np.isclose(calculated_score, expected_score), (
            "The score computed by PerceptronModel.run() ({:.4f}) does not match the expected score ({:.4f})".format(
                calculated_score, expected_score))

    # Check that get_prediction returns the correct values, including the
    # case when a point lies exactly on the decision boundary
    for dimensions in range(1, 10):
        p = models.PerceptronModel(dimensions)
        random_point = np_random.uniform(-10, 10, (1, dimensions))
        for point in (random_point, np.zeros_like(random_point)):
            prediction = p.get_prediction(nn.Constant(point))
            assert prediction == 1 or prediction == -1, (
                "PerceptronModel.get_prediction() should return 1 or -1, not {}".format(
                    prediction))

            # np.asscalar() was removed from NumPy; ndarray.item() is the
            # documented equivalent and returns the same Python scalar.
            expected_prediction = np.where(np.dot(point, p.get_weights().data.T) >= 0, 1, -1).item()
            assert prediction == expected_prediction, (
                "PerceptronModel.get_prediction() returned {}; expected {}".format(
                    prediction, expected_prediction))

    tracker.add_points(2) # Partial credit for passing sanity checks

    print("Sanity checking perceptron weight updates...")

    # Test weight updates. This involves constructing a dataset that
    # requires 0 or 1 updates before convergence, and testing that weight
    # values change as expected. Note that (multiplier < -1 or multiplier > 1)
    # must be true for the testing code to be correct.
    dimensions = 2
    for multiplier in (-5, -2, 2, 5):
        p = models.PerceptronModel(dimensions)
        orig_weights = p.get_weights().data.reshape((1, dimensions)).copy()
        if np.abs(orig_weights).sum() == 0.0:
            # This autograder test doesn't work when weights are exactly zero
            continue
        point = multiplier * orig_weights
        sanity_dataset = backend.Dataset(
            x=np.tile(point, (500, 1)),
            y=np.ones((500, 1)) * -1.0
        )
        p.train(sanity_dataset)
        new_weights = p.get_weights().data.reshape((1, dimensions))

        if multiplier < 0:
            expected_weights = orig_weights
        else:
            expected_weights = orig_weights - point

        if not np.all(new_weights == expected_weights):
            print()
            print("Initial perceptron weights were: [{:.4f}, {:.4f}]".format(
                orig_weights[0,0], orig_weights[0,1]))
            print("All data points in the dataset were identical and had:")
            print(" x = [{:.4f}, {:.4f}]".format(
                point[0,0], point[0,1]))
            print(" y = -1")
            print("Your trained weights were: [{:.4f}, {:.4f}]".format(
                new_weights[0,0], new_weights[0,1]))
            print("Expected weights after training: [{:.4f}, {:.4f}]".format(
                expected_weights[0,0], expected_weights[0,1]))
            print()
            assert False, "Weight update sanity check failed"

    print("Sanity checking complete. Now training perceptron")
    model = models.PerceptronModel(3)
    dataset = backend.PerceptronDataset(model)

    model.train(dataset)
    backend.maybe_sleep_and_close(1)

    assert dataset.epoch != 0, "Perceptron code never iterated over the training data"

    accuracy = np.mean(np.where(np.dot(dataset.x, model.get_weights().data.T) >= 0.0, 1.0, -1.0) == dataset.y)
    if accuracy < 1.0:
        print("The weights learned by your perceptron correctly classified {:.2%} of training examples".format(accuracy))
        print("To receive full points for this question, your perceptron must converge to 100% accuracy")
        return

    tracker.add_points(4)
RegressionModel.get_loss() does not depend on the provided labels (y)" + + for node in trace: + assert not isinstance(node, nn.Parameter) or node in detected_parameters, ( + "RegressionModel.get_loss() should not use additional parameters not used by RegressionModel.run()") + + tracker.add_points(2) # Partial credit for passing sanity checks + + model.train(dataset) + backend.maybe_sleep_and_close(1) + + train_loss = model.get_loss(nn.Constant(dataset.x), nn.Constant(dataset.y)) + verify_node(train_loss, 'loss', None, "RegressionModel.get_loss()") + train_loss = nn.as_scalar(train_loss) + + # Re-compute the loss ourselves: otherwise get_loss() could be hard-coded + # to always return zero + train_predicted = model.run(nn.Constant(dataset.x)) + verify_node(train_predicted, 'node', (dataset.x.shape[0], 1), "RegressionModel.run()") + sanity_loss = 0.5 * np.mean((train_predicted.data - dataset.y)**2) + + assert np.isclose(train_loss, sanity_loss), ( + "RegressionModel.get_loss() returned a loss of {:.4f}, " + "but the autograder computed a loss of {:.4f} " + "based on the output of RegressionModel.run()".format( + train_loss, sanity_loss)) + + loss_threshold = 0.02 + if train_loss <= loss_threshold: + print("Your final loss is: {:f}".format(train_loss)) + tracker.add_points(4) + else: + print("Your final loss ({:f}) must be no more than {:.4f} to receive full points for this question".format(train_loss, loss_threshold)) + +@test('q3', points=6) +def check_digit_classification(tracker): + import models + model = models.DigitClassificationModel() + dataset = backend.DigitClassificationDataset(model) + + detected_parameters = None + for batch_size in (1, 2, 4): + inp_x = nn.Constant(dataset.x[:batch_size]) + inp_y = nn.Constant(dataset.y[:batch_size]) + output_node = model.run(inp_x) + verify_node(output_node, 'node', (batch_size, 10), "DigitClassificationModel.run()") + trace = trace_node(output_node) + assert inp_x in trace, "Node returned from 
@test('q4', points=7)
def check_lang_id(tracker):
    """Grade the language-identification question.

    Awards 2 points once the structural checks on ``run()`` and ``get_loss()``
    pass (outputs depend on every input, parameters are reused between calls),
    and 5 more points if test-set accuracy after training is at least 0.81.
    Failed structural checks raise AssertionError.
    """
    import models
    model = models.LanguageIDModel()
    dataset = backend.LanguageIDDataset(model)

    detected_parameters = None
    for batch_size, word_length in ((1, 1), (2, 1), (2, 6), (4, 8)):
        start = dataset.dev_buckets[-1, 0]
        end = start + batch_size
        inp_xs, inp_y = dataset._encode(dataset.dev_x[start:end], dataset.dev_y[start:end])
        inp_xs = inp_xs[:word_length]

        output_node = model.run(inp_xs)
        verify_node(output_node, 'node', (batch_size, len(dataset.language_names)), "LanguageIDModel.run()")
        trace = trace_node(output_node)
        for inp_x in inp_xs:
            assert inp_x in trace, "Node returned from LanguageIDModel.run() does not depend on all of the provided inputs (xs)"

        # Word length 1 does not use parameters related to transferring the
        # hidden state across timesteps, so initial parameter detection is only
        # run for longer words
        if word_length > 1:
            if detected_parameters is None:
                detected_parameters = [node for node in trace if isinstance(node, nn.Parameter)]

            for node in trace:
                assert not isinstance(node, nn.Parameter) or node in detected_parameters, (
                    "Calling LanguageIDModel.run() multiple times should always re-use the same parameters, but a new nn.Parameter object was detected")

    for batch_size, word_length in ((1, 1), (2, 1), (2, 6), (4, 8)):
        start = dataset.dev_buckets[-1, 0]
        end = start + batch_size
        inp_xs, inp_y = dataset._encode(dataset.dev_x[start:end], dataset.dev_y[start:end])
        inp_xs = inp_xs[:word_length]
        loss_node = model.get_loss(inp_xs, inp_y)
        trace = trace_node(loss_node)
        for inp_x in inp_xs:
            # This loop inspects the node returned by get_loss(); the message
            # previously blamed run(), pointing students at the wrong method.
            assert inp_x in trace, "Node returned from LanguageIDModel.get_loss() does not depend on all of the provided inputs (xs)"
        assert inp_y in trace, "Node returned from LanguageIDModel.get_loss() does not depend on the provided labels (y)"

        for node in trace:
            assert not isinstance(node, nn.Parameter) or node in detected_parameters, (
                "LanguageIDModel.get_loss() should not use additional parameters not used by LanguageIDModel.run()")

    tracker.add_points(2) # Partial credit for passing sanity checks

    model.train(dataset)

    test_predicted_probs, test_predicted, test_correct = dataset._predict('test')
    test_accuracy = np.mean(test_predicted == test_correct)
    accuracy_threshold = 0.81
    if test_accuracy >= accuracy_threshold:
        print("Your final test set accuracy is: {:%}".format(test_accuracy))
        tracker.add_points(5)
    else:
        print("Your final test set accuracy ({:%}) must be at least {:.0%} to receive full points for this question".format(test_accuracy, accuracy_threshold))
{!r}".format( + batch_size)) + assert self.x.shape[0] % batch_size == 0, ( + "Dataset size {:d} is not divisible by batch size {:d}".format( + self.x.shape[0], batch_size)) + index = 0 + while index < self.x.shape[0]: + x = self.x[index:index + batch_size] + y = self.y[index:index + batch_size] + yield nn.Constant(x), nn.Constant(y) + index += batch_size + + def iterate_forever(self, batch_size): + while True: + yield from self.iterate_once(batch_size) + + def get_validation_accuracy(self): + raise NotImplementedError( + "No validation data is available for this dataset. " + "In this assignment, only the Digit Classification and Language " + "Identification datasets have validation data.") + +class PerceptronDataset(Dataset): + def __init__(self, model): + points = 500 + x = np.hstack([np.random.randn(points, 2), np.ones((points, 1))]) + y = np.where(x[:, 0] + 2 * x[:, 1] - 1 >= 0, 1.0, -1.0) + super().__init__(x, np.expand_dims(y, axis=1)) + + self.model = model + self.epoch = 0 + + if use_graphics: + fig, ax = plt.subplots(1, 1) + limits = np.array([-3.0, 3.0]) + ax.set_xlim(limits) + ax.set_ylim(limits) + positive = ax.scatter(*x[y == 1, :-1].T, color="red", marker="+") + negative = ax.scatter(*x[y == -1, :-1].T, color="blue", marker="_") + line, = ax.plot([], [], color="black") + text = ax.text(0.03, 0.97, "", transform=ax.transAxes, va="top") + ax.legend([positive, negative], [1, -1]) + plt.show(block=False) + + self.fig = fig + self.limits = limits + self.line = line + self.text = text + self.last_update = time.time() + + def iterate_once(self, batch_size): + self.epoch += 1 + + for i, (x, y) in enumerate(super().iterate_once(batch_size)): + yield x, y + + if use_graphics and time.time() - self.last_update > 0.01: + w = self.model.get_weights().data.flatten() + limits = self.limits + if w[1] != 0: + self.line.set_data(limits, (-w[0] * limits - w[2]) / w[1]) + elif w[0] != 0: + self.line.set_data(np.full(2, -w[2] / w[0]), limits) + else: + 
class RegressionDataset(Dataset):
    """Dataset of 200 points (x, sin(x)) with x spanning [-2*pi, 2*pi].

    When the module-level ``use_graphics`` flag is true, a figure is opened
    showing the real curve, and the learned curve is redrawn periodically
    while batches are consumed.
    """

    def __init__(self, model):
        """Build the data and, if graphics are enabled, the plot.

        ``model`` is stored and later queried through ``run()`` and
        ``get_loss()`` to refresh the plot.
        """
        x = np.expand_dims(np.linspace(-2 * np.pi, 2 * np.pi, num=200), axis=1)
        # Fixed seed, so the shuffled order is the same on every run.
        np.random.RandomState(0).shuffle(x)
        # Indices that put the shuffled x back in ascending order, for plotting.
        self.argsort_x = np.argsort(x.flatten())
        y = np.sin(x)
        super().__init__(x, y)

        self.model = model
        # Running count of examples yielded so far (shown in the plot text).
        self.processed = 0

        if use_graphics:
            fig, ax = plt.subplots(1, 1)
            ax.set_xlim(-2 * np.pi, 2 * np.pi)
            ax.set_ylim(-1.4, 1.4)
            real, = ax.plot(x[self.argsort_x], y[self.argsort_x], color="blue")
            learned, = ax.plot([], [], color="red")
            text = ax.text(0.03, 0.97, "", transform=ax.transAxes, va="top")
            ax.legend([real, learned], ["real", "learned"])
            plt.show(block=False)

            self.fig = fig
            self.learned = learned
            self.text = text
            self.last_update = time.time()

    def iterate_once(self, batch_size):
        """Yield each (x, y) batch from the base class once.

        After the consumer resumes the generator, ``processed`` is advanced by
        ``batch_size``; if graphics are enabled and more than 0.1 s has passed
        since the last redraw, the learned curve and loss text are refreshed.
        """
        for x, y in super().iterate_once(batch_size):
            yield x, y
            self.processed += batch_size

            if use_graphics and time.time() - self.last_update > 0.1:
                # Evaluate the model on the full dataset for the redraw.
                predicted = self.model.run(nn.Constant(self.x)).data
                loss = self.model.get_loss(
                    nn.Constant(self.x), nn.Constant(self.y)).data
                self.learned.set_data(self.x[self.argsort_x], predicted[self.argsort_x])
                self.text.set_text("processed: {:,}\nloss: {:.6f}".format(
                    self.processed, loss))
                self.fig.canvas.draw_idle()
                self.fig.canvas.start_event_loop(1e-3)
                self.last_update = time.time()
class DigitClassificationDataset(Dataset):
    """
    MNIST training data with one-hot labels, plus a held-out validation
    ("dev") split and an optional live visualisation of dev predictions.
    """
    def __init__(self, model):
        """
        Load `mnist.npz`, split the test set, and optionally build the figure.

        model: object providing `run(x)`; it is called to score the dev
            images in `iterate_once` and `get_validation_accuracy`.
        """
        mnist_path = get_data_path("mnist.npz")

        with np.load(mnist_path) as data:
            train_images = data["train_images"]
            train_labels = data["train_labels"]
            test_images = data["test_images"]
            test_labels = data["test_labels"]
            assert len(train_images) == len(train_labels) == 60000
            assert len(test_images) == len(test_labels) == 10000
            # Even-indexed test examples form the dev split, odd-indexed
            # ones the test split.
            self.dev_images = test_images[0::2]
            self.dev_labels = test_labels[0::2]
            self.test_images = test_images[1::2]
            self.test_labels = test_labels[1::2]

        # One-hot encode the integer training labels into 10 columns.
        train_labels_one_hot = np.zeros((len(train_images), 10))
        train_labels_one_hot[range(len(train_images)), train_labels] = 1

        super().__init__(train_images, train_labels_one_hot)

        self.model = model
        self.epoch = 0

        if use_graphics:
            width = 20  # Width of each row expressed as a multiple of image width
            samples = 100  # Number of images to display per label
            fig = plt.figure()
            ax = {}
            images = collections.defaultdict(list)
            texts = collections.defaultdict(list)
            # Row 9 is created first so the other rows can share its x axis.
            for i in reversed(range(10)):
                ax[i] = plt.subplot2grid((30, 1), (3 * i, 0), 2, 1,
                                         sharex=ax.get(9))
                plt.setp(ax[i].get_xticklabels(), visible=i == 9)
                ax[i].set_yticks([])
                ax[i].text(-0.03, 0.5, i, transform=ax[i].transAxes,
                           va="center")
                ax[i].set_xlim(0, 28 * width)
                ax[i].set_ylim(0, 28)
                # Pre-create blank image and text artists; iterate_once()
                # only updates their contents and positions.
                for j in range(samples):
                    images[i].append(ax[i].imshow(
                        np.zeros((28, 28)), vmin=0, vmax=1, cmap="Greens",
                        alpha=0.3))
                    texts[i].append(ax[i].text(
                        0, 0, "", ha="center", va="top", fontsize="smaller"))
            ax[9].set_xticks(np.linspace(0, 28 * width, 11))
            ax[9].set_xticklabels(
                ["{:.1f}".format(num) for num in np.linspace(0, 1, 11)])
            ax[9].tick_params(axis="x", pad=16)
            ax[9].set_xlabel("Probability of Correct Label")
            status = ax[0].text(
                0.5, 1.5, "", transform=ax[0].transAxes, ha="center",
                va="bottom")
            plt.show(block=False)

            self.width = width
            self.samples = samples
            self.fig = fig
            self.images = images
            self.texts = texts
            self.status = status
            self.last_update = time.time()

    def iterate_once(self, batch_size):
        """
        Yield one epoch of (x, y) batches; at most once per second (and only
        when `use_graphics` is true) re-score the dev split and redraw.
        """
        self.epoch += 1

        for i, (x, y) in enumerate(super().iterate_once(batch_size)):
            yield x, y

            if use_graphics and time.time() - self.last_update > 1:
                dev_logits = self.model.run(nn.Constant(self.dev_images)).data
                dev_predicted = np.argmax(dev_logits, axis=1)
                dev_probs = np.exp(nn.SoftmaxLoss.log_softmax(dev_logits))
                dev_accuracy = np.mean(dev_predicted == self.dev_labels)

                self.status.set_text(
                    "epoch: {:d}, batch: {:d}/{:d}, validation accuracy: "
                    "{:.2%}".format(
                        self.epoch, i, len(self.x) // batch_size, dev_accuracy))
                # NOTE: this loop reuses the name `i`; the batch index above
                # has already been formatted, and enumerate() rebinds `i` on
                # the next batch.
                for i in range(10):
                    predicted = dev_predicted[self.dev_labels == i]
                    # Probability assigned to the true class i for every dev
                    # image whose label is i.
                    probs = dev_probs[self.dev_labels == i][:, i]
                    # Pick `samples` images evenly spaced through the
                    # probability ranking (lowest to highest).
                    linspace = np.linspace(
                        0, len(probs) - 1, self.samples).astype(int)
                    indices = probs.argsort()[linspace]
                    for j, (prob, image) in enumerate(zip(
                            probs[indices],
                            self.dev_images[self.dev_labels == i][indices])):
                        self.images[i][j].set_data(image.reshape((28, 28)))
                        # Horizontal position encodes the probability of the
                        # correct label.
                        left = prob * (self.width - 1) * 28
                        if predicted[indices[j]] == i:
                            self.images[i][j].set_cmap("Greens")
                            self.texts[i][j].set_text("")
                        else:
                            # Misclassified: draw in red and show the
                            # predicted digit.
                            self.images[i][j].set_cmap("Reds")
                            self.texts[i][j].set_text(predicted[indices[j]])
                            self.texts[i][j].set_x(left + 14)
                        self.images[i][j].set_extent([left, left + 28, 0, 28])
                self.fig.canvas.draw_idle()
                self.fig.canvas.start_event_loop(1e-3)
                self.last_update = time.time()

    def get_validation_accuracy(self):
        """
        Return the fraction of dev images whose highest-scoring class
        (argmax over the model's output columns) equals the dev label.
        """
        dev_logits = self.model.run(nn.Constant(self.dev_images)).data
        dev_predicted = np.argmax(dev_logits, axis=1)
        dev_accuracy = np.mean(dev_predicted == self.dev_labels)
        return dev_accuracy
data['test_x'] + self.test_y = data['test_y'] + self.test_buckets = data['test_buckets'] + + self.epoch = 0 + self.bucket_weights = self.train_buckets[:,1] - self.train_buckets[:,0] + self.bucket_weights = self.bucket_weights / float(self.bucket_weights.sum()) + + self.chars_print = self.chars + try: + print(u"Alphabet: {}".format(u"".join(self.chars))) + except UnicodeEncodeError: + self.chars_print = "abcdefghijklmnopqrstuvwxyzaaeeeeiinoouuacelnszz" + print("Alphabet: " + self.chars_print) + self.chars_print = list(self.chars_print) + print(""" +NOTE: Your terminal does not appear to support printing Unicode characters. +For the purposes of printing to the terminal, some of the letters in the +alphabet above have been substituted with ASCII symbols.""".strip()) + print("") + + # Select some examples to spotlight in the monitoring phase (3 per language) + spotlight_idxs = [] + for i in range(len(self.language_names)): + idxs_lang_i = np.nonzero(self.dev_y == i)[0] + idxs_lang_i = np.random.choice(idxs_lang_i, size=3, replace=False) + spotlight_idxs.extend(list(idxs_lang_i)) + self.spotlight_idxs = np.array(spotlight_idxs, dtype=int) + + # Templates for printing updates as training progresses + max_word_len = self.dev_x.shape[1] + max_lang_len = max([len(x) for x in self.language_names]) + + self.predicted_template = u"Pred: {: 0, ( + "Batch size should be a positive integer, got {!r}".format( + batch_size)) + assert self.train_x.shape[0] >= batch_size, ( + "Dataset size {:d} is smaller than the batch size {:d}".format( + self.train_x.shape[0], batch_size)) + + self.epoch += 1 + + for iteration in range(self.train_x.shape[0] // batch_size): + bucket_id = np.random.choice(self.bucket_weights.shape[0], p=self.bucket_weights) + example_ids = self.train_buckets[bucket_id, 0] + np.random.choice( + self.train_buckets[bucket_id, 1] - self.train_buckets[bucket_id, 0], + size=batch_size) + + yield self._encode(self.train_x[example_ids], self.train_y[example_ids]) + + if 
def main():
    """
    Train the MNIST digit classifier against its dataset.

    `models` is imported here, at call time, rather than at module level.
    """
    import models

    # The other experiments are wired the same way; to run one, build the
    # matching pair instead:
    #   models.PerceptronModel(3)  with PerceptronDataset
    #   models.RegressionModel()   with RegressionDataset
    #   models.LanguageIDModel()   with LanguageIDDataset
    digit_model = models.DigitClassificationModel()
    digit_data = DigitClassificationDataset(digit_model)
    digit_model.train(digit_data)


if __name__ == "__main__":
    main()
class PerceptronModel(object):
    """
    Binary perceptron: classifies a point as +1 or -1 from the sign of the
    dot product between the point and a single weight vector.
    """

    def __init__(self, dimensions):
        """
        Create the perceptron.

        dimensions: number of features per data point; the weights are held
            in one `nn.Parameter` of shape (1 x dimensions).
        """
        self.w = nn.Parameter(1, dimensions)

    def get_weights(self):
        """Return the Parameter node holding the current weights."""
        return self.w

    def run(self, x):
        """
        Score a data point.

        x: a node with shape (1 x dimensions)
        Returns: a node containing a single number (the score)
        """
        return nn.DotProduct(x, self.get_weights())

    def get_prediction(self, x):
        """
        Predict the class of a single data point `x`.

        Returns: 1 when the score is non-negative, otherwise -1
        """
        return 1 if nn.as_scalar(self.run(x)) >= 0 else -1

    def train(self, dataset):
        """
        Train until a full pass over `dataset` produces no mistakes.

        Examples are visited one at a time (batch size 1). On a mistake the
        weights move by `label * x`. The loop only terminates if such a
        mistake-free pass is eventually reached.
        """
        batch_size = 1

        while True:
            converged = True
            for x, y in dataset.iterate_once(batch_size):
                label = nn.as_scalar(y)
                if self.get_prediction(x) != label:
                    converged = False
                    self.w.update(x, label)
            if converged:
                break
class RegressionModel(object):
    """
    One-hidden-layer network mapping a real number to a real number, used
    to approximate sin(x) on the interval [-2pi, 2pi].
    """
    def __init__(self):
        """Set the layer sizes and hyper-parameters and create the weights."""
        self.i = 1                  # input width
        self.o = 1                  # output width

        self.h = 50                 # hidden layer width
        self.b = 10                 # batch size used by train()
        self.learning_rate = 0.01

        # Creation order is kept: each Parameter draws its initial values
        # when it is constructed.
        self.W1 = nn.Parameter(self.i, self.h)
        self.b1 = nn.Parameter(1, self.h)
        self.W2 = nn.Parameter(self.h, self.o)
        self.b2 = nn.Parameter(1, self.o)

    def run(self, x):
        """
        Run the model on a batch of examples.

        Inputs:
            x: a node with shape (batch_size x 1)
        Returns:
            A node with shape (batch_size x 1) containing predicted y-values
        """
        hidden_pre = nn.AddBias(nn.Linear(x, self.W1), self.b1)
        hidden = nn.ReLU(hidden_pre)
        return nn.AddBias(nn.Linear(hidden, self.W2), self.b2)

    def get_loss(self, x, y):
        """
        Compute the square loss for a batch of examples.

        Inputs:
            x: a node with shape (batch_size x 1)
            y: a node with shape (batch_size x 1), the true y-values
        Returns: a loss node
        """
        predicted = self.run(x)
        return nn.SquareLoss(predicted, y)

    def train(self, dataset):
        """
        Run up to 20 epochs of mini-batch gradient descent, printing every
        batch loss.

        NOTE(review): training stops once the last batch loss of an epoch
        falls below 0.01; confirm the check belongs at epoch level rather
        than inside the batch loop.
        """
        weights = [self.W1, self.b1, self.W2, self.b2]
        step = -self.learning_rate
        for _epoch in range(20):
            for x, y in dataset.iterate_once(self.b):
                loss = self.get_loss(x, y)
                print(loss.data)
                g_W1, g_b1, g_W2, g_b2 = nn.gradients(loss, weights)
                for weight, grad in zip(weights, (g_W1, g_b1, g_W2, g_b2)):
                    weight.update(grad, step)
            if loss.data < 0.01:
                break
class DigitClassificationModel(object):
    """
    Handwritten digit classifier for the MNIST dataset.

    Each digit is a 28x28 grayscale image flattened into a 784-dimensional
    vector of floats between 0 and 1. The model sorts each digit into one of
    10 classes (0 through 9) using two hidden ReLU layers.
    """
    def __init__(self):
        """Set the layer sizes and hyper-parameters and create the weights."""
        self.input_features = 784
        self.h1 = 200
        self.h2 = 100
        self.output_features = 10
        self.lr = 0.01
        self.batch_size = 100
        # Creation order is kept: each Parameter draws its initial values
        # when it is constructed.
        self.w1 = nn.Parameter(self.input_features, self.h1)
        self.b1 = nn.Parameter(1, self.h1)
        self.w2 = nn.Parameter(self.h1, self.h2)
        self.b2 = nn.Parameter(1, self.h2)
        self.w3 = nn.Parameter(self.h2, self.output_features)
        self.b3 = nn.Parameter(1, self.output_features)

    def run(self, x):
        """
        Run the model on a batch of examples.

        Inputs:
            x: a node with shape (batch_size x 784)
        Output:
            A node with shape (batch_size x 10) containing predicted scores
            (logits); a higher score means the class is considered more
            likely.
        """
        first = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
        second = nn.ReLU(nn.AddBias(nn.Linear(first, self.w2), self.b2))
        logits = nn.AddBias(nn.Linear(second, self.w3), self.b3)
        return logits

    def get_loss(self, x, y):
        """
        Compute the softmax loss for a batch of examples.

        Inputs:
            x: a node with shape (batch_size x 784)
            y: a node with shape (batch_size x 10); each row is a one-hot
               vector encoding the correct digit class (0-9)
        Returns: a loss node
        """
        scores = self.run(x)
        return nn.SoftmaxLoss(scores, y)

    def train(self, dataset):
        """
        Run mini-batch gradient descent until validation accuracy exceeds
        0.95, printing the accuracy after every epoch.

        NOTE(review): accuracy is evaluated once per epoch here; confirm it
        was not meant to be checked after every batch.
        """
        weights = [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3]
        step = -self.lr
        while True:
            for x, y in dataset.iterate_once(self.batch_size):
                loss = self.get_loss(x, y)
                g_w1, g_b1, g_w2, g_b2, g_w3, g_b3 = nn.gradients(loss, weights)
                grads = (g_w1, g_b1, g_w2, g_b2, g_w3, g_b3)
                for weight, grad in zip(weights, grads):
                    weight.update(grad, step)
            accuracy = dataset.get_validation_accuracy()
            print(accuracy)
            if accuracy > 0.95:
                break
class LanguageIDModel(object):
    """
    A model for language identification at a single-word granularity.

    NOTE: this class is still the unfilled assignment skeleton. `run`,
    `get_loss` and `train` contain only a placeholder string and therefore
    return None.

    (See RegressionModel for more information about the APIs of different
    methods here. We recommend that you implement the RegressionModel before
    working on this part of the project.)
    """
    def __init__(self):
        # Our dataset contains words from five different languages, and the
        # combined alphabets of the five languages contain a total of 47 unique
        # characters.
        # You can refer to self.num_chars or len(self.languages) in your code
        self.num_chars = 47
        self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

        # Initialize your model parameters here
        "*** YOUR CODE HERE ***"

    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the initial (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"

    def get_loss(self, xs, y):
        """
        Computes the loss for a batch of examples.

        The correct labels `y` are represented as a node with shape
        (batch_size x 5). Each row is a one-hot vector encoding the correct
        language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
            y: a node with shape (batch_size x 5)
        Returns: a loss node
        """
        "*** YOUR CODE HERE ***"

    def train(self, dataset):
        """
        Trains the model.
        """
        "*** YOUR CODE HERE ***"
def format_shape(shape):
    """Render a shape tuple as text, e.g. (2, 3) -> "2x3" and () -> "()"."""
    if not shape:
        return "()"
    return "x".join(str(dim) for dim in shape)


class Node(object):
    """Common base of all graph nodes; supplies a readable repr."""
    def __repr__(self):
        kind = type(self).__name__
        shape_text = format_shape(self.data.shape)
        address = hex(id(self))
        return "<{} shape={} at {}>".format(kind, shape_text, address)


class DataNode(Node):
    """
    Parent class of Parameter and Constant: a node with no parents that
    simply holds an array.

    You should not need to use this class directly.
    """
    def __init__(self, data):
        self.parents = []
        self.data = data

    def _forward(self, *inputs):
        # A data node's value is the array it stores.
        return self.data

    @staticmethod
    def _backward(gradient, *inputs):
        # No parents, so there is nothing to propagate the gradient to.
        return []


class Parameter(DataNode):
    """
    A Parameter node stores trainable weights of a neural network (or
    perceptron).

    Use the `update` method to change the weights during training.
    """
    def __init__(self, *shape):
        """
        Create a (rows x cols) parameter with uniform random entries in
        [-limit, limit), where limit = sqrt(3 / mean(shape)).
        """
        assert len(shape) == 2, (
            "Shape must have 2 dimensions, instead has {}".format(len(shape)))
        assert all(isinstance(dim, int) and dim > 0 for dim in shape), (
            "Shape must consist of positive integers, got {!r}".format(shape))
        bound = np.sqrt(3.0 / np.mean(shape))
        initial = np.random.uniform(low=-bound, high=bound, size=shape)
        super().__init__(initial)

    def update(self, direction, multiplier):
        """
        Add `multiplier * direction.data` to the stored weights in place.

        direction: a Constant with the same shape as this parameter
        multiplier: a Python int or float
        """
        assert isinstance(direction, Constant), (
            "Update direction must be a {} node, instead has type {!r}".format(
                Constant.__name__, type(direction).__name__))
        assert direction.data.shape == self.data.shape, (
            "Update direction shape {} does not match parameter shape "
            "{}".format(
                format_shape(direction.data.shape),
                format_shape(self.data.shape)))
        assert isinstance(multiplier, (int, float)), (
            "Multiplier must be a Python scalar, instead has type {!r}".format(
                type(multiplier).__name__))
        step = multiplier * direction.data
        self.data += step
        # Fail loudly as soon as training has diverged.
        assert np.all(np.isfinite(self.data)), (
            "Parameter contains NaN or infinity after update, cannot continue")


class Constant(DataNode):
    """
    A Constant node is used to represent:
    * Input features
    * Output labels
    * Gradients computed by back-propagation

    You should not need to construct any Constant nodes directly; they are
    provided by the dataset or returned by `nn.gradients`.
    """
    def __init__(self, data):
        """Wrap `data`, which must be a floating-point numpy array."""
        assert isinstance(data, np.ndarray), (
            "Data should be a numpy array, instead has type {!r}".format(
                type(data).__name__))
        assert np.issubdtype(data.dtype, np.floating), (
            "Data should be a float array, instead has data type {!r}".format(
                data.dtype))
        super().__init__(data)
class FunctionNode(Node):
    """
    A node whose value is computed from other nodes.

    The constructor records the parent nodes and immediately evaluates the
    subclass's `_forward` on their data; the recorded parents are what
    gradient computation later walks.
    """
    def __init__(self, *parents):
        assert all(isinstance(parent, Node) for parent in parents), (
            "Inputs must be node objects, instead got types {!r}".format(
                tuple(type(parent).__name__ for parent in parents)))
        self.parents = parents
        parent_values = [parent.data for parent in parents]
        self.data = self._forward(*parent_values)


class Add(FunctionNode):
    """
    Element-wise sum of two matrices.

    Usage: nn.Add(x, y)
    Inputs:
        x: a Node with shape (batch_size x num_features)
        y: a Node with the same shape as x
    Output:
        a Node with shape (batch_size x num_features)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        lhs, rhs = inputs
        assert lhs.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                lhs.ndim))
        assert rhs.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                rhs.ndim))
        assert lhs.shape == rhs.shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(lhs.shape), format_shape(rhs.shape)))
        return lhs + rhs

    @staticmethod
    def _backward(gradient, *inputs):
        # d(x + y)/dx = d(x + y)/dy = 1, so both parents receive the
        # incoming gradient unchanged.
        assert gradient.shape == inputs[0].shape
        return [gradient, gradient]
class AddBias(FunctionNode):
    """
    Adds one bias row to every row of a feature matrix.

    Usage: nn.AddBias(features, bias)
    Inputs:
        features: a Node with shape (batch_size x num_features)
        bias: a Node with shape (1 x num_features)
    Output:
        a Node with shape (batch_size x num_features)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        features, bias = inputs
        assert features.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                features.ndim))
        assert bias.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                bias.ndim))
        assert bias.shape[0] == 1, (
            "First dimension of second input should be 1, instead got shape "
            "{}".format(format_shape(bias.shape)))
        assert features.shape[1] == bias.shape[1], (
            "Second dimension of inputs should match, instead got shapes {} "
            "and {}".format(
                format_shape(features.shape), format_shape(bias.shape)))
        # Broadcasting repeats the single bias row across the batch.
        return features + bias

    @staticmethod
    def _backward(gradient, *inputs):
        assert gradient.shape == inputs[0].shape
        # The bias feeds every row, so its gradient is the column-wise sum.
        bias_gradient = np.sum(gradient, axis=0, keepdims=True)
        return [gradient, bias_gradient]


class DotProduct(FunctionNode):
    """
    Batched dot product of feature rows with one weight row.

    Usage: nn.DotProduct(features, weights)
    Inputs:
        features: a Node with shape (batch_size x num_features)
        weights: a Node with shape (1 x num_features)
    Output: a Node with shape (batch_size x 1)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        features, weights = inputs
        assert features.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                features.ndim))
        assert weights.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                weights.ndim))
        assert weights.shape[0] == 1, (
            "First dimension of second input should be 1, instead got shape "
            "{}".format(format_shape(weights.shape)))
        assert features.shape[1] == weights.shape[1], (
            "Second dimension of inputs should match, instead got shapes {} "
            "and {}".format(
                format_shape(features.shape), format_shape(weights.shape)))
        return np.dot(features, weights.T)

    @staticmethod
    def _backward(gradient, *inputs):
        # Gradients through DotProduct are deliberately unsupported.
        raise NotImplementedError(
            "Backpropagation through DotProduct nodes is not needed in this "
            "assignment")
class Linear(FunctionNode):
    """
    Applies a linear transformation (matrix multiplication) to the input.

    Usage: nn.Linear(features, weights)
    Inputs:
        features: a Node with shape (batch_size x input_features)
        weights: a Node with shape (input_features x output_features)
    Output: a node with shape (batch_size x output_features)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        features, weights = inputs
        assert features.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                features.ndim))
        assert weights.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                weights.ndim))
        assert features.shape[1] == weights.shape[0], (
            "Second dimension of first input should match first dimension of "
            "second input, instead got shapes {} and {}".format(
                format_shape(features.shape), format_shape(weights.shape)))
        return np.dot(features, weights)

    @staticmethod
    def _backward(gradient, *inputs):
        features, weights = inputs[0], inputs[1]
        assert gradient.shape[0] == features.shape[0]
        assert gradient.shape[1] == weights.shape[1]
        # For Y = X W:  dL/dX = dL/dY W^T  and  dL/dW = X^T dL/dY.
        return [np.dot(gradient, weights.T), np.dot(features.T, gradient)]


class ReLU(FunctionNode):
    """
    An element-wise Rectified Linear Unit nonlinearity: max(x, 0).
    Every negative entry of the input is replaced with zero.

    Usage: nn.ReLU(x)
    Input:
        x: a Node with shape (batch_size x num_features)
    Output: a Node with the same shape as x, but no negative entries
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 1, "Expected 1 input, got {}".format(len(inputs))
        assert inputs[0].ndim == 2, (
            "Input should have 2 dimensions, instead has {}".format(
                inputs[0].ndim))
        return np.maximum(inputs[0], 0)

    @staticmethod
    def _backward(gradient, *inputs):
        assert gradient.shape == inputs[0].shape
        # The gradient passes only where the input was strictly positive.
        passes = np.where(inputs[0] > 0, 1.0, 0.0)
        return [gradient * passes]
class SquareLoss(FunctionNode):
    """
    Mean of 0.5 * (a[i,j] - b[i,j])**2 over all positions (i,j) of the two
    inputs.

    Usage: nn.SquareLoss(a, b)
    Inputs:
        a: a Node with shape (batch_size x dim)
        b: a Node with shape (batch_size x dim)
    Output: a scalar Node (containing a single floating-point number)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        a, b = inputs
        assert a.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                a.ndim))
        assert b.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                b.ndim))
        assert a.shape == b.shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(a.shape), format_shape(b.shape)))
        return np.mean(np.square(a - b) / 2)

    @staticmethod
    def _backward(gradient, *inputs):
        assert np.asarray(gradient).ndim == 0
        a, b = inputs[0], inputs[1]
        # Derivative of the mean of halved squares: (a - b) / count for a,
        # and the negation for b.
        return [
            gradient * (a - b) / a.size,
            gradient * (b - a) / a.size
        ]


class SoftmaxLoss(FunctionNode):
    """
    A batched softmax (cross-entropy) loss for classification.

    IMPORTANT: do not swap the order of the inputs to this node!

    Usage: nn.SoftmaxLoss(logits, labels)
    Inputs:
        logits: a Node with shape (batch_size x num_classes). Each row holds
            the scores of one example for every class; a score can be an
            arbitrary real number.
        labels: a Node with shape (batch_size x num_classes) encoding the
            correct labels. All entries must be non-negative and every row
            must sum to 1.
    Output: a scalar Node (containing a single floating-point number)
    """
    @staticmethod
    def log_softmax(logits):
        """Row-wise log-softmax, shifted by the row maximum for stability."""
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        return shifted - log_norm

    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        logits, labels = inputs
        assert logits.ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                logits.ndim))
        assert labels.ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                labels.ndim))
        assert logits.shape == labels.shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(logits.shape), format_shape(labels.shape)))
        assert np.all(labels >= 0), (
            "All entries in the labels input must be non-negative")
        assert np.allclose(np.sum(labels, axis=1), 1), (
            "Labels input must sum to 1 along each row")
        log_probs = SoftmaxLoss.log_softmax(logits)
        return np.mean(-np.sum(labels * log_probs, axis=1))

    @staticmethod
    def _backward(gradient, *inputs):
        assert np.asarray(gradient).ndim == 0
        logits, labels = inputs[0], inputs[1]
        log_probs = SoftmaxLoss.log_softmax(logits)
        batch = logits.shape[0]
        return [
            gradient * (np.exp(log_probs) - labels) / batch,
            gradient * -log_probs / batch
        ]
def gradients(loss, parameters):
    """
    Computes and returns the gradient of the loss with respect to the
    provided parameters.

    Usage: nn.gradients(loss, parameters)
    Inputs:
        loss: a SquareLoss or SoftmaxLoss node
        parameters: a list (or any iterable, including a generator)
            containing Parameter nodes
    Output: a list of Constant objects, representing the gradient of the loss
        with respect to each provided parameter, in the order given.

    A loss node can be back-propagated only once; a second call with the same
    node fails an assertion.
    """
    # `parameters` is traversed several times below; materialise it once so a
    # one-shot iterable is not exhausted by the first pass.
    parameters = list(parameters)

    assert isinstance(loss, (SquareLoss, SoftmaxLoss)), (
        "Loss must be a loss node, instead has type {!r}".format(
            type(loss).__name__))
    assert all(isinstance(parameter, Parameter) for parameter in parameters), (
        "Parameters must all have type {}, instead got types {!r}".format(
            Parameter.__name__,
            tuple(type(parameter).__name__ for parameter in parameters)))
    assert not hasattr(loss, "used"), (
        "Loss node has already been used for backpropagation, cannot reuse")

    loss.used = True

    nodes = set()
    tape = []

    def visit(node):
        # Post-order walk: every node lands on the tape after all of its
        # ancestors, so the reversed tape runs from the loss downwards.
        if node not in nodes:
            for parent in node.parents:
                visit(parent)
            nodes.add(node)
            tape.append(node)

    visit(loss)
    # Parameters the loss does not depend on still get a (zero) gradient.
    nodes |= set(parameters)

    grads = {node: np.zeros_like(node.data) for node in nodes}
    grads[loss] = 1.0

    for node in reversed(tape):
        parent_grads = node._backward(
            grads[node], *(parent.data for parent in node.parents))
        for parent, parent_grad in zip(node.parents, parent_grads):
            grads[parent] += parent_grad

    return [Constant(grads[parameter]) for parameter in parameters]
def as_scalar(node):
    """
    Returns the value of a Node as a standard Python number. This only works
    for nodes with one element (e.g. SquareLoss and SoftmaxLoss, as well as
    DotProduct with a batch size of 1 element).

    The node itself is left untouched: its `data` keeps its original shape.
    """

    assert isinstance(node, Node), (
        "Input must be a node object, instead has type {!r}".format(
            type(node).__name__))
    assert node.data.size == 1, (
        "Node has shape {}, cannot convert to a scalar".format(
            format_shape(node.data.shape)))
    # .item() extracts the single element as a Python scalar without
    # reshaping or rebinding node.data.
    return node.data.item()
def maybe_sleep_and_close(seconds):
    """
    If graphics are enabled and figures are open, wait `seconds` and then
    close every open matplotlib figure.

    Does nothing when `use_graphics` is false or no figure is open.
    """
    if use_graphics and plt.get_fignums():
        time.sleep(seconds)
        for fignum in plt.get_fignums():
            fig = plt.figure(fignum)
            plt.close(fig)
            try:
                # This raises a TclError on some Windows machines
                fig.canvas.start_event_loop(1e-3)
            except Exception:
                # Best effort only: a GUI backend error while flushing
                # events is ignored. KeyboardInterrupt and SystemExit are
                # not Exception subclasses, so they still propagate.
                pass


def get_data_path(filename):
    """
    Locate a data file relative to this module.

    The first existing candidate wins, in this order:
      1. <module dir>/../data/<filename>
      2. <module dir>/data/<filename>
      3. <module dir>/<filename>

    Returns: the path of the first candidate that exists
    Raises: Exception if none of the candidates exists
    """
    here = os.path.dirname(__file__)
    candidates = (
        os.path.join(here, os.pardir, "data", filename),
        os.path.join(here, "data", filename),
        os.path.join(here, filename),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise Exception("Could not find data file: {}".format(filename))
def run(self, x):
    """
    Compute class scores for a batch of flattened digit images.

    The batch is passed through two hidden layers (Linear, AddBias, ReLU)
    followed by a final Linear + AddBias output layer with no activation.

    Inputs:
        x: a node with shape (batch_size x 784)
    Output:
        A node with shape (batch_size x 10) containing predicted scores
        (also called logits)
    """
    hidden = x
    # Both hidden layers share the same Linear -> AddBias -> ReLU structure.
    for weight, bias in ((self.w1, self.b1), (self.w2, self.b2)):
        hidden = nn.ReLU(nn.AddBias(nn.Linear(hidden, weight), bias))
    # Output layer: raw scores, no non-linearity.
    return nn.AddBias(nn.Linear(hidden, self.w3), self.b3)
def get_data_path(filename):
    """
    Locate a data file shipped next to this module and return its path.

    Three locations are tried in order, all relative to the directory that
    contains this module:

        1. ../data/<filename>
        2. data/<filename>
        3. <filename>

    The first candidate that exists on disk is returned.

    Raises:
        FileNotFoundError: if the file is in none of the three locations.
            FileNotFoundError is a subclass of Exception, so callers that
            caught the generic Exception raised previously keep working.
    """
    module_dir = os.path.dirname(__file__)
    candidates = (
        os.path.join(module_dir, os.pardir, "data", filename),
        os.path.join(module_dir, "data", filename),
        os.path.join(module_dir, filename),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError("Could not find data file: {}".format(filename))
= 1 + + super().__init__(train_images, train_labels_one_hot) + + self.model = model + self.epoch = 0 + + if use_graphics: + width = 20 # Width of each row expressed as a multiple of image width + samples = 100 # Number of images to display per label + fig = plt.figure() + ax = {} + images = collections.defaultdict(list) + texts = collections.defaultdict(list) + for i in reversed(range(10)): + ax[i] = plt.subplot2grid((30, 1), (3 * i, 0), 2, 1, + sharex=ax.get(9)) + plt.setp(ax[i].get_xticklabels(), visible=i == 9) + ax[i].set_yticks([]) + ax[i].text(-0.03, 0.5, i, transform=ax[i].transAxes, + va="center") + ax[i].set_xlim(0, 28 * width) + ax[i].set_ylim(0, 28) + for j in range(samples): + images[i].append(ax[i].imshow( + np.zeros((28, 28)), vmin=0, vmax=1, cmap="Greens", + alpha=0.3)) + texts[i].append(ax[i].text( + 0, 0, "", ha="center", va="top", fontsize="smaller")) + ax[9].set_xticks(np.linspace(0, 28 * width, 11)) + ax[9].set_xticklabels( + ["{:.1f}".format(num) for num in np.linspace(0, 1, 11)]) + ax[9].tick_params(axis="x", pad=16) + ax[9].set_xlabel("Probability of Correct Label") + status = ax[0].text( + 0.5, 1.5, "", transform=ax[0].transAxes, ha="center", + va="bottom") + plt.show(block=False) + + self.width = width + self.samples = samples + self.fig = fig + self.images = images + self.texts = texts + self.status = status + self.last_update = time.time() + + def iterate_once(self, batch_size): + self.epoch += 1 + + for i, (x, y) in enumerate(super().iterate_once(batch_size)): + yield x, y + + if time.time() - self.last_update > 1: + dev_logits = self.model.run(nn.Constant(self.dev_images)).tensor() + # dev_logits = np.array(dev_logits_raw.data()).reshape(5000, 10) + # dev_predicted = np.argmax(dev_logits, axis=1) + dev_argmax = nn.argmax(dev_logits, axis=1) + dev_predicted = np.array(dev_argmax.data()) + # sftmax = np.array(nn.log_softmax(nn.pyarray_to_tensor(dev_logits)).data()).reshape(5000, 10) + sftmax = nn.log_softmax(dev_logits) + dev_probs = 
def get_validation_accuracy(self):
    """
    Return the fraction of dev images the model currently labels correctly.

    The model is run on all of `self.dev_images`, `nn.argmax(..., axis=1)`
    picks one predicted label per row, and the predictions are compared
    element-wise with `self.dev_labels`.
    """
    logits = self.model.run(nn.Constant(self.dev_images)).tensor()
    predicted_labels = np.array(nn.argmax(logits, axis=1).data())
    matches = predicted_labels == self.dev_labels
    return np.mean(matches)
class PerceptronModel(object):
    """Binary perceptron: labels each point +1 or -1 from a linear score."""

    def __init__(self, dimensions):
        """
        Create a perceptron for `dimensions`-dimensional data points.

        A perceptron classifies data points as either belonging to a
        particular class (+1) or not (-1). For example, dimensions=2 means
        the perceptron classifies 2D points. The weights are created with
        shape (dimensions x 1).
        """
        self.w = nn.Parameter(parameter_data(dimensions, 1))

    def get_weights(self):
        """
        Return the current weight values of the perceptron, i.e. the result
        of `self.w.data()`.
        """
        return self.w.data()

    def run(self, x):
        """
        Score a data point with the current weights.

        Inputs:
            x: a node with shape (1 x dimensions)
        Returns: a node containing a single number (the score)
        """
        return nn.Linear(x, self.w)

    def get_prediction(self, x):
        """
        Predict the class of a single data point `x`.

        Returns: 1 when the score is non-negative, otherwise -1
        """
        score = self.run(x).data()[0]
        return 1 if score >= 0 else -1

    def train(self, dataset):
        """
        Train the perceptron until a full pass over the dataset produces no
        misclassified point.
        """
        batch_size = 1
        made_mistake = True
        while made_mistake:
            made_mistake = False
            for x, y in dataset.iterate_once(batch_size):
                prediction = self.get_prediction(x)
                features = np.array(x.data(), dtype=np.float32)
                label = int(y.data()[0])
                if prediction == label:
                    continue
                # Misclassified: adjust the weights using this point, with
                # the negated label as the multiplier, and request another
                # pass over the data.
                made_mistake = True
                self.w.update(nn.pyarray_to_tensor(features), -label)
def run(self, x):
    """
    Runs the model for a batch of examples.

    The network is a single hidden layer: Linear + AddBias, ReLU, then a
    second Linear + AddBias producing the prediction.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    hidden = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
    # A NumPy re-implementation of this forward pass used to be evaluated
    # here on every call and then discarded. It never influenced the returned
    # node, so it was removed instead of being paid for on each batch.
    return nn.AddBias(nn.Linear(hidden, self.w2), self.b2)
+ """ + "*** YOUR CODE HERE ***" + itera = 0 + while True: + for x, y in dataset.iterate_once(self.batch_size): + loss = self.get_loss(x, y) + g_w1, g_b1, g_w2, g_b2 = nn.gradients(loss, [self.w1, self.b1, self.w2, self.b2]) + self.w1.update(g_w1, self.lr) + self.b1.update(g_b1, self.lr) + self.w2.update(g_w2, self.lr) + self.b2.update(g_b2, self.lr) + itera += 1 + if loss.data()[0] < 0.01: + break + + +class RegressionDataset(Dataset): + def __init__(self, model: RegressionModel): + x = np.expand_dims(np.linspace(-2 * np.pi, 2 * np.pi, num=200), axis=1) + np.random.RandomState(0).shuffle(x) + self.argsort_x = np.argsort(x.flatten()) + y = np.sin(x) + super().__init__(x, y) + + self.model = model + self.processed = 0 + + if use_graphics: + fig, ax = plt.subplots(1, 1) + ax.set_xlim(-2 * np.pi, 2 * np.pi) + ax.set_ylim(-1.4, 1.4) + real, = ax.plot(x[self.argsort_x], y[self.argsort_x], color="blue") + learned, = ax.plot([], [], color="red") + text = ax.text(0.03, 0.97, "", transform=ax.transAxes, va="top") + ax.legend([real, learned], ["real", "learned"]) + plt.show(block=False) + + self.fig = fig + self.learned = learned + self.text = text + self.last_update = time.time() + + def iterate_once(self, batch_size): + for x, y in super().iterate_once(batch_size): + yield x, y + self.processed += batch_size + + if time.time() - self.last_update > 0.01: + predicted = self.model.run(nn.Constant(self.x)).data() + loss = self.model.get_loss( + x, y).data() + predicted = np.array(predicted) + loss = loss[0] + print(f"processed: {self.processed}\nloss: {loss: .6f}") + if use_graphics: + self.learned.set_data(self.x[self.argsort_x], predicted[self.argsort_x]) + self.text.set_text(f"processed: {self.processed}\nloss: {loss: .6f}") + self.fig.canvas.draw_idle() + self.fig.canvas.start_event_loop(1e-3) + self.last_update = time.time() + +model = RegressionModel() +dataset = RegressionDataset(model) +model.train(dataset) \ No newline at end of file diff --git 
output_features = 32
batch_size = 4
x = np.random.randn(batch_size, output_features).astype(np.float32)
y = np.random.randn(batch_size, output_features).astype(np.float32)

# Model under test (uctc backend).
model = LinearTestModel(output_features)
test_x = nn.Constant(x)
predict_y = model.forward(test_x).data()
test_y = nn.Constant(y)
loss = model.get_loss(test_x, test_y).data()
g_b1 = model.backward(test_x, test_y)

# Reference model; its bias is copied from the model under test.
stdmodel = StdLinerTestModel(output_features, model)
std_test_x = stdnn.Constant(x)
std_predict_y = stdmodel.forward(std_test_x)
std_test_y = stdnn.Constant(y)
std_loss = stdmodel.get_loss(std_test_x, std_test_y)
std_g_b1 = stdmodel.backward(std_test_x, std_test_y)

# check forward
# Compare against the flattened reference output. Indexing `.tolist()[0]`
# only yields the first batch row, and zip() then silently stops there, so
# every other row of the batch went unchecked.
std_predict_flat = std_predict_y.data.flatten().tolist()
assert len(predict_y) == len(std_predict_flat), "Forward data mismatch!"
for ours, ref in zip(predict_y, std_predict_flat):
    if (abs(ours - ref) > 1e-4):
        assert 0, "Forward data mismatch!"

# check loss
if abs(loss[0] - std_loss.data) > 1e-4:
    assert 0, "Loss mismatch!"

# check backward
# Loop variables are named ours/ref so the input arrays x and y are not
# overwritten by the comparison loops.
for i, (ours, ref) in enumerate(zip(g_b1, std_g_b1)):
    if (abs(ours - ref) > 1e-4):
        assert 0, f"Gradient b1 mismatch at position {i}, g_b1 is {ours} while std g_b1 is {ref}"


print("Test passed")
class LinearTestModel:
    """Linear -> AddBias -> ReLU network built on the uctc backend."""

    def __init__(self, input_features, output_features):
        """Create the weight and bias parameters from their shape lists."""
        self.w1 = nn.Parameter([input_features, output_features])
        self.b1 = nn.Parameter([1, output_features])

    def forward(self, x):
        """Return the ReLU-activated affine transform of `x`."""
        affine = nn.AddBias(nn.Linear(x, self.w1), self.b1)
        return nn.ReLU(affine)

    def get_loss(self, x, y):
        """Return the SquareLoss node between the prediction for `x` and `y`."""
        prediction = self.forward(x)
        return nn.SquareLoss(prediction, y)

    def backward(self, x, y):
        """Return the data of the loss gradients for (w1, b1)."""
        grad_w1, grad_b1 = nn.gradients(self.get_loss(x, y), [self.w1, self.b1])
        return grad_w1.data(), grad_b1.data()
input_features = 16
output_features = 32
batch_size = 4
x = np.random.randn(batch_size, input_features).astype(np.float32)
y = np.random.randn(batch_size, output_features).astype(np.float32)

# Model under test (uctc backend).
model = LinearTestModel(input_features, output_features)
test_x = nn.Constant(x)
predict_y = model.forward(test_x).data()
test_y = nn.Constant(y)
loss = model.get_loss(test_x, test_y).data()
g_w1, g_b1 = model.backward(test_x, test_y)

# Reference model; its parameters are copied from the model under test.
stdmodel = StdLinerTestModel(input_features, output_features, model)
std_test_x = stdnn.Constant(x)
std_predict_y = stdmodel.forward(std_test_x)
std_test_y = stdnn.Constant(y)
std_loss = stdmodel.get_loss(std_test_x, std_test_y)
std_g_w1, std_g_b1 = stdmodel.backward(std_test_x, std_test_y)

# check forward
# Compare against the flattened reference output. Indexing `.tolist()[0]`
# only yields the first batch row, and zip() then silently stops there, so
# every other row of the batch went unchecked.
std_predict_flat = std_predict_y.data.flatten().tolist()
assert len(predict_y) == len(std_predict_flat), "Forward data mismatch!"
for ours, ref in zip(predict_y, std_predict_flat):
    if (abs(ours - ref) > 1e-4):
        assert 0, "Forward data mismatch!"

# check loss
if abs(loss[0] - std_loss.data) > 1e-4:
    assert 0, "Loss mismatch!"

# check backward
# Loop variables are named ours/ref so the input arrays x and y are not
# overwritten by the comparison loops.
for i, (ours, ref) in enumerate(zip(g_w1, std_g_w1)):
    if (abs(ours - ref) > 1e-4):
        assert 0, f"Gradient w1 mismatch at position {i}, g_w1 is {ours} while std g_w1 is {ref}"
for i, (ours, ref) in enumerate(zip(g_b1, std_g_b1)):
    if (abs(ours - ref) > 1e-4):
        assert 0, f"Gradient b1 mismatch at position {i}, g_b1 is {ours} while std g_b1 is {ref}"


print("Test passed")
input_features = 1
hidden_features = 50
output_features = 1
batch_size = 10
# Fixed inputs and targets so both backends see exactly the same batch.
x = np.array([-5.146528720855713, 4.451905250549316, 0.4736069440841675, -0.09472138434648514, 4.8939385414123535, 5.209676265716553, -5.967447280883789, 2.9363629817962646, -5.525413990020752, 3.315248489379883]).reshape(batch_size, -1)
y = np.array([0.9072322249412537, -0.9662654995918274, 0.45609915256500244, -0.09457980841398239, -0.9835651516914368, -0.8788799047470093, 0.3105180263519287, 0.2037920206785202, 0.6873041391372681, -0.17278438806533813]).reshape(batch_size, -1)

# The reference model copies its parameters from the model under test, so it
# has to be built before either model is updated.
model = LinearTestModel(input_features, hidden_features, output_features)
stdmodel = StdLinerTestModel(input_features, hidden_features, output_features, model)

# Forward pass, loss, gradients and one update step on the uctc backend.
test_x = nn.Constant(x)
predict_y = model.forward(test_x).data()
test_y = nn.Constant(y)
loss = model.get_loss(test_x, test_y).data()
g_w1, g_b1, g_w2, g_b2 = model.backward(test_x, test_y)
new_w1, new_b1, new_w2, new_b2 = model.update(test_x, test_y, 0)

# The same sequence on the reference backend.
std_test_x = stdnn.Constant(x)
std_predict_y = stdmodel.forward(std_test_x)
std_test_y = stdnn.Constant(y)
std_loss = stdmodel.get_loss(std_test_x, std_test_y)
std_g_w1, std_g_b1, std_g_w2, std_g_b2 = stdmodel.backward(std_test_x, std_test_y)
std_new_w1, std_new_b1, std_new_w2, std_new_b2 = stdmodel.update(std_test_x, std_test_y, 0)

# check forward
std_forward = std_predict_y.data.flatten().tolist()
for ours, ref in zip(predict_y, std_forward):
    assert not (abs(ours - ref) > 1e-4), "Forward data mismatch!"

# check loss
assert not (abs(loss[0] - std_loss.data) > 1e-4), "Loss mismatch!"

# check backward
gradient_pairs = (
    ("w1", g_w1, std_g_w1),
    ("b1", g_b1, std_g_b1),
    ("w2", g_w2, std_g_w2),
    ("b2", g_b2, std_g_b2),
)
for name, ours_all, ref_all in gradient_pairs:
    for i, (ours, ref) in enumerate(zip(ours_all, ref_all)):
        if abs(ours - ref) > 1e-4:
            assert 0, f"Gradient {name} mismatch at position {i}, g_{name} is {ours} while std g_{name} is {ref}"

# check update
# Only b1 and w1 are compared, in that order; the b2/w2 comparisons are
# disabled in this test.
updated_pairs = (
    ("b1", new_b1, std_new_b1),
    ("w1", new_w1, std_new_w1),
)
for name, ours_all, ref_all in updated_pairs:
    for i, (ours, ref) in enumerate(zip(ours_all, ref_all)):
        if abs(ours - ref) > 1e-4:
            assert 0, f"Updated {name} mismatch at position {i}, new_{name} is {ours} while std new_{name} is {ref}"
print("Test passed")
def train(self):
    """
    Fit the model to sin(x) on 200 evenly spaced points in [-2*pi, 2*pi].

    Reads the module-level `epoch` (number of passes over the data) and
    `batch_size` (rows per update). After each pass the loss of the last
    mini-batch is printed.
    """
    self.x = np.expand_dims(np.linspace(-2 * np.pi, 2 * np.pi, num=200), axis=1)
    self.argsort_x = np.argsort(self.x.flatten())
    self.y = np.sin(self.x)
    for _ in range(epoch):
        # Reorder inputs and targets with the SAME permutation. Shuffling
        # self.x in place after self.y had been computed left every x paired
        # with the sine of a different x, so the model was fitted to
        # scrambled targets.
        order = np.random.RandomState(0).permutation(self.x.shape[0])
        self.x = self.x[order]
        self.y = self.y[order]
        for start in range(0, self.x.shape[0], batch_size):
            cx = nn.Constant(self.x[start:start + batch_size])
            cy = nn.Constant(self.y[start:start + batch_size])
            self.update(cx, cy, 0.01)
        # NOTE(review): the loss is reported once per pass here; confirm this
        # matches the intended placement of the print.
        loss = self.get_loss(cx, cy)
        print(loss.data())
np.array(tmodel.b1.data()).reshape(1, hidden_features) + # self.w2.data = np.array(tmodel.w2.data()).reshape(hidden_features, output_features) + # self.b2.data = np.array(tmodel.b2.data()).reshape(1, output_features) + # print(self.w1.data) + + + def forward(self, x): + layer_1 = stdnn.ReLU(stdnn.AddBias(stdnn.Linear(x, self.w1), self.b1)) + prediction = stdnn.AddBias(stdnn.Linear(layer_1, self.w2), self.b2) + # print(f"o2: {prediction.data.flatten()[:10]}") + return prediction + + def get_loss(self, x, y): + return stdnn.SquareLoss(self.forward(x), y) + + def backward(self, x, y): + loss = self.get_loss(x, y) + g_w1, g_b1, g_w2, g_b2 = stdnn.gradients(loss, [self.w1, self.b1, self.w2, self.b2]) + return g_w1.data.flatten().tolist(), g_b1.data.flatten().tolist(), g_w2.data.flatten().tolist(), g_b2.data.flatten().tolist() + + def update(self, x, y, lr): + # loss = self.get_loss(x, y) + # g_w1, g_b1, g_w2, g_b2 = stdnn.gradients(loss, [self.w1, self.b1, self.w2, self.b2]) + loss = self.get_loss(x, y) + g_w1, g_b1, g_w2, g_b2 = stdnn.gradients(loss, [self.w1, self.b1, self.w2, self.b2]) + self.w1.update(g_w1, -lr) + self.b1.update(g_b1, -lr) + self.w2.update(g_w2, -lr) + self.b2.update(g_b2, -lr) + # print(loss.data) + # return self.w1.data.flatten().tolist(), self.b1.data.flatten().tolist(), self.w2.data.flatten().tolist(), self.b2.data.flatten().tolist() + + def train(self): + self.x = np.expand_dims(np.linspace(-2 * np.pi, 2 * np.pi, num=200), axis=1) + self.argsort_x = np.argsort(self.x.flatten()) + self.y = np.sin(self.x) + for i in range(epoch): + # np.random.RandomState(0).shuffle(self.x) + index = 0 + while index < self.x.shape[0]: + x = self.x[index:index + batch_size] + y = self.y[index:index + batch_size] + cx = stdnn.Constant(x) + cy = stdnn.Constant(y) + self.update(cx, cy, 0.01) + index += batch_size + break + loss = self.get_loss(cx, cy) + print(loss.data) + +input_features = 1 +hidden_features = 50 +output_features = 1 +batch_size = 10 +epoch = 1 + 
+model = LinearTestModel(input_features, hidden_features, output_features) +smodel = StdLinerTestModel(input_features, hidden_features, output_features, model) + +# model.train() + + +smodel.train() \ No newline at end of file diff --git a/frontend/uct/test/06_mnist_test.py b/frontend/uct/test/06_mnist_test.py new file mode 100644 index 0000000..de0f268 --- /dev/null +++ b/frontend/uct/test/06_mnist_test.py @@ -0,0 +1,144 @@ +import uctc.nn as nn +import std_model as stdnn +import numpy as np +from data6 import x, y +np.random.seed(42) + +def parameter_data(*shape): + assert len(shape) == 2, ( + "Shape must have 2 dimensions, instead has {}".format(len(shape))) + assert all(isinstance(dim, int) and dim > 0 for dim in shape), ( + "Shape must consist of positive integers, got {!r}".format(shape)) + limit = np.sqrt(3.0 / np.mean(shape)) + data = np.random.uniform(low=-limit, high=limit, size=shape).astype(np.float32) + return data + + +class MNISTModel: + def __init__(self): + self.input_features = 784 + self.h1 = 200 + self.h2 = 100 + self.output_features = 10 + self.lr = 0.01 + self.batch_size = 100 + self.w1data = parameter_data(self.input_features, self.h1) + self.b1data = parameter_data(1, self.h1) + self.w2data = parameter_data(self.h1, self.h2) + self.b2data = parameter_data(1, self.h2) + self.w3data = parameter_data(self.h2, self.output_features) + self.b3data = parameter_data(1, self.output_features) + self.w1 = nn.Parameter(self.w1data) + self.b1 = nn.Parameter(self.b1data) + self.w2 = nn.Parameter(self.w2data) + self.b2 = nn.Parameter(self.b2data) + self.w3 = nn.Parameter(self.w3data) + self.b3 = nn.Parameter(self.b3data) + + def run(self, x): + l1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1)) + l2 = nn.ReLU(nn.AddBias(nn.Linear(l1, self.w2), self.b2)) + l3 = nn.AddBias(nn.Linear(l2, self.w3), self.b3) + return l3 + + def get_loss(self, x, y): + return nn.SoftmaxLoss(self.run(x), y) + + def train(self, x, y): + loss = self.get_loss(x, y) + g_w1, 
g_b1, g_w2, g_b2, g_w3, g_b3 = nn.gradients(loss, [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3]) + self.w1.update(g_w1, self.lr) + self.b1.update(g_b1, self.lr) + self.w2.update(g_w2, self.lr) + self.b2.update(g_b2, self.lr) + self.w3.update(g_w3, self.lr) + self.b3.update(g_b3, self.lr) + return g_w1.data(), g_b1.data(), g_w2.data(), g_b2.data(), g_w3.data(), g_b3.data() + +class StdMNISTModel: + def __init__(self, model: MNISTModel): + self.input_features = 784 + self.h1 = 200 + self.h2 = 100 + self.output_features = 10 + self.lr = 0.01 + self.batch_size = 100 + self.w1 = stdnn.Parameter(self.input_features, self.h1) + self.w1.data = model.w1data + self.b1 = stdnn.Parameter(1, self.h1) + self.b1.data = model.b1data + self.w2 = stdnn.Parameter(self.h1, self.h2) + self.w2.data = model.w2data + self.b2 = stdnn.Parameter(1, self.h2) + self.b2.data = model.b2data + self.w3 = stdnn.Parameter(self.h2, self.output_features) + self.w3.data = model.w3data + self.b3 = stdnn.Parameter(1, self.output_features) + self.b3.data = model.b3data + + + def run(self, x): + l1 = stdnn.ReLU(stdnn.AddBias(stdnn.Linear(x, self.w1), self.b1)) + l2 = stdnn.ReLU(stdnn.AddBias(stdnn.Linear(l1, self.w2), self.b2)) + l3 = stdnn.AddBias(stdnn.Linear(l2, self.w3), self.b3) + return l3 + + def get_loss(self, x, y): + return stdnn.SoftmaxLoss(self.run(x), y) + + def train(self, x, y): + loss = self.get_loss(x, y) + g_w1, g_b1, g_w2, g_b2, g_w3, g_b3 = stdnn.gradients(loss, [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3]) + self.w1.update(g_w1, -self.lr) + self.b1.update(g_b1, -self.lr) + self.w2.update(g_w2, -self.lr) + self.b2.update(g_b2, -self.lr) + self.w3.update(g_w3, -self.lr) + self.b3.update(g_b3, -self.lr) + return g_w1.data.flatten().tolist(), g_b1.data.flatten().tolist(), g_w2.data.flatten().tolist(), g_b2.data.flatten().tolist(), g_w3.data.flatten().tolist(), g_b3.data.flatten().tolist() + +model = MNISTModel() +smodel = StdMNISTModel(model) + +o1_x = nn.Constant(x) 
+o1_y = nn.Constant(y) +o1_out = model.run(o1_x).data() +print(o1_out) +# o1_loss = model.get_loss(o1_x, o1_y) +# print(o1_loss.data()[0]) +# o1_gw1, o1_gb1, o1_gw2, o1_gb2, o1_gw3, o1_gb3 = model.train(o1_x, o1_y) + +o2_x = stdnn.Constant(x) +o2_y = stdnn.Constant(y) +o2_out = smodel.run(o2_x).data +print(o2_out) +# o2_loss = smodel.get_loss(o2_x, o2_y) +# print(o2_loss.data) +# o2_gw1, o2_gb1, o2_gw2, o2_gb2, o2_gw3, o2_gb3 = smodel.train(o2_x, o2_y) + +# for i, (a, b) in enumerate(zip(o1_gw1, o2_gw1)): +# if abs(a - b) > 1e-4: +# print(f"gw1 failed: {i, a, b}") +# break +# for i, (a, b) in enumerate(zip(o1_gb1, o2_gb1)): +# if abs(a - b) > 1e-4: +# print(f"gb1 failed: {i, a, b}") +# break +# for i, (a, b) in enumerate(zip(o1_gw2, o2_gw2)): +# if abs(a - b) > 1e-4: +# print(f"gw2 failed: {i, a, b}") +# break +# for i, (a, b) in enumerate(zip(o1_gb2, o2_gb2)): +# if abs(a - b) > 1e-4: +# print(f"gb2 failed: {i, a, b}") +# break +# for i, (a, b) in enumerate(zip(o1_gw3, o2_gw3)): +# if abs(a - b) > 1e-4: +# print(f"gw3 failed: {i, a, b}") +# break +# for i, (a, b) in enumerate(zip(o1_gb3, o2_gb3)): +# if abs(a - b) > 1e-4: +# print(f"gb3 failed: {i, a, b}") +# break +# print(o1_loss.data()[0], o2_loss.data) +print("PASSED") \ No newline at end of file diff --git a/frontend/uct/test/data6.py b/frontend/uct/test/data6.py new file mode 100644 index 0000000..f144b96 --- /dev/null +++ b/frontend/uct/test/data6.py @@ -0,0 +1,4 @@ +import numpy as np +x = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294117748737335, 0.7254902124404907, 0.6235294342041016, 0.5921568870544434, 0.2352941334247589, 0.1411764770746231, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8705883026123047, 0.9960784912109375, 0.9960784912109375, 0.9960784912109375, 0.9960784912109375, 0.9450981020927429, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.7764706611633301, 0.6666666865348816, 0.2039215862751007, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.26274511218070984, 0.44705885648727417, 0.2823529541492462, 0.44705885648727417, 0.6392157077789307, 0.8901961445808411, 0.9960784912109375, 0.8823530077934265, 0.9960784912109375, 0.9960784912109375, 0.9960784912109375, 0.9803922176361084, 0.8980392813682556, 0.9960784912109375, 0.9960784912109375, 0.5490196347236633, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06666667014360428, 0.25882354378700256, 0.05490196496248245, 0.26274511218070984, 0.26274511218070984, 0.26274511218070984, 0.23137256503105164, 0.08235294371843338, 0.9254902601242065, 0.9960784912109375, 0.41568630933761597, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.32549020648002625, 0.9921569228172302, 0.8196079134941101, 0.07058823853731155, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08627451211214066, 0.9137255549430847, 1.0, 0.32549020648002625, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5058823823928833, 0.9960784912109375, 0.9333333969116211, 0.1725490242242813, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23137256503105164, 0.9764706492424011, 0.9960784912109375, 0.24313727021217346, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5215686559677124, 0.9960784912109375, 0.7333333492279053, 0.019607843831181526, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03529411926865578, 0.803921639919281, 0.9725490808486938, 0.22745099663734436, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4941176772117615, 0.9960784912109375, 0.7137255072593689, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.29411765933036804, 0.9843137860298157, 0.9411765336990356, 0.22352942824363708, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07450980693101883, 0.8666667342185974, 0.9960784912109375, 0.6509804129600525, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011764707043766975, 0.7960785031318665, 0.9960784912109375, 0.8588235974311829, 0.13725490868091583, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14901961386203766, 0.9960784912109375, 0.9960784912109375, 0.3019607961177826, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12156863510608673, 0.8784314393997192, 0.9960784912109375, 0.45098042488098145, 0.003921568859368563, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5215686559677124, 0.9960784912109375, 0.9960784912109375, 0.2039215862751007, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2392157018184662, 0.9490196704864502, 0.9960784912109375, 0.9960784912109375, 0.2039215862751007, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098352432251, 0.9960784912109375, 0.9960784912109375, 0.8588235974311829, 0.1568627506494522, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098352432251, 0.9960784912109375, 0.8117647767066956, 0.07058823853731155, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14901961386203766, 0.9960784912109375, 0.4274510145187378, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.34117648005485535, 0.988235354423523, 0.32156863808631897, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.529411792755127, 0.9450981020927429, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1764705926179886, 0.9568628072738647, 0.5882353186607361, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294117748737335, 0.9960784912109375, 0.24705883860588074, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7921569347381592, 0.874509871006012, 0.04313725605607033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.125490203499794, 0.9960784912109375, 0.847058892250061, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.37254902720451355, 0.9960784912109375, 0.7647059559822083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5490196347236633, 0.9960784912109375, 0.3019607961177826, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22352942824363708, 0.9294118285179138, 0.803921639919281, 0.0313725508749485, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4862745404243469, 1.0, 0.6470588445663452, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6705882549285889, 0.9960784912109375, 0.3176470696926117, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0941176563501358, 0.9098039865493774, 0.8431373238563538, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4705882668495178, 0.9960784912109375, 0.6235294342041016, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5921568870544434, 0.9960784912109375, 0.5568627715110779, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8941177129745483, 0.9960784912109375, 0.25882354378700256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2392157018184662, 0.9843137860298157, 0.9960784912109375, 0.25882354378700256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5529412031173706, 0.9960784912109375, 0.803921639919281, 0.011764707043766975, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03921568766236305, 0.8431373238563538, 0.9960784912109375, 0.4745098352432251, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.019607843831181526, 0.7764706611633301, 0.6901960968971252, 0.03921568766236305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]).reshape(2, 784) + +y = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]).reshape(100, 10) \ No newline at end of file diff --git a/frontend/uct/test/std_model.py b/frontend/uct/test/std_model.py new file mode 100644 index 0000000..1f6eded --- /dev/null +++ b/frontend/uct/test/std_model.py @@ -0,0 +1,393 @@ +import numpy as np + +def format_shape(shape): + return "x".join(map(str, shape)) if shape else "()" + +class Node(object): + def __repr__(self): + return "<{} shape={} at {}>".format( + type(self).__name__, format_shape(self.data.shape), hex(id(self))) + +class DataNode(Node): + """ + DataNode is the parent class for Parameter and Constant nodes. + + You should not need to use this class directly. 
+ """ + def __init__(self, data): + self.parents = [] + self.data = data + + def _forward(self, *inputs): + return self.data + + @staticmethod + def _backward(gradient, *inputs): + return [] + +class Parameter(DataNode): + """ + A Parameter node stores parameters used in a neural network (or perceptron). + + Use the the `update` method to update parameters when training the + perceptron or neural network. + """ + def __init__(self, *shape): + assert len(shape) == 2, ( + "Shape must have 2 dimensions, instead has {}".format(len(shape))) + assert all(isinstance(dim, int) and dim > 0 for dim in shape), ( + "Shape must consist of positive integers, got {!r}".format(shape)) + limit = np.sqrt(3.0 / np.mean(shape)) + data = np.random.uniform(low=-limit, high=limit, size=shape) + super().__init__(data) + + def update(self, direction, multiplier): + assert isinstance(direction, Constant), ( + "Update direction must be a {} node, instead has type {!r}".format( + Constant.__name__, type(direction).__name__)) + assert direction.data.shape == self.data.shape, ( + "Update direction shape {} does not match parameter shape " + "{}".format( + format_shape(direction.data.shape), + format_shape(self.data.shape))) + assert isinstance(multiplier, (int, float)), ( + "Multiplier must be a Python scalar, instead has type {!r}".format( + type(multiplier).__name__)) + self.data += multiplier * direction.data + assert np.all(np.isfinite(self.data)), ( + "Parameter contains NaN or infinity after update, cannot continue") + +class Constant(DataNode): + """ + A Constant node is used to represent: + * Input features + * Output labels + * Gradients computed by back-propagation + + You should not need to construct any Constant nodes directly; they will + instead be provided by either the dataset or when you call `nn.gradients`. 
+ """ + def __init__(self, data): + assert isinstance(data, np.ndarray), ( + "Data should be a numpy array, instead has type {!r}".format( + type(data).__name__)) + assert np.issubdtype(data.dtype, np.floating), ( + "Data should be a float array, instead has data type {!r}".format( + data.dtype)) + super().__init__(data) + +class FunctionNode(Node): + """ + A FunctionNode represents a value that is computed based on other nodes. + The FunctionNode class performs necessary book-keeping to compute gradients. + """ + def __init__(self, *parents): + assert all(isinstance(parent, Node) for parent in parents), ( + "Inputs must be node objects, instead got types {!r}".format( + tuple(type(parent).__name__ for parent in parents))) + self.parents = parents + self.data = self._forward(*(parent.data for parent in parents)) + +class Add(FunctionNode): + """ + Adds matrices element-wise. + + Usage: nn.Add(x, y) + Inputs: + x: a Node with shape (batch_size x num_features) + y: a Node with the same shape as x + Output: + a Node with shape (batch_size x num_features) + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[0].shape == inputs[1].shape, ( + "Input shapes should match, instead got {} and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + return inputs[0] + inputs[1] + + @staticmethod + def _backward(gradient, *inputs): + assert gradient.shape == inputs[0].shape + return [gradient, gradient] + +class AddBias(FunctionNode): + """ + Adds a bias vector to each feature vector + + Usage: nn.AddBias(features, bias) + Inputs: + features: a Node with shape (batch_size x num_features) + bias: a Node with shape (1 x num_features) + Output: + 
a Node with shape (batch_size x num_features) + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[1].shape[0] == 1, ( + "First dimension of second input should be 1, instead got shape " + "{}".format(format_shape(inputs[1].shape))) + assert inputs[0].shape[1] == inputs[1].shape[1], ( + "Second dimension of inputs should match, instead got shapes {} " + "and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + return inputs[0] + inputs[1] + + @staticmethod + def _backward(gradient, *inputs): + assert gradient.shape == inputs[0].shape + return [gradient, np.sum(gradient, axis=0, keepdims=True)] + +class DotProduct(FunctionNode): + """ + Batched dot product + + Usage: nn.DotProduct(features, weights) + Inputs: + features: a Node with shape (batch_size x num_features) + weights: a Node with shape (1 x num_features) + Output: a Node with shape (batch_size x 1) + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[1].shape[0] == 1, ( + "First dimension of second input should be 1, instead got shape " + "{}".format(format_shape(inputs[1].shape))) + assert inputs[0].shape[1] == inputs[1].shape[1], ( + "Second dimension of inputs should match, instead got shapes {} " + "and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + return np.dot(inputs[0], inputs[1].T) + + @staticmethod + def 
_backward(gradient, *inputs): + # assert gradient.shape[0] == inputs[0].shape[0] + # assert gradient.shape[1] == 1 + # return [np.dot(gradient, inputs[1]), np.dot(gradient.T, inputs[0])] + raise NotImplementedError( + "Backpropagation through DotProduct nodes is not needed in this " + "assignment") + +class Linear(FunctionNode): + """ + Applies a linear transformation (matrix multiplication) to the input + + Usage: nn.Linear(features, weights) + Inputs: + features: a Node with shape (batch_size x input_features) + weights: a Node with shape (input_features x output_features) + Output: a node with shape (batch_size x input_features) + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[0].shape[1] == inputs[1].shape[0], ( + "Second dimension of first input should match first dimension of " + "second input, instead got shapes {} and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + return np.dot(inputs[0], inputs[1]) + + @staticmethod + def _backward(gradient, *inputs): + assert gradient.shape[0] == inputs[0].shape[0] + assert gradient.shape[1] == inputs[1].shape[1] + return [np.dot(gradient, inputs[1].T), np.dot(inputs[0].T, gradient)] + +class ReLU(FunctionNode): + """ + An element-wise Rectified Linear Unit nonlinearity: max(x, 0). + This nonlinearity replaces all negative entries in its input with zeros. 
+ + Usage: nn.ReLU(x) + Input: + x: a Node with shape (batch_size x num_features) + Output: a Node with the same shape as x, but no negative entries + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 1, "Expected 1 input, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "Input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + return np.maximum(inputs[0], 0) + + @staticmethod + def _backward(gradient, *inputs): + assert gradient.shape == inputs[0].shape + return [gradient * np.where(inputs[0] > 0, 1.0, 0.0)] + +class SquareLoss(FunctionNode): + """ + This node first computes 0.5 * (a[i,j] - b[i,j])**2 at all positions (i,j) + in the inputs, which creates a (batch_size x dim) matrix. It then calculates + and returns the mean of all elements in this matrix. + + Usage: nn.SquareLoss(a, b) + Inputs: + a: a Node with shape (batch_size x dim) + b: a Node with shape (batch_size x dim) + Output: a scalar Node (containing a single floating-point number) + """ + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[0].shape == inputs[1].shape, ( + "Input shapes should match, instead got {} and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + return np.mean(np.square(inputs[0] - inputs[1]) / 2) + + @staticmethod + def _backward(gradient, *inputs): + assert np.asarray(gradient).ndim == 0 + return [ + gradient * (inputs[0] - inputs[1]) / inputs[0].size, + gradient * (inputs[1] - inputs[0]) / inputs[0].size + ] + +class SoftmaxLoss(FunctionNode): + """ + A batched softmax loss, used for classification problems. + + IMPORTANT: do not swap the order of the inputs to this node! 
+ + Usage: nn.SoftmaxLoss(logits, labels) + Inputs: + logits: a Node with shape (batch_size x num_classes). Each row + represents the scores associated with that example belonging to a + particular class. A score can be an arbitrary real number. + labels: a Node with shape (batch_size x num_classes) that encodes the + correct labels for the examples. All entries must be non-negative + and the sum of values along each row should be 1. + Output: a scalar Node (containing a single floating-point number) + """ + @staticmethod + def log_softmax(logits): + log_probs = logits - np.max(logits, axis=1, keepdims=True) + log_probs -= np.log(np.sum(np.exp(log_probs), axis=1, keepdims=True)) + return log_probs + + @staticmethod + def _forward(*inputs): + assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) + assert inputs[0].ndim == 2, ( + "First input should have 2 dimensions, instead has {}".format( + inputs[0].ndim)) + assert inputs[1].ndim == 2, ( + "Second input should have 2 dimensions, instead has {}".format( + inputs[1].ndim)) + assert inputs[0].shape == inputs[1].shape, ( + "Input shapes should match, instead got {} and {}".format( + format_shape(inputs[0].shape), format_shape(inputs[1].shape))) + assert np.all(inputs[1] >= 0), ( + "All entries in the labels input must be non-negative") + assert np.allclose(np.sum(inputs[1], axis=1), 1), ( + "Labels input must sum to 1 along each row") + log_probs = SoftmaxLoss.log_softmax(inputs[0]) + return np.mean(-np.sum(inputs[1] * log_probs, axis=1)) + + @staticmethod + def _backward(gradient, *inputs): + assert np.asarray(gradient).ndim == 0 + log_probs = SoftmaxLoss.log_softmax(inputs[0]) + return [ + gradient * (np.exp(log_probs) - inputs[1]) / inputs[0].shape[0], + gradient * -log_probs / inputs[0].shape[0] + ] + +def gradients(loss, parameters): + """ + Computes and returns the gradient of the loss with respect to the provided + parameters. 
+ + Usage: nn.gradients(loss, parameters) + Inputs: + loss: a SquareLoss or SoftmaxLoss node + parameters: a list (or iterable) containing Parameter nodes + Output: a list of Constant objects, representing the gradient of the loss + with respect to each provided parameter. + """ + + assert isinstance(loss, (SquareLoss, SoftmaxLoss)), ( + "Loss must be a loss node, instead has type {!r}".format( + type(loss).__name__)) + assert all(isinstance(parameter, Parameter) for parameter in parameters), ( + "Parameters must all have type {}, instead got types {!r}".format( + Parameter.__name__, + tuple(type(parameter).__name__ for parameter in parameters))) + assert not hasattr(loss, "used"), ( + "Loss node has already been used for backpropagation, cannot reuse") + + loss.used = True + + nodes = set() + tape = [] + + def visit(node): + if node not in nodes: + for parent in node.parents: + visit(parent) + nodes.add(node) + tape.append(node) + + visit(loss) + nodes |= set(parameters) + + grads = {node: np.zeros_like(node.data) for node in nodes} + grads[loss] = 1.0 + + for node in reversed(tape): + parent_grads = node._backward( + grads[node], *(parent.data for parent in node.parents)) + for parent, parent_grad in zip(node.parents, parent_grads): + grads[parent] += parent_grad + + return [Constant(grads[parameter]) for parameter in parameters] + +def as_scalar(node): + """ + Returns the value of a Node as a standard Python number. This only works + for nodes with one element (e.g. SquareLoss and SoftmaxLoss, as well as + DotProduct with a batch size of 1 element). 
+ """ + + assert isinstance(node, Node), ( + "Input must be a node object, instead has type {!r}".format( + type(node).__name__)) + assert node.data.size == 1, ( + "Node has shape {}, cannot convert to a scalar".format( + format_shape(node.data.shape))) + node.data = node.data.flatten() + return node.data.tolist()[0] diff --git a/frontend/uct/transformer.py b/frontend/uct/transformer.py new file mode 100644 index 0000000..e69de29 diff --git a/frontend/uct/utils.py b/frontend/uct/utils.py new file mode 100644 index 0000000..380885f --- /dev/null +++ b/frontend/uct/utils.py @@ -0,0 +1,45 @@ +import numpy as np +import uctc.nn as nn +np.random.seed(42) +def parameter_data(*shape): + assert len(shape) == 2, ( + "Shape must have 2 dimensions, instead has {}".format(len(shape))) + assert all(isinstance(dim, int) and dim > 0 for dim in shape), ( + "Shape must consist of positive integers, got {!r}".format(shape)) + limit = np.sqrt(3.0 / np.mean(shape)) + data = np.random.uniform(low=-limit, high=limit, size=shape).astype(np.float32) + return data + +class Dataset(object): + def __init__(self, x, y): + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert np.issubdtype(x.dtype, np.floating) + assert np.issubdtype(y.dtype, np.floating) + assert x.ndim == 2 + assert y.ndim == 2 + assert x.shape[0] == y.shape[0] + self.x = x + self.y = y + + def iterate_once(self, batch_size): + assert isinstance(batch_size, int) and batch_size > 0, ( + f"Batch size should be a positive integer, got {batch_size}") + assert self.x.shape[0] % batch_size == 0, ( + f"Dataset size {self.x.shape[0]} is not divisible by batch size {batch_size}") + index = 0 + while index < self.x.shape[0]: + x = self.x[index:index + batch_size] + y = self.y[index:index + batch_size] + yield nn.Constant(x), nn.Constant(y) + index += batch_size + + def iterate_forever(self, batch_size): + while True: + yield from self.iterate_once(batch_size) + + def get_validation_accuracy(self): + raise 
NotImplementedError( + "No validation data is available for this dataset. " + "In this assignment, only the Digit Classification and Language " + "Identification datasets have validation data.") \ No newline at end of file diff --git a/lab-guide/00-intro.md b/lab-guide/00-intro.md new file mode 100644 index 0000000..ffe0fc8 --- /dev/null +++ b/lab-guide/00-intro.md @@ -0,0 +1,36 @@ +### Welcome to uct lab + +> uct 是Undergraduate Computing Torch的简写。 + +欢迎你选择uct作为自己的大实验,在这个大实验中,我们将亲自动手使用C++搭建一个机器学习框架,并完成手写体数据集MNIST的识别。 + +注意:你不需要获得任何对于神经网络的前置知识,考虑到《大学计算(下)》面向的是本科一年级学生,我们设计了非常详细的实验指导书帮助你完成这个实验。 + +#### 安装构建工具 + +大型的C++项目显然不止是几个文件,而是成百上千个文件,因此我们需要一个工具来管理这些文件。有很多课程会使用到类似的工具(在《操作系统》课程上,你将会遇见Makefile;在《编译原理》、《并行编译与优化》上,你将会用到CMake),在这里我们选择CMake。 + +> CMake 是一个开源的跨平台构建系统生成工具,广泛用于管理软件构建过程。它通过生成标准的构建文件(如 Makefile、Visual Studio 项目文件等)来简化跨平台项目的构建流程。 + +> 对于经验丰富的同学,如果你喜欢使用别的构建工具(例如Bazel)也是可以的~ + +假如你也正在使用WSL(2),运行下面的命令可以安装好所需要的工具和库 + +```bash +sudo apt update +sudo apt install -y build-essential cmake git gcc g++ +``` + +#### 准备Python环境 + +首先,你需要在Linux下具备Python环境。相信在《大学计算(上)》中,你已经具备这样的技能。我们以使用WSL+VSCode为例介绍环境配置的具体方案。 + +在VSCode中连接WSL,打开对应目录。 + +使用`conda`创建一个环境(或使用已有环境),然后执行 + +``` +pip install pybind11 +``` + +而后,通过`pip show pybind11`可以找到`pybind11`的安装路径,将对应的头文件路径添加到`.vscode/c_cpp_properties.json`的`includePath`中。 \ No newline at end of file diff --git a/lab-guide/01-fundamentals.md b/lab-guide/01-fundamentals.md new file mode 100644 index 0000000..9b7edbe --- /dev/null +++ b/lab-guide/01-fundamentals.md @@ -0,0 +1,117 @@ +### 第一部分:基本操作 + +#### 基本函数的构建 + +在这一部分中,我们将完成基本的四则运算和由它们组合而成的初等函数的构建。你需要在cc/operators中补全`ops.h`和`ops.cc`的内容。 + +**[TASK 1]** 在`ops.h`中,你需要补全以下函数的实现: + +- `mul`函数,输入为两个数`a`、`b`,输出为它们的乘积。 + +- `id`函数,将输入原样输出。 + +- `add`函数,输入为两个数`a`、`b`,输出为它们的和。 + +- `neg`函数,输入为`a`,输出为`-a`。 + +- `lt`函数,输入为两个数`a`、`b`,输出为`(float)(a < b)`。 + +- `eq`函数,输入为两个数`a`、`b`,输出为`(float)(a == b)`。 + +- `max`函数,输入为两个数`a`、`b`,输出为`a`和`b`中较大的那个。 + 
+它们都是模板函数,相信你已经注意到了,它们都被定义在`.h`文件中,而不是`.cc`文件中,这与C++的模板的实例化机制和编译模型有关。 + +模板的实例化机制:模板函数或模板类并不是真正的代码,而是一个“蓝图”或“模式”,编译器在编译时根据这个蓝图生成具体的代码。这个过程称为模板实例化。例如,当你使用一个模板函数时,编译器会根据你传递的类型参数生成一个具体的函数版本。这个生成的过程发生在编译时。 + +编译模型:C++采用的是分离编译模型,即每个源文件(.cc 或 .cpp 文件)是独立编译的。编译器在编译一个源文件时,只会看到该源文件及其包含的头文件中的内容。如果你将模板函数的定义放在源文件中,其他源文件在编译时无法看到模板的定义,因此无法生成对应的实例化代码。 + +另外,你应当还注意到了我们为这两个文件提供了名叫`operators`的命名空间(namespace)。这样做主要是为了避免我们的函数与其他代码(例如标准库或别的模块)中的同名函数发生冲突。 + +**[TASK 2]** 在`ops.cc`中,你需要完成以下函数的实现: + +- `is_close`函数,输入为两个数`x`、`y`,输出为`(float)(abs(x - y) < epsilon)`。 + +- `sigmoid`函数,输入为`x`,为了方便计算,在输出时遵照下面的规则: + +$$ +f(x) =\left\{\begin{matrix} +\frac{1.0}{(1.0 + e^{-x})}, x\ge 0 + \\ +\frac{e^x}{(1.0 + e^{x})}, \mathrm{otherwise} +\end{matrix}\right. +$$ + +- `relu`函数,输入为`x`,输出为`x > 0.0 ? x : 0.0`。 + +- `inv`函数,输入为`x`,输出为`1.0 / x`。 + +- `inv_back`函数,用于计算$f(x)=\frac{1}{x}$的微分$f'(x)\mathrm{d}x$,输入为`x`和`d`,输出为$-\frac{d}{x^2}$。 + +- `relu_back`函数,输入为`x`和`d`,输出为`x > 0.0 ? d*1.0 : 0.0`。 + +#### 函数式编程基础 + +实现`map`、`zipWith`和`reduce`。 + +`map`接受一个`std::vector`和一个函数作为输入,返回一个新的`std::vector`,其中每个元素都是输入函数应用于输入`std::vector`中对应元素的结果。具体来说,对于下面这个实现: + +```cpp +template <typename T, typename F> +auto map(const std::vector<T>& vec, F func) -> std::vector<decltype(func(std::declval<T>()))> { + + std::vector<decltype(func(std::declval<T>()))> result; + result.reserve(vec.size()); + + std::transform(vec.begin(), vec.end(), std::back_inserter(result), func); + + return result; +} +``` + +有几处可能让你感到疑惑的地方。 + +首先,这里的函数返回值居然和Python一样被后置了!`->` 是 C++11 引入的尾置返回类型语法。它的作用是将函数的返回类型放在函数参数列表之后,而不是放在函数名之前。在某些情况下,返回类型可能依赖于函数参数或模板参数,而这些信息在函数名之前是不可用的。尾置返回类型允许我们在函数参数列表之后推导返回类型。 + +> 例如,在`map`函数中,返回类型依赖于`func`的返回类型,而`func`的类型在函数名之前是未知的。使用尾置返回类型可以解决这个问题。 + +其次,我们使用了`std::declval`。`std::declval`是 C++11 引入的一个工具,用于在编译时模拟一个对象的“假实例”,以便在不实际构造对象的情况下推导类型。 + +```cpp +decltype(func(std::declval<T>())) +``` + +> 在`map`函数中,我们需要推导`func`的返回类型。假设`func`是一个函数对象,接受`T`类型的参数并返回某种类型`R`,我们可以使用`std::declval`来模拟调用`func`的过程。 + +**[TASK 3]** 在`ops.cc`中,调用我们给出的`map`函数实现和你刚刚完成的`neg`函数,补全`negList`函数(大约需要1行代码)。 + +**[TASK 4]** 
在`ops.h`中,仿照`map`函数,补全`zipWith`函数(大约需要10行代码)。`zipWith`函数接受两个`vector`和一个函数`func`作为输入,要得到一个新的`vector`,这个`vector`中的元素都是两个`vector`逐元素进行函数`func`操作之后的结果。例如,对于`vec1 = [1, 2, 3]`,`vec2 = [5, 6, 7]`,`func`为`add`,那么将返回`[6, 8, 10]`。注意:在进行`zipWith`函数的实现时,你需要考虑输入的两个`std::vector`长度不一致的情况,对于这种情况,你简单地`throw`一个异常即可。 + +**[TASK 5]** 在`ops.cc`中,使用你实现的`zipWith`和`add`函数,实现`addLists`函数(大约需要1行代码)。 + +**[TASK 6]** 实际上你会发现`std::accumulate`(问一问LLM这个是个啥)就能够承担`reduce`函数的功能,因此你可以直接使用`std::accumulate`来实现`reduce`函数。这个任务需要你使用`reduce`函数实现`sumList`(将一个列表中的元素相加)和`prodList`(将一个列表中的元素相乘)函数(大约分别需要1行代码)。 + +#### 检查结果 + +做完了?很好,切换到`cc`,执行下面的语句来编译框架 + +``` +cmake -S . -B build +cd build +make +``` +现在,编辑系统环境变量 + +``` +echo 'export PYTHONPATH="??????"' >> ~/.bashrc +``` + +将??????替换为刚刚生成的`build`文件夹的绝对路径,这个路径应该形如 + +```Python +/home/hexu/learn/uc-modern-cpp-student/cc/build +``` + +> 可以切换到`build`目录下,执行`pwd`命令来获取绝对路径。 + +好了,不出意外的话,就再也别动`~/.bashrc`了。现在还有一个`frontend/framework/basis/test_task1.py`文件。切换到目录`frontend/framework/basis/`,直接运行task1到task6的文件,如果没有任何报错,说明你已经完成了这一关!🎉 \ No newline at end of file diff --git a/lab-guide/02-autodiff.md b/lab-guide/02-autodiff.md new file mode 100644 index 0000000..eb6a4a1 --- /dev/null +++ b/lab-guide/02-autodiff.md @@ -0,0 +1,56 @@ +### 第二部分:自动微分 + +#### 数值微分 + +有时候,我们无需知道一个函数具体的表达式,借助导数的定义,利用计算机可以求解出在某一点的导数值。这种方法称为数值微分。举个例子,对于任何一个$f(x)$,我们当然可以根据定义近似求出其在$x=x_0$处的导数,即 + +$$f'(x)|_{x=x_0} \approx \frac{f(x_0+\varepsilon)-f(x_0 - \varepsilon)}{2\varepsilon }$$ + +其中$\varepsilon$是一个很小的正数。但是,如果$f(x)$的表达式非常复杂,那么我们可能无法直接求出导数。此时,我们可以借助数值微分来求解导数值。下面我们以$f(x)=x^2$为例,演示如何使用数值微分求解导数值。 + +```python +import numpy as np + +def f(x): + return x**2 + +def numerical_diff(f, x): + h = 1e-4 + return (f(x+h) - f(x-h)) / (2*h) + +print(numerical_diff(f, 5.0))  # 约为 10.0 +``` + +当然,你现在需要用C++来完成这件事。 + +**[TASK 7]** 补全`operators/autodiff.h`中的`central_difference`函数,实现数值微分,求出$f(x_1, x_2, ..., x_n)$在第$arg$个参数处的导数值。 + + +#### 高等数学中的导数 + +还记得$z = x + y$,对$x$和$y$分别求导的结果是什么吗?显然,根据多元函数的求导法则,有$\frac{\partial z}{\partial 
x}=1$,以及$\frac{\partial z}{\partial y}=1$。如果我们再考虑梯度,那么$z$的梯度就是$\nabla z = (1, 1)$。那么,对于更复杂的函数,比如$f(x, y) = x^2 + y^2$,其梯度$\nabla f$又是什么呢? + +**[TASK 8]** 补全`operators/autodiff.h`中的`Add`类,能够对表达式$z = x + y$求导。 + +提示:补全`forward`和`backward`函数,分别实现前向传播和反向传播。前向传播:得到`a + b`的值;反向传播,得到`a`和`b`的梯度(也就是结果分别对`a`、`b`的偏导数再乘上梯度`d_input`)。 + +**[TASK 9]** 仿照`Add`类构造`operators/autodiff.h`中的`Mul`类,能够对表达式$z = x \cdot y$求导。 + +**[TASK 10]** 仿照`Add`类构造`operators/autodiff.h`中的`Log`类,能够对表达式$z = \log(x)$求导。提示:使用`<cmath>`提供的`logf`函数。 + +**[TASK 11]** 仿照`Add`类构造`operators/autodiff.h`中的`Inv`类,能够对表达式$z = 1 / x$求导。 + +**[TASK 12]** 仿照`Add`类构造`operators/autodiff.h`中的`Sigmoid`类,能够对表达式$z = \mathrm{sigmoid}(x)$求导。提示:使用`<cmath>`提供的`expf`函数。 + +#### 检查结果 + +做完了?很好,切换到`cc`,执行下面的语句来编译框架 + +``` +cmake -S . -B build +cd build +make +``` +如果你已经完成了01,那么环境变量应该是好的。否则,请回到01的实验手册,查看如何修改环境变量。 + +现在还有一个`frontend/framework/autodiff/test_task7.py`文件。切换到目录`frontend/framework/autodiff/`,直接运行相应的task文件,如果没有任何报错,说明你已经完成了这一关!🎉 \ No newline at end of file diff --git a/lab-guide/03-framework.md b/lab-guide/03-framework.md new file mode 100644 index 0000000..405a26b --- /dev/null +++ b/lab-guide/03-framework.md @@ -0,0 +1,125 @@ +### 第三部分:进入人工智能的世界 + +> 前两关是不是很简单? 
+ +相信你在前两部分中,已经积累了足够多的C++知识,也回忆起了足够多的高等数学知识。现在,我们要构造一个框架,这个框架可以接受一个矩阵作为输入,并且支持神经网络中的常见的网络层,例如 + +- 线性层(Linear) +- 激活层(Activation) +- 损失层(Loss) + +#### 张量类 + +我们已经在`cc/tensor/tensor.h`中定义了张量类,这个类可以表示一个多维数组,并且支持常见的数学运算。我们可以在`cc/tensor/tensor.cc`中实现这些运算。当然,我们假定所有的张量都是二维的,这样你就不必考虑各种情况。 + +**[TASK 13]** 补全`cc/tensor/tensor.cc`中关于`Tensor::transpose()`的函数实现。它能够将一个张量进行转置。 + +**[TASK 14]** 补全`cc/tensor/tensor.cc`中关于`argmax(const std::shared_ptr& tensor, int axis)`的函数实现,它能够返回一个张量在指定维度上的最大值的索引。提示:你可以使用`std::numeric_limits::infinity()`,可以通过LLM来查询它的含义。 + +> 前面做了这么多次测试,你是不是该自己学会写测试了?...算了,还是我来帮你写吧...😂 + +测试文件:`frontend/framework/tensor/task13_14.py` + +**关于测试用例** 之后的内容的测试用例可以参考`frontend/uct/test`下的文件,或依据自己的需要编写。 + +#### 线性层 + +线性层是神经网络中最为常见的网络层,它接受一个输入张量,并且输出一个张量。输入两个张量`feature: (batch_size x input_features)`和`weight: (input_features x output_features)`,输出张量`output: (batch_size x output_features)`,实际上就是将`feature`矩阵和`weight`矩阵相乘。 + +用公式表示就是$y = xW$,其中$x$为`feature`,$W$为`weight`(偏置项$b$不在线性层中实现,而是由后面的`AddBias`层单独完成)。 + +**[TASK 15]** 补全`cc/operators/nn.h`中`Linear`类的构造函数和`forward`函数。 + +- 构造函数:构造函数接受两个参数`a`和`b`,它们都是`std::shared_ptr`类型的智能指针,分别表示输入特征和权重。构造函数调用基类`FunctionNode`的构造函数,并将`a`和`b`传递给它。在构造函数中,调用`this->forward()`方法,并将结果赋值给`this->data`。 + +- `forward`函数:参见有关线性层的介绍。 + + +**[TASK 16]** 补全`cc/operators/nn.cc`中`Linear`类的`backward`函数。 + +- `backward()`函数实现反向传播,计算梯度并返回。它接受`std::shared_ptr gradient`作为输入,你需要计算`grad_features`和`grad_weights`,它们分别表示对`features`和`weights`的梯度。 + +> 数学Tips:`grad_features`是通过将`gradient`与`weights`的转置相乘得到的。`grad_weights`是通过将`features`的转置与`gradient`相乘得到的。 + +完成了这两个任务后,你应该可以在`cc/`下执行 + +``` +cmake -S . 
-B build +cmake --build build +``` + +就能够编译你的代码。然后,你应当可以运行`frontend/uct/perception.py`,它将使用你实现的线性层来训练一个感知机。 + +#### 激活层 + +激活层是神经网络中常见的网络层,它接受一个输入张量,并且输出一个张量。输入一个张量`x`,输出一个张量`y`,实际上就是将`x`中的每个元素进行某种变换。 + +用公式表示就是$y = f(x)$。对于`ReLU`函数来说,$y = \max(0, x)$。 + +**[TASK 17]** 补全`cc/operators/nn.h`中`ReLU`类的构造函数和`forward`函数。 + +- 构造函数:构造函数接受一个参数`a`,它是一个`std::shared_ptr`类型的智能指针,表示输入特征。构造函数调用基类`FunctionNode`的构造函数,并将`a`传递给它。在构造函数中,调用`this->forward()`方法,并将结果赋值给`this->data`。 + +- `forward`函数:参见有关激活层的介绍。 + +**[TASK 18]** 补全`cc/operators/nn.cc`中`ReLU`类的`backward`函数。 + +- `backward()`函数实现反向传播,计算梯度并返回。它接受`std::shared_ptr gradient`作为输入,你需要计算`grads`,它表示对`features`的梯度。 + +> 数学Tips:`grads`与`gradient`形状相同:在`x`大于0的位置取`gradient`的对应元素,其余位置为0。 + +#### 偏置 + +线性层中,我们没有实现偏置项`b`,它是一个向量,它的维度与输出特征的维度相同。偏置项的作用是使得线性层的输出能够更好地拟合数据。 + +**[TASK 19]** 补全`cc/operators/nn.h`中`AddBias`类的构造函数和`forward`函数。 + +- 构造函数:构造函数接受两个参数`a`和`b`,它们都是`std::shared_ptr`类型的智能指针,分别表示输入特征和偏置。构造函数调用基类`FunctionNode`的构造函数,并将`a`和`b`传递给它。在构造函数中,调用`this->forward()`方法,并将结果赋值给 `this->data`。 + +- `forward`函数:`forward`方法实现前向传播,将偏置添加到输入特征上。`features`和`bias`分别从`this->objects`中获取,`features`的形状为`(batch_size x num_features)`,`bias`的形状为`(1 x num_features)`。在函数中,需要创建一个与`features`形状相同的输出张量`outNode`,使用嵌套循环将`features`的每个元素与`bias`的对应元素相加,结果存储在`outNode`中。最后,返回`outNode`。 + +**[TASK 20]** 补全`cc/operators/nn.cc`中`AddBias`类的`backward`函数。 + +- `backward()`函数实现反向传播,计算梯度并返回。它接受`std::shared_ptr gradient`作为输入,你需要计算`grad_features`和`grad_bias`,它们分别表示对`features`和`bias`的梯度。 + +> 数学Tips:`grad_features`就是`gradient`的拷贝。而`bias`被加到了`batch_size`个样本的每一行上,因此`grad_bias`的每个元素等于`gradient`对应列的所有元素之和,其形状为`(1 x num_features)`。 + +#### 损失层——均方误差损失函数 + +我们首先实现均方误差损失函数,它接受两个张量`y_pred`和`y_true`,它们分别表示预测值和真实值,输出一个标量,表示预测值与真实值之间的误差。 + +用公式表示就是$\displaystyle loss = \frac{1}{2n} \sum_{i=1}^{n} (y_{pred,i} - y_{true,i})^2$,其中$n$是张量中元素的总个数。 + +**[TASK 21]** 补全`cc/operators/nn.h`中`SquareLoss`类的构造函数和`forward`函数。 + +- 
构造函数:构造函数接受两个参数`a`和`b`,它们都是`std::shared_ptr`类型的智能指针,分别表示预测值和真实值。构造函数调用基类`FunctionNode`的构造函数,并将`a`和`b`传递给它。在构造函数中,调用`this->forward()`方法,并将结果赋值给`this->data`。 + +- `forward`函数用于计算损失。 + +**[TASK 22]** 补全`cc/operators/nn.cc`中`SquareLoss`类的`backward`函数。 + +- `backward`函数计算损失函数相对于输入`a`和`b`的梯度。`gradient`是损失函数对输出的梯度(是一个形状为(1, 1)的张量,可以直接认为其是一个标量`g`)。`grad_a`和`grad_b`分别存储`a`和`b`的梯度。对于每个元素,`grad_a`的梯度计算为`g * (a->data->data[i] - b->data->data[i]) / a->data->size`,`grad_b`的对应元素与之大小相等、符号相反。最终返回由`grad_a`和`grad_b`组成的向量。 + +#### 损失层——SoftmaxLoss + +接下来,我们实现Softmax损失函数,它接受两个张量`y_pred`和`y_true`,它们分别表示预测值和真实值,输出一个标量,表示预测值与真实值之间的误差。 + +用公式表示就是$\displaystyle loss = -\frac{1}{N}\sum_{i=1}^{N}\sum_{j=1}^{C} y_{true}^{(i,j)} \log\left(\mathrm{softmax}(y_{pred})^{(i,j)}\right)$,其中$N$是`batch_size`,$C$是类别数,`y_pred`是尚未归一化的得分,softmax沿每一行计算。 + +**[TASK 23]** 补全`cc/operators/nn.h`中`SoftmaxLoss`类的构造函数,`forward`函数和`backward`函数。 + +完成上述内容后,你可以编译和运行`frontend/uct/regression.py`,使用线性网络来拟合`sin`函数。 + +### 手写体识别 + +补全代码中的其他标注有`TODO`的内容,最后编译运行,你就将能够训练一个手写体识别模型。运行`frontend/uct/mnist.py`来试一下吧! + +> 是不是觉得运行得有点慢?考虑使用多线程来加速矩阵运算。(这已经超出了这门课的要求,对高性能计算/并行计算感兴趣的同学可以勇于尝试!) + +### extra bonus + +想打副本? + +``` +nslookup -type=txt uc-cpp.shahe.org +``` \ No newline at end of file