#include "blackcat/tensors.h" #include "blackcat/neural_networks.h" #include "../examples/datasets/mnist_loader.h" #include #include auto make_lstm_network() { return bc::nn::neuralnetwork( bc::nn::lstm(bc::host_tag(), 96 * 10, 1024, bc::nn::adam), bc::nn::lstm(bc::host_tag(), 1024, 512, bc::nn::adam), bc::nn::lstm(bc::host_tag(), 512, 216, bc::nn::adam), bc::nn::feedforward(bc::host_tag(), 216, 192), bc::nn::logistic(bc::host_tag(), 192), bc::nn::logging_output_layer(bc::host_tag(), 192, bc::nn::RMSE).skip_every(100) ); } using network_type = decltype(make_lstm_network()); typedef struct _LstmPredictTask { network_type m_pnetwork = make_lstm_network(); //�������Net void reset_neural_network() { m_pnetwork = std::move(make_lstm_network()); } } LstmPredictTask; template int percept_MNIST(System system_tag, std::string mnist_dataset, int epochs=10, int batch_size=32, int samples=32*1024) { using value_type = typename System::default_floating_point_type; using allocator_type = bc::Allocator; using cube = bc::Cube; using mat = bc::Matrix; using clock = std::chrono::duration; auto network = bc::nn::neuralnetwork( bc::nn::lstm(system_tag, 784/4, 128), bc::nn::lstm(system_tag, 128, 64), bc::nn::feedforward(system_tag, 64, 10), bc::nn::softmax(system_tag, 10), bc::nn::logging_output_layer(system_tag, 10, bc::nn::RMSE).skip_every(100) ); bc::print("Neural Network architecture:"); bc::print(network.get_string_architecture()); network.set_learning_rate(0.001); network.set_batch_size(batch_size); std::pair data = load_mnist( system_tag, mnist_dataset, batch_size, samples); cube& inputs = data.first; cube& outputs = data.second; bc::print("training..."); auto start = std::chrono::system_clock::now(); int img_partitions = 4; for (int i = 0; i < epochs; ++i){ bc::print("current epoch: ", i); for (int j = 0; j < samples/batch_size; j++) { for (int p = 0; p < img_partitions; ++p) { auto batch = inputs[j]; auto index = bc::dim(0,784 * (p/(float)img_partitions)); auto shape = bc::dim(784/4, batch_size); network.forward_propagation(batch[{index, shape}]); } //Apply backprop on the last two images (images 3/4 and 4/4) network.back_propagation(outputs[j]); network.back_propagation(outputs[j]); network.update_weights(); } } auto end = std::chrono::system_clock::now(); bc::print("training time:", clock(end - start).count()); bc::print("testing..."); network.copy_training_data_to_single_predict(0); auto batch = inputs[0]; auto shape = bc::dim(784/4, batch_size); for (int p = 0; p < img_partitions-1; ++p) { auto index = bc::dim(0, 784*(p/(float)img_partitions)); network.predict(batch[{index, shape}]); } auto last_index = bc::dim(0,784*((img_partitions-1)/(float)img_partitions)); mat hyps =network.predict(batch[{last_index, shape}]); bc::size_t test_images = 10; cube img = cube(inputs[0].reshaped(28,28, batch_size)); for (int i = 0; i < test_images; ++i) { mat(bc::logical(img[i].t())).print_sparse(0); hyps[i].print(); bc::print("------------------------------------"); } bc::print("success"); return 0; }