main

%23%20%2F%2F%2F%20script%0A%23%20requires-python%20%3D%20%22%3E%3D3.11%22%0A%23%20dependencies%20%3D%20%5B%0A%23%20%20%20%20%20%22marimo%22%2C%0A%23%20%20%20%20%20%22numpy%22%2C%0A%23%20%20%20%20%20%22pandas%22%2C%0A%23%20%5D%0A%23%20%5Bmarimo.export%5D%0A%23%20format%20%3D%20%22html%22%20%20%23%20or%20%22html%22%20for%20static%0A%23%20show_code%20%3D%20true%20%20%20%20%20%20%23%20or%20false%0A%23%20%2F%2F%2F%0A%0Aimport%20marimo%0A%0A__generated_with%20%3D%20%220.21.1%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%20%20%20%20import%20numpy%20as%20np%0A%20%20%20%20import%20pandas%20as%20pd%0A%20%20%20%20import%20matplotlib.pyplot%20as%20plt%0A%20%20%20%20from%20dataclasses%20import%20dataclass%0A%20%20%20%20from%20typing%20import%20List%0A%20%20%20%20import%20io%0A%20%20%20%20import%20zipfile%0A%20%20%20%20import%20urllib.request%0A%0A%20%20%20%20return%20List%2C%20dataclass%2C%20io%2C%20np%2C%20pd%2C%20plt%2C%20urllib%2C%20zipfile%0A%0A%0A%40app.cell%0Adef%20_(io%2C%20pd%2C%20urllib%2C%20zipfile)%3A%0A%20%20%20%20nrows%20%3D%2060000%0A%20%20%20%20train_url%20%3D%20(%0A%20%20%20%20%20%20%20%20%22https%3A%2F%2Fraw.githubusercontent.com%2Fphoebetronic%2Fmnist%2Fmain%2Fmnist_train.csv.zip%22%0A%20%20%20%20)%0A%20%20%20%20test_url%20%3D%20(%0A%20%20%20%20%20%20%20%20%22https%3A%2F%2Fraw.githubusercontent.com%2Fphoebetronic%2Fmnist%2Fmain%2Fmnist_test.csv.zip%22%0A%20%20%20%20)%0A%0A%20%20%20%20with%20urllib.request.urlopen(train_url)%20as%20response%3A%0A%20%20%20%20%20%20%20%20with%20zipfile.ZipFile(io.BytesIO(response.read()))%20as%20zip_ref%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20with%20zip_ref.open(%22mnist_train.csv%22)%20as%20f%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20train_df%20%3D%20pd.read_csv(f%2C%20header%3DNone%2C%20nrows%3Dnrows).to_numpy()%0A%0A%20%20%20%20with%20urllib.request.urlopen(test_url)%20as%20response%3A%0A%20%20%20%20%20%20%20%20with%20zipfile.ZipFile(io.BytesIO(response.read()))%20as%20zip_ref%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20with%20zip_ref.open(%22mnist_test.csv%22)%20as%20f%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20test_df%20%3D%20pd.read_csv(f%2C%20header%3DNone%2C%20nrows%3Dnrows).to_numpy()%0A%20%20%20%20return%20test_df%2C%20train_df%0A%0A%0A%40app.cell%0Adef%20_(train_df)%3A%0A%20%20%20%20x_train%20%3D%20train_df%5B%3A%2C%201%3A%5D%20%2F%20255.0%0A%20%20%20%20y_train%20%3D%20train_df%5B%3A%2C%200%5D%0A%20%20%20%20print(x_train.shape%2C%20y_train.shape)%0A%20%20%20%20return%20x_train%2C%20y_train%0A%0A%0A%40app.cell%0Adef%20_(test_df)%3A%0A%20%20%20%20x_test%20%3D%20test_df%5B%3A%2C%201%3A%5D%20%2F%20255.0%0A%20%20%20%20y_test%20%3D%20test_df%5B%3A%2C%200%5D%0A%20%20%20%20return%20x_test%2C%20y_test%0A%0A%0A%40app.cell%0Adef%20_(dataclass%2C%20np)%3A%0A%20%20%20%20%40dataclass%0A%20%20%20%20class%20myDense%3A%0A%20%20%20%20%20%20%20%20weights%3A%20np.ndarray%0A%20%20%20%20%20%20%20%20bias%3A%20np.ndarray%0A%20%20%20%20%20%20%20%20activation%3A%20str%20%3D%20%22RELU%22%0A%0A%20%20%20%20%20%20%20%20x%20%3D%20None%0A%20%20%20%20%20%20%20%20z%20%3D%20None%0A%20%20%20%20%20%20%20%20grad_w%20%3D%20None%0A%20%20%20%20%20%20%20%20grad_b%20%3D%20None%0A%0A%20%20%20%20%20%20%20%20def%20forward(self%2C%20x%3A%20np.ndarray)%20-%3E%20np.ndarray%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.x%20%3D%20x%0A%20%20%20%20%20%20%20%20%20%20%20%20self.z%20%3D%20x%20%40%20self.weights%20%2B%20self.bias%0A%20%20%20%20%20%20%20%20%20%20%20%20if%20self.activation%20%3D%3D%20%22RELU%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20return%20np.maximum(0%2C%20self.z)%0A%20%20%20%20%20%20%20%20%20%20%20%20elif%20self.activation%20%3D%3D%20%22SOFTMAX%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20exp_z%20%3D%20np.exp(self.z%20-%20np.max(self.z%2C%20axis%3D1%2C%20keepdims%3DTrue))%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20return%20exp_z%20%2F%20np.sum(exp_z%2C%20axis%3D1%2C%20keepdims%3DTrue)%0A%20%20%20%20%20%20%20%20%20%20%20%20else%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20raise%20ValueError(f%22Unsupported%20activation%20function%3A%20%7Bself.activation%7D%22)%0A%0A%20%20%20%20%20%20%20%20def%20backward(self%2C%20grad_output%3A%20np.ndarray)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20if%20self.activation%20%3D%3D%20%22RELU%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20activation_grad%20%3D%20(self.z%20%3E%200).astype(float)%0A%20%20%20%20%20%20%20%20%20%20%20%20elif%20self.activation%20%3D%3D%20%22SOFTMAX%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20activation_grad%20%3D%20np.ones_like(self.z)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20part_l_part_z%20%3D%20grad_output%20*%20activation_grad%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.grad_w%20%3D%20self.x.T%20%40%20part_l_part_z%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.grad_b%20%3D%20np.sum(part_l_part_z%2C%20axis%3D0%2C%20keepdims%3DTrue)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20grad_input%20%3D%20part_l_part_z%20%40%20self.weights.T%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20return%20grad_input%0A%0A%20%20%20%20%20%20%20%20def%20update_weights(self%2C%20learning_rate%3A%20float%2C%20reg_strength%3A%20float%20%3D%200.0)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20grad_w_reg%20%3D%20self.grad_w%20%2B%20reg_strength%20*%20self.weights%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.weights%20-%3D%20learning_rate%20*%20grad_w_reg%0A%20%20%20%20%20%20%20%20%20%20%20%20self.bias%20-%3D%20learning_rate%20*%20self.grad_b%0A%0A%20%20%20%20return%20(myDense%2C)%0A%0A%0A%40app.cell%0Adef%20_(List%2C%20dataclass%2C%20myDense%2C%20np)%3A%0A%20%20%20%20%40dataclass%0A%20%20%20%20class%20NeuralNetwork%3A%0A%20%20%20%20%20%20%20%20layers%3A%20List%5BmyDense%5D%0A%0A%20%20%20%20%20%20%20%20def%20__post_init__(self)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20i%20in%20range(len(self.layers)%20-%201)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20if%20(%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20self.layers%5Bi%5D.weights.shape%5B1%5D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20!%3D%20self.layers%5Bi%20%2B%201%5D.weights.shape%5B0%5D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20raise%20ValueError(%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20f%22Layer%20%7Bi%7D%20output%20size%20%7Bself.layers%5Bi%5D.weights.shape%5B1%5D%7D%20%22%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20f%22does%20not%20match%20Layer%20%7Bi%20%2B%201%7D%20input%20size%20%7Bself.layers%5Bi%20%2B%201%5D.weights.shape%5B0%5D%7D%22%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20)%0A%0A%20%20%20%20%20%20%20%20def%20forward(self%2C%20x)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20layer%20in%20self.layers%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20x%20%3D%20layer.forward(x)%0A%20%20%20%20%20%20%20%20%20%20%20%20return%20x%0A%0A%20%20%20%20%20%20%20%20def%20backward(self%2C%20loss_grad)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20grad%20%3D%20loss_grad%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20layer%20in%20reversed(self.layers)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20grad%20%3D%20layer.backward(grad)%0A%0A%20%20%20%20%20%20%20%20def%20update_weights(self%2C%20learning_rate%3A%20float%2C%20reg_strength%3A%20float%20%3D%200.0)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20layer%20in%20self.layers%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20layer.update_weights(learning_rate%2C%20reg_strength)%0A%0A%20%20%20%20%20%20%20%20def%20train_step(self%2C%20x_batch%2C%20y_batch_one_hot%2C%20learning_rate%2C%20reg_strength)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20output%20%3D%20self.forward(x_batch)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20log_output%20%3D%20np.log(output%20%2B%201e-15)%0A%20%20%20%20%20%20%20%20%20%20%20%20loss%20%3D%20-np.sum(y_batch_one_hot%20*%20log_output)%20%2F%20x_batch.shape%5B0%5D%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20loss_grad%20%3D%20(output%20-%20y_batch_one_hot)%20%2F%20x_batch.shape%5B0%5D%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.backward(loss_grad)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20self.update_weights(learning_rate%2C%20reg_strength)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20return%20loss%0A%0A%20%20%20%20%20%20%20%20def%20train_network(%0A%20%20%20%20%20%20%20%20%20%20%20%20self%2C%20x_train%2C%20y_train%2C%20num_epochs%2C%20batch_size%2C%20learning_rate%2C%20reg_strength%0A%20%20%20%20%20%20%20%20)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20y_train_one_hot%20%3D%20np.eye(10)%5By_train%5D%0A%20%20%20%20%20%20%20%20%20%20%20%20num_batches%20%3D%20len(x_train)%20%2F%2F%20batch_size%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20epoch%20in%20range(num_epochs)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20epoch_loss%20%3D%200%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20for%20batch_idx%20in%20range(num_batches)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20start%20%3D%20batch_idx%20*%20batch_size%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20end%20%3D%20start%20%2B%20batch_size%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20batch_loss%20%3D%20self.train_step(%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20x_train%5Bstart%3Aend%5D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20y_train_one_hot%5Bstart%3Aend%5D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20learning_rate%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20reg_strength%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20epoch_loss%20%2B%3D%20batch_loss%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20avg_loss%20%3D%20epoch_loss%20%2F%20num_batches%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20if%20(epoch%20%2B%201)%20%25%2020%20%3D%3D%200%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20print(f%22Epoch%20%7Bepoch%20%2B%201%7D%2C%20Avg%20Loss%3A%20%7Bavg_loss%3A.4f%7D%22)%0A%0A%20%20%20%20%20%20%20%20def%20test_network(self%2C%20x_test%2C%20y_test)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20output%20%3D%20self.forward(x_test)%0A%20%20%20%20%20%20%20%20%20%20%20%20predictions%20%3D%20np.argmax(output%2C%20axis%3D1)%0A%20%20%20%20%20%20%20%20%20%20%20%20accuracy%20%3D%20np.mean(predictions%20%3D%3D%20y_test)%0A%20%20%20%20%20%20%20%20%20%20%20%20print(f%22Test%20Accuracy%3A%20%7Baccuracy%20*%20100%3A.2f%7D%25%22)%0A%20%20%20%20%20%20%20%20%20%20%20%20return%20predictions%0A%0A%20%20%20%20return%20(NeuralNetwork%2C)%0A%0A%0A%40app.cell%0Adef%20_(NeuralNetwork%2C%20myDense%2C%20np)%3A%0A%20%20%20%20input_size_1%20%3D%20784%0A%20%20%20%20output_size_1%20%3D%20128%0A%20%20%20%20weights_1%20%3D%20np.random.rand(input_size_1%2C%20output_size_1)%20*%200.01%0A%20%20%20%20bias_1%20%3D%20np.zeros((1%2C%20output_size_1))%0A%20%20%20%20layer_1%20%3D%20myDense(weights%3Dweights_1%2C%20bias%3Dbias_1%2C%20activation%3D%22RELU%22)%0A%0A%20%20%20%20input_size_2%20%3D%20output_size_2%20%3D%20128%0A%20%20%20%20weights_2%20%3D%20np.random.rand(input_size_2%2C%20output_size_2)%20*%200.01%0A%20%20%20%20bias_2%20%3D%20np.zeros((1%2C%20output_size_2))%0A%20%20%20%20layer_2%20%3D%20myDense(weights%3Dweights_2%2C%20bias%3Dbias_2%2C%20activation%3D%22RELU%22)%0A%0A%20%20%20%20input_size_3%20%3D%20128%0A%20%20%20%20output_size_3%20%3D%2010%0A%20%20%20%20weights_3%20%3D%20np.random.rand(input_size_3%2C%20output_size_3)%20*%200.01%0A%20%20%20%20bias_3%20%3D%20np.zeros((1%2C%20output_size_3))%0A%20%20%20%20layer_3%20%3D%20myDense(weights%3Dweights_3%2C%20bias%3Dbias_3%2C%20activation%3D%22SOFTMAX%22)%0A%0A%20%20%20%20neural_network%20%3D%20NeuralNetwork(layers%3D%5Blayer_1%2C%20layer_2%2C%20layer_3%5D)%0A%20%20%20%20return%20(neural_network%2C)%0A%0A%0A%40app.cell%0Adef%20_(neural_network%2C%20x_train%2C%20y_train)%3A%0A%20%20%20%20neural_network.train_network(%0A%20%20%20%20%20%20%20%20x_train%2C%0A%20%20%20%20%20%20%20%20y_train%2C%0A%20%20%20%20%20%20%20%20num_epochs%3D100%2C%0A%20%20%20%20%20%20%20%20batch_size%3D64%2C%0A%20%20%20%20%20%20%20%20learning_rate%3D0.1%2C%0A%20%20%20%20%20%20%20%20reg_strength%3D0.001%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(neural_network%2C%20x_test%2C%20y_test)%3A%0A%20%20%20%20predictions%20%3D%20neural_network.test_network(x_test%2C%20y_test)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(neural_network%2C%20np%2C%20plt%2C%20x_test%2C%20y_test)%3A%0A%20%20%20%20def%20visualize_predictions(nn%2C%20num_samples%3D10%2C%20show%3D%22both%22)%3A%0A%20%20%20%20%20%20%20%20test_output_3%20%3D%20nn.forward(x_test)%0A%20%20%20%20%20%20%20%20predictions%20%3D%20np.argmax(test_output_3%2C%20axis%3D1)%0A%20%20%20%20%20%20%20%20confidences%20%3D%20np.max(test_output_3%2C%20axis%3D1)%0A%20%20%20%20%20%20%20%20true_labels%20%3D%20y_test%0A%0A%20%20%20%20%20%20%20%20if%20show%20%3D%3D%20%22correct%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20mask%20%3D%20predictions%20%3D%3D%20true_labels%0A%20%20%20%20%20%20%20%20elif%20show%20%3D%3D%20%22false%22%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20mask%20%3D%20predictions%20!%3D%20true_labels%0A%20%20%20%20%20%20%20%20else%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20mask%20%3D%20np.ones(len(true_labels)%2C%20dtype%3Dbool)%0A%0A%20%20%20%20%20%20%20%20indices%20%3D%20np.where(mask)%5B0%5D%5B%3Anum_samples%5D%0A%0A%20%20%20%20%20%20%20%20num_cols%20%3D%20min(5%2C%20len(indices))%0A%20%20%20%20%20%20%20%20num_rows%20%3D%20(len(indices)%20%2B%204)%20%2F%2F%205%0A%0A%20%20%20%20%20%20%20%20fig%2C%20axes%20%3D%20plt.subplots(num_rows%2C%20num_cols%2C%20figsize%3D(15%2C%203%20*%20num_rows))%0A%20%20%20%20%20%20%20%20axes%20%3D%20axes.flatten()%0A%0A%20%20%20%20%20%20%20%20for%20plot_idx%2C%20sample_idx%20in%20enumerate(indices)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20ax%20%3D%20axes%5Bplot_idx%5D%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20image%20%3D%20x_test%5Bsample_idx%5D.reshape(28%2C%2028)%0A%20%20%20%20%20%20%20%20%20%20%20%20ax.imshow(image%2C%20cmap%3D%22gray%22)%0A%20%20%20%20%20%20%20%20%20%20%20%20ax.axis(%22off%22)%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20true%20%3D%20true_labels%5Bsample_idx%5D%0A%20%20%20%20%20%20%20%20%20%20%20%20pred%20%3D%20predictions%5Bsample_idx%5D%0A%20%20%20%20%20%20%20%20%20%20%20%20conf%20%3D%20confidences%5Bsample_idx%5D%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20color%20%3D%20%22red%22%20if%20true%20!%3D%20pred%20else%20%22green%22%0A%20%20%20%20%20%20%20%20%20%20%20%20title%20%3D%20f%22True%3A%20%7Bint(true)%7D%5CnPred%3A%20%7Bpred%7D%5CnConf%3A%20%7Bconf%3A.2f%7D%22%0A%20%20%20%20%20%20%20%20%20%20%20%20ax.set_title(title%2C%20fontsize%3D10%2C%20color%3Dcolor%2C%20fontweight%3D%22bold%22)%0A%0A%20%20%20%20%20%20%20%20for%20idx%20in%20range(len(indices)%2C%20len(axes))%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20axes%5Bidx%5D.axis(%22off%22)%0A%0A%20%20%20%20%20%20%20%20plt.tight_layout()%0A%20%20%20%20%20%20%20%20plt.show()%0A%0A%20%20%20%20print(%22%5Cn%3D%3D%3D%20VISUALIZATION%20%3D%3D%3D%22)%0A%20%20%20%20print(%22Displaying%20predictions%20(showing%20both%20correct%20and%20incorrect)...%22)%0A%20%20%20%20visualize_predictions(neural_network%2C%20num_samples%3D10%2C%20show%3D%22both%22)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A