# -*- coding: utf-8 -*-
"""Handwriting.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1JkEJqSYdmXZ6VIyO7ApsNswlzietHSNE

18-100 Lab 10: Machine Learning

This lab is the starter code for the Machine Learning Lab. Please read through it
to get a general understanding of what it does, but you don't need to write any
code unless you want to. Instead, once you have a sense of what the code is doing,
focus your attention on parameterizing the neural network and the training process.
To do this, you probably want to focus on the section labelled "These parameters
control different aspects of the neural network and its training". Please see the
lab write-up for more detail.
"""

# This needs to be done once for each newly connected/allocated session.
# It can be skipped after that via "Runtime --> Run after" (beginning with and after this cell),
# or it can be commented in and out as needed.

# Install required libraries
!pip install -q git+https://github.com/tensorflow/docs
!pip install tensorflowjs

# These parameters control different aspects of the neural network and its training.
# These are the parameters you most likely want to explore.
TRAIN_SIZE = 5000          # The total pool of examples presentable to the network for training
BATCH_SIZE = 3             # The number of examples the network looks at before making adjustments to its weights
EPOCHS = 20                # The number of epochs, i.e. complete passes over the training set
HIDDEN_LAYERS = 2          # The number of dense hidden layers within the neural network
HIDDEN_LAYER_SIZE = 128    # The number of neurons in each dense layer. For simplicity, every dense layer is the same size
LEARNING_RATE_COEFF = 1.3  # Controls the rate at which adjustments to weights are made: <1 slows, 1 is the default, and >1 exaggerates

# Feel free to change this if you'd like, but doing so may not be particularly illuminating for this lab.
TEST_SIZE = 100            # The number of never-seen-before images used to evaluate performance after training
                           # is complete. Do not lower this number to improve your results.

# Commented out IPython magic to ensure Python compatibility.
# Imports to bring in the libraries we need and sometimes give them shorthand aliases
import numpy
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
import datetime
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler
from google.colab import files

# Load the TensorBoard notebook extension so we can visualize the network and its training
# %load_ext tensorboard

# Load data
# This uses a method, i.e. code, provided with the data set.
# It automatically loads both the training and validation tuples.
(training_images, training_labels), (validation_images, validation_labels) = mnist.load_data()
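
# Not part of the original lab: a quick back-of-the-envelope calculation showing how the
# training parameters defined above interact. Each epoch is one complete pass over the
# TRAIN_SIZE training examples, taken BATCH_SIZE examples at a time, and the weights are
# adjusted once per batch.
import math
updates_per_epoch = math.ceil(TRAIN_SIZE / BATCH_SIZE)
print("Weight updates per epoch:", updates_per_epoch)                  # 1667 with the default parameters
print("Weight updates across training:", updates_per_epoch * EPOCHS)  # 33340 with the default parameters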

# This limits the amount of data we use from each of the training and test sets to
# the amounts requested by the parameters provided for you to edit.
training_images = training_images[0:TRAIN_SIZE]
training_labels = training_labels[0:TRAIN_SIZE]
validation_images = validation_images[0:TEST_SIZE]
validation_labels = validation_labels[0:TEST_SIZE]

print(training_images.shape)
print(validation_images.shape)

# This shows us an example of what an image looks like
plt.figure()                    # Create a new, empty figure
plt.imshow(training_images[0])  # Load the image into the figure
plt.colorbar()                  # Add a color bar to the right of the figure to show the range of colors
plt.grid(False)                 # Don't draw a graph-paper-like grid
plt.show()                      # Display the image on the screen
print("Label: " + str(training_labels[0]))  # Print the image's given "ground truth" label

# Normalize inputs from 0-255 to 0.0-1.0.
# In other words, divide every pixel by 255 so the range is 0-1 instead of 0-255.
# This is because the neural network expects inputs to be in the range 0-1.
training_images = training_images / 255
validation_images = validation_images / 255

# Go through the same process as above to redisplay the image.
# Note that the color bar is now 0.0-1.0.
plt.figure()
plt.imshow(training_images[0])
plt.colorbar()
plt.grid(False)
plt.show()
print("Label: " + str(training_labels[0]))

# Convert numerical labels to a "one-hot encoding".
# This means we encode each category as a separate boolean output variable.
# So, instead of having one output that encodes the label as a single number (0, 1, 2, etc.),
# each category gets a different output. For example, consider 0, 1, and 2:
# 0: {1, 0, 0}
# 1: {0, 1, 0}
# 2: {0, 0, 1}
# This is important to do because there is no numerical relationship between the
# categories. In other words, a 1.5 wouldn't mean half "1" and half "2". It would
# just be hard to interpret and not good for training.
number_of_classes = 10
training_labels = np_utils.to_categorical(training_labels, number_of_classes)
validation_labels = np_utils.to_categorical(validation_labels, number_of_classes)

# Define the architecture of the neural network model

# Define the model sequentially, layer by layer
model = Sequential()

# Add the input layer, flattening the 28x28 2D image into a 1D vector on its way into the network
model.add(Flatten())

# Add the hidden layer(s).
# A dense layer is the basic fully connected layer.
# "relu" is a "rectified linear unit", which is a long way of saying it turns anything negative into a 0.
# There isn't such a thing as a negative color, so inhibition (a negative weight) isn't likely to be helpful.
for dense_layer in range(HIDDEN_LAYERS):
    model.add(Dense(HIDDEN_LAYER_SIZE, activation='relu'))

# Add the output layer.
# The output layer uses "softmax", which basically means that it takes the various outputs and makes
# them sum to 1.0, so that they can be interpreted sort of like probabilities, rather than us having
# to look at the whole set to interpret how much stronger or weaker one output is than the others.
model.add(Dense(number_of_classes, activation='softmax'))
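
# Not part of the original lab: a tiny, self-contained illustration of what the softmax output layer
# above does. Softmax exponentiates a set of raw scores and divides by their sum, so every value is
# positive and the values add up to 1.0. The scores below are made-up numbers, just for demonstration.
example_scores = numpy.array([2.0, 1.0, 0.1])
softmax_values = numpy.exp(example_scores) / numpy.sum(numpy.exp(example_scores))
print(softmax_values)        # roughly [0.659, 0.242, 0.099]
print(softmax_values.sum())  # 1.0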

# Compile the model.
# This is, in some ways, like compiling a program: it takes the model above from a definition to a usable instance.
# Categorical cross-entropy is a way of measuring error in situations where, as is the case with one-hot encoding,
# outputs are boolean and indicate membership in a single category.
# The "adam" optimizer is a form of gradient descent, i.e. a way for the network to assign blame for error and
# adjust weights by backpropagation.
# Accuracy is a metric that measures the correctness of the categorization.
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Create a log directory where the neural network can store data for later visualization by TensorBoard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Set up a "callback" so that model.fit feeds data into the log directory for TensorBoard as it trains.
# Note that this initializes the callback with the log directory created above and will log with each epoch,
# i.e. histogram_freq=1.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Fit, a.k.a. train, the model
def custom_learning_rate(epoch, lrate):
    return LEARNING_RATE_COEFF * lrate

lrs_callback = LearningRateScheduler(custom_learning_rate)

model.fit(training_images, training_labels,
          validation_data=(validation_images, validation_labels),
          epochs=EPOCHS, shuffle=True, batch_size=BATCH_SIZE,
          callbacks=[tensorboard_callback, lrs_callback])

# Commented out IPython magic to ensure Python compatibility.
# Start TensorBoard so we can visualize how the training unfolded.
# Be patient. You may have a blank window for up to 1-2 minutes (or possibly more) while TensorBoard
# processes the log files and prepares the visualizations.
# Set "Smoothing" to 0.0 so that you see the raw data.
# Feel free to uncheck old runs (or run the cell that comes later to delete them).
# %tensorboard --logdir logs/fit

# Get the final validation loss and accuracy as a simple "score", so you know how you did
metrics = model.evaluate(validation_images, validation_labels, verbose=0)
print("Metrics (test loss & test accuracy): ")
print(metrics)

# Save the model so it can be used again without needing to retrain
model.save('handwriting.h5')

# Convert the model for use via a Web page.
!mkdir model  # Make a directory for the model; it is okay to ignore the error if it already exists
!tensorflowjs_converter --input_format keras handwriting.h5 model/  # Convert the model and put it into the directory

# Archive the model directory into a .zip file for easy downloading.
# Uncomment this when instructed in the lab handout.
!zip -r handwriting.zip model

# Uncomment the lines below to download the .zip archive of the model from Colab onto your own computer
# for eventual upload to a Web page.
from google.colab import files
files.download('handwriting.zip')
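
# Not part of the original lab: a small sketch showing how the saved model can be loaded back and used
# to classify a single image without retraining. It assumes the cells above have already run, so
# 'handwriting.h5' exists and validation_images/validation_labels hold the normalized, one-hot-encoded data.
reloaded_model = tf.keras.models.load_model('handwriting.h5')
prediction = reloaded_model.predict(validation_images[0:1])  # shape (1, 10): one score per digit class
print("Predicted digit:", numpy.argmax(prediction))
print("Actual digit:   ", numpy.argmax(validation_labels[0]))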

# Uncomment the line below and run this cell if you'd like to delete all of your existing TensorFlow logs.
#!rm -rf ./logs/

# You can also modify the line above to delete only certain logs by adding the date-stamped subdirectory,
# using the date stamp shown in TensorBoard, e.g.
# !rm -rf ./logs/fit/20201123-023901/train/       # Training logs from a particular run
# !rm -rf ./logs/fit/20201123-023901/validation/  # Validation logs from a particular run
# !rm -rf ./logs/fit/20201123-023901/             # Both training and validation logs from a particular run

"""CREDITS:

This code is based upon the following blog:
* https://blog.tanka.la/2018/05/27/handwritten-digit-prediction-using-convolutional-neural-networks-in-tensorflow-with-keras-and-live-example-using-tensorflow-js/

It was updated to save and download models as shown here:
* https://blog.tensorflow.org/2018/07/train-model-in-tfkeras-with-colab-and-run-in-browser-tensorflowjs.html

And, inspired by the link below, updated to show metrics for underfitting and overfitting:
* https://www.tensorflow.org/tutorials/keras/overfit_and_underfit

Those metrics, however, were ultimately implemented with TensorBoard based upon the link below:
* https://www.tensorflow.org/tensorboard/get_started

The architecture was also simplified to be more accessible based upon this:
* https://www.tensorflow.org/tutorials/keras/classification
"""