TensorFlow
Keras
import math
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
%matplotlib inline
sns.set()
The goal of the graph is to wrap all the operations from the example and get automatic differentiation and updating of the weights.

Each element of the graph supports:
- forward - computes the operation.
- backward - computes and stores the gradients for the current operation.
- update weights - updates the weights using "the update rule".

Training will call forward on each element and store its value, then run the backward pass over the elements in reverse order, and finally update_weights will subtract the computed gradients from the corresponding weights.

$ \hat y = g(X W_1 + b) W_2 + b_2 $
$ op_1 = X W_1 + b $
$ op_2 = \sigma( op_1) $
$ op_3 = op_2 * W_2 + b_2 = \hat y $
$ op_4 = J(op_3) $, where $J(W) = \frac {1}{2} \big ( \hat y - y \big ) ^2 $
MSE
class MSE:
    def __init__(self, y):
        self.y = y

    def forward(self, X):
        # Flatten the predictions so they align with the target vector.
        self.X = X.ravel()
        first_term = 1. / (2. * len(X))
        # Compare against the flattened predictions so the shapes line up ((n,) vs (n,)).
        norm = np.linalg.norm(self.y - self.X)
        self.value = first_term * np.square(norm)
        return self.value

    def backward(self, _):
        # Gradient of the squared error w.r.t. the predictions.
        # The 1/n factor from forward is omitted here and is effectively
        # absorbed by the learning rate.
        dX = self.X - self.y
        return dX
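As a quick sanity check, forward should agree with the mean squared error divided by two, computed directly in NumPy (y_check and x_check below are made-up values for the example):

y_check = np.array([0.0, 1.0])
x_check = np.array([[0.2], [0.7]])   # shaped like the output of the last Linear layer

mse_check = MSE(y_check)
print(mse_check.forward(x_check))                        # loss from the class above
print(np.mean((y_check - x_check.ravel()) ** 2) / 2.0)   # the same value computed directly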
Linear Unit

The Linear unit computes $wx + b$, i.e. $op_1$ and $op_3$ from the example above.

The backward pass of the graph:

$$\frac {\partial op_4} {\partial W_2 } = \frac {\partial } {\partial W_2 } \frac {1} {2} \big ( op_3 - y \big )^2 = $$
$$ = (op_3 - y) \frac {\partial } {\partial W_2 } \big ( op_3 - y \big ) = $$
$$ = (op_3 - y) \frac {\partial } {\partial W_2 } \big ( op_2 * W_2 + b_2 - y \big ) = $$
$$ = (op_3 - y) \, op_2 $$
class Linear:
    def __init__(self, x_dim, h_dim, name=None):
        # x_dim: number of input features, h_dim: number of neurons in this layer.
        self.W = np.random.randn(x_dim, h_dim)
        self.b = np.random.randn(h_dim)
        self.name = name

    def forward(self, X):
        self.X = X
        self.values = np.dot(X, self.W) + self.b
        return self.values

    def backward(self, dZ):
        # Gradient of the bias: sum of dZ over the samples.
        self.db = np.dot(np.ones((1, dZ.shape[0]), dtype=np.float64), dZ)
        # Gradient of the weights: X^T @ dZ.
        self.dW = np.dot(np.transpose(self.X), dZ)
        if dZ.ndim == 1:
            dZ = np.expand_dims(dZ, axis=1)
        # Gradient w.r.t. the input, passed on to the previous element in the graph.
        self.dX = dZ @ np.transpose(self.W)
        return self.dX

    def update(self, alpha):
        # Gradient descent step: W := W - alpha * dW.
        self.W += - alpha * self.dW.reshape(self.W.shape)
        self.b += - alpha * self.db.ravel()
In __init__ we pass x_dim and h_dim:
- x_dim - how many features x has.
- h_dim - how many hidden neurons we want the new layer to have. h_dim can be 1 for the last layer.

Multiplying X by W produces a new matrix: the number of rows comes from X (the samples), and the columns are the new neurons (features) of the hidden layer. Row by column :D
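A small illustration of the shapes: two samples with three features, multiplied by a 3×5 weight matrix, give a 2×5 hidden representation (X_demo and the layer below are made up for the example).

X_demo = np.random.randn(2, 3)           # 2 samples, 3 features
layer_demo = Linear(3, 5, "demo")
print(layer_demo.forward(X_demo).shape)  # (2, 5): 2 samples, 5 hidden neurons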
Sigmoid

Here things are a bit simpler:
class Sigmoid:
    def forward(self, X):
        self.values = 1.0 / (1.0 + np.exp(-X))
        return self.values

    def backward(self, dZ):
        # sigma'(x) = sigma(x) * (1 - sigma(x)), chained with the incoming gradient.
        return (1.0 - self.values) * self.values * dZ
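As an illustrative check, the analytic derivative sigma(x) * (1 - sigma(x)) agrees with a finite-difference estimate (x0 and eps below are arbitrary values for the example):

sig = Sigmoid()
x0, eps = 0.3, 1e-6
sig.forward(np.array([x0]))
analytic = sig.backward(np.array([1.0]))   # sigma'(x0), with an incoming gradient of 1
numeric = (1 / (1 + np.exp(-(x0 + eps))) - 1 / (1 + np.exp(-(x0 - eps)))) / (2 * eps)
print(analytic, numeric)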
print("\n"*4)
print(self.name)
print(f'self.db {self.db}')
print(f'self.X: {self.X}')
print(f'dZ: {dZ}')
print(f'self.dW: {self.dW}')
print(f"self.W, {self.W}")
print("\n"*4)
from sklearn.base import BaseEstimator
class NeuralNetwork(BaseEstimator):
    def __init__(self, model, alpha=0.01, iterations=100):
        self.alpha = alpha
        self.iterations = iterations
        self.model = model

    def fit(self, X, y=None):
        self.errors = []
        for i in range(self.iterations):
            # Forward pass: run every element and remember the loss value.
            z = X
            for e in self.model:
                z = e.forward(z)
                if isinstance(e, MSE):
                    self.errors.append(e.value)
            # Backward pass: run the elements in reverse order.
            dZ = None
            for e in self.model[::-1]:
                dZ = e.backward(dZ)
            # Update step for every element that has weights.
            for e in self.model:
                if hasattr(e, 'update'):
                    e.update(self.alpha)
        return self

    def predict(self, X):
        # Forward pass through every element except the final MSE.
        z = X
        for e in self.model[:-1]:
            z = e.forward(z)
        return z
We will generate a dummy dataset to experiment with.
X = np.array([
[1, 2, 3],
[-1, -2, -3]
])
y = np.array([0, 1])
np.random.seed(1)
model = [
Linear(3, 5, "Linear 1"),
Sigmoid(),
# Linear(5, 4, "Linear 2"),
# Sigmoid(),
Linear(5, 1, "Linear 3"),
MSE(y)
]
nn = NeuralNetwork(model)
nn.fit(X)
nn.predict(X)
plt.plot(nn.errors);
def plot_different_lr():
plt.figure(figsize=(12,12))
for i, lr in enumerate([0.001, 0.01, 0.1, 0.4, 0.5, 0.6]):
np.random.seed(1)
model = [Linear(3, 5, "Linear 1"), Sigmoid(), Linear(5, 1, "Linear 3"), MSE(y)]
nn = NeuralNetwork(model, alpha=lr)
nn.fit(X)
plt.subplot(2,3, i+1)
plt.title(lr)
plt.plot(nn.errors);
Learning Curves

Learning curves with different values of alpha (learning rate):

plot_different_lr()
LinearRegression from sklearn

For this we will use the boston house price data.
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
boston = load_boston()
X = boston.data
X = StandardScaler().fit(X).transform(X)
y = boston.target
x_train, x_test, y_train, y_test = train_test_split(X, y)
regressor = LinearRegression().fit(x_train, y_train)
print("train score:", regressor.score(x_train, y_train))
print("test score:", regressor.score(x_test, y_test))
First, let's look at the dimensions of the input features.
x_train.shape
np.random.seed(1)
model = [
Linear(13, 100, "Linear 1"),
Sigmoid(),
Linear(100, 50, "Linear 2"),
Sigmoid(),
Linear(50, 1, "Linear 3"),
MSE(y_train)
]
nn = NeuralNetwork(model, alpha=0.0001, iterations=1000)
nn.fit(x_train);
plt.plot(nn.errors);
print(r2_score(y_train, nn.predict(x_train)))
print(r2_score(y_test, nn.predict(x_test)))
Things that could be added to the graph:
- relu (a sketch follows after this list)
- softmax
- log loss, also known as categorical cross entropy
- decaying lr and momentum
- a Linear element that takes only one dimension - the number of neurons in the hidden layer; the other one will be computed automatically on the first call to forward, from the x that was passed in
- l1 and l2 regularization; each Linear element can have different values for l1 and l2. * Only if Stefan writes automated tests to verify it!
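A relu element could follow the same forward/backward interface as the classes above - a minimal sketch, shown here only as an illustration (it is not used anywhere below):

class ReLU:
    def forward(self, X):
        # Remember where the input was positive.
        self.mask = X > 0
        return X * self.mask

    def backward(self, dZ):
        # The gradient passes through only where the input was positive.
        return dZ * self.mask

It could be dropped into the model list in place of Sigmoid.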
pip install tensorflow==1.3.0
pip install tensorflow-gpu==1.3.0
because with the latest version (1.4.0) we get:
RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
return f(*args, **kwds)
import tensorflow as tf
# Model parameters
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
# Model input and output
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
linear_model = W*x + b
# loss
loss = tf.reduce_sum(tf.square(linear_model - y)) # sum of the squares
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init) # initialize W and b with 0.3 and -0.3
for i in range(1000):
sess.run(train, {x: x_train, y: y_train})
# evaluate training accuracy
curr_W, curr_b, curr_loss = sess.run([W, b, loss], {x: x_train, y: y_train})
print(f"W: {curr_W} b: {curr_b} loss: {curr_loss}")
# y = -1 * x + 1
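The trained variables can also be used to predict on new inputs through the same session; the input 5.0 below is an arbitrary example and should give roughly -1 * 5 + 1 = -4.

print(sess.run(linear_model, {x: [5.0]}))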
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print(mnist.train.images.shape)
plt.figure(figsize=(12,8))
for i in range(5):
plt.subplot(1,5,i+1)
plt.imshow(mnist.train.images[i].reshape(28, 28))
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
y = tf.matmul(x,W) + b
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
for _ in range(1000):
batch = mnist.train.next_batch(100)
train_step.run(feed_dict={x: batch[0], y_: batch[1]})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
W1 = tf.Variable(tf.random_normal([784,200], seed=5))
b1 = tf.Variable(tf.zeros([200]))
h1 = tf.nn.relu(tf.matmul(x, W1) + b1)
W2 = tf.Variable(tf.zeros([200,10]))
b2 = tf.Variable(tf.zeros([10]))
y = tf.matmul(h1, W2) + b2
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
sess.run(tf.global_variables_initializer())
for _ in range(1000):
batch = mnist.train.next_batch(100)
train_step.run(feed_dict={x: batch[0], y_: batch[1]})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
How to debug errors with tensor dimensions:
print(x.get_shape())
print(W1.get_shape())
print(b1.get_shape())
!rm -r summary
sess.close()
sess = tf.Session()
def variable_summaries(var):
"""Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
with tf.name_scope('summaries'):
mean = tf.reduce_mean(var)
tf.summary.scalar('mean', mean)
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
tf.summary.scalar('stddev', stddev)
tf.summary.scalar('max', tf.reduce_max(var))
tf.summary.scalar('min', tf.reduce_min(var))
tf.summary.histogram('histogram', var)
def linear_layer(x, output_size, activation=None, layer_name="layer"):
with tf.name_scope(layer_name):
samples, features_count = x.get_shape().as_list()
W = tf.Variable(tf.random_uniform([features_count, output_size], seed=5), name="W")
b = tf.Variable(tf.zeros([output_size]), name="b")
h = tf.matmul(x, W) + b
if activation:
h = activation(h)
with tf.name_scope("W"):
variable_summaries(W)
with tf.name_scope("b"):
variable_summaries(b)
return h
h1 = linear_layer(x, 500, activation=tf.nn.relu, layer_name="L1")
h2 = linear_layer(h1, 100, activation=tf.nn.relu, layer_name="L2")
y = linear_layer(h2, 10, layer_name="L3")
with tf.name_scope('cross_entropy'):
diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
with tf.name_scope('total'):
cross_entropy = tf.reduce_mean(diff)
tf.summary.scalar('cross_entropy', cross_entropy)
with tf.name_scope('train'):
train_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy)
with tf.name_scope('accuracy'):
with tf.name_scope('correct_prediction'):
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter("summary/train", sess.graph)
test_writer = tf.summary.FileWriter("summary/test", sess.graph)
sess.run(tf.global_variables_initializer())
for i in range(1000):
if i % 10 == 0:
test_batch = mnist.test.next_batch(1000)
summary, accuracy_ = sess.run([merged, accuracy], feed_dict={x: test_batch[0], y_: test_batch[1]})
test_writer.add_summary(summary, i)
else:
batch = mnist.train.next_batch(100)
summary, _ = sess.run([merged, train_step], feed_dict={x: batch[0], y_: batch[1]})
train_writer.add_summary(summary, i)
tensorboard --logdir=path/to/log-directory
Keras is a high-level neural networks API, written in Python and capable of running on top of TensorFlow
, CNTK
, or Theano
. It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research.
Use Keras if you need a deep learning library that allows for easy and fast prototyping, supports both convolutional and recurrent networks (and combinations of the two), and runs seamlessly on CPU and GPU.
Read the documentation at Keras.io.
Keras is compatible with: Python 2.7-3.6.
pip install keras
Keras has two modes: the Sequential model and the functional API (Model).
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, Adam
The Sequential model in Keras

First we will load the same data (mnist), but through Keras.
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
num_classes = 10
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_test
A 3-layer NN + dropout
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=Adam(),
metrics=['accuracy'])
batch_size = 256
epochs = 10
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
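The history object returned by fit stores the per-epoch metrics and can be used to plot learning curves. A minimal sketch, assuming the model was compiled with metrics=['accuracy'] as above; depending on the Keras version the stored key is 'acc' or 'accuracy', so the sketch checks which one exists:

acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key], label='train')
plt.plot(history.history['val_' + acc_key], label='validation')
plt.legend();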
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
SVG(model_to_dot(model, show_layer_names=True, show_shapes=True).create(prog='dot', format='svg',))
sudo pip install pydot-ng
brew install graphviz
The functional API allows building other architectures besides plain feed-forward networks.
from keras.layers import Input, Concatenate
from keras.models import Model
# This returns a tensor
inputs = Input(shape=(784,))
# a layer instance is callable on a tensor, and returns a tensor
x = Dense(64, activation='relu', name='L1_64_relu')(inputs)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu', name='L2_64_relu')(x)
x = Dropout(0.2)(x)
h = Dense(128, activation='elu', name="L1_128_elu")(inputs)
h = Dropout(0.5)(h)
x = Concatenate()([x, h])
predictions = Dense(10, activation='softmax', name='Softmax')(x)
# This creates the model
model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, batch_size=128, epochs=10);
score = model.evaluate(x_test, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
SVG(model_to_dot(model, show_layer_names=True, show_shapes=True).create(prog='dot', format='svg'))
# pip install h5py
model.save('model.hdf5')
!ls -lh model.hdf5
new_model = keras.models.load_model('model.hdf5')
score = new_model.evaluate(x_test, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
y_hat = new_model.predict(x_test[:4])
print(y_hat)
print(np.argmax(y_hat, axis=1))
print(np.argmax(y_test[:4], axis=1))
keras.optimizers

[c for c in dir(keras.optimizers) if c[0].isupper()]

keras.layers

[c for c in dir(keras.layers) if c[0].isupper()]
* Unsupervised NN:
    * Autoencoders
    * Word Embeddings
If there is time left, we will look through the Keras documentation.