1. Comparison of Optimization Methods
import sys, os
sys.path.append(os.pardir)  # make the repository root importable (common/, dataset/)
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from common.optimizer import SGD, Momentum, AdaGrad, Adam


def f(x, y):
    # test function: an elongated bowl, much flatter along x than along y
    return x**2 / 20.0 + y**2


def df(x, y):
    # analytic gradient of f
    return x / 10.0, 2.0*y


init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

optimizers = OrderedDict()
optimizers['SGD'] = SGD(lr=0.95)
optimizers['Momentum'] = Momentum(lr=0.1)
optimizers['AdaGrad'] = AdaGrad(lr=1.5)
optimizers['Adam'] = Adam(lr=0.3)

idx = 1
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]

    # run 30 update steps from the same starting point and record the trajectory
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])
        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)

    # contour of f as the background
    x = np.arange(-10, 10, 0.01)
    y = np.arange(-5, 5, 0.01)
    X, Y = np.meshgrid(x, y)
    Z = f(X, Y)

    # simplify the outer contours
    mask = Z > 7
    Z[mask] = 0

    # one subplot per optimizer
    plt.subplot(2, 2, idx)
    idx += 1
    plt.plot(x_history, y_history, 'o-')
    plt.contour(X, Y, Z)
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.plot(0, 0, '*')  # mark the minimum at the origin
    plt.title(key)
    plt.xlabel('x')
    plt.ylabel('y')

plt.show()
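The four optimizers are imported from common.optimizer, whose source is not reproduced in this post. As a minimal sketch, assuming the conventional update rules (the repository's actual implementation may differ in details), their update methods look roughly like this:

import numpy as np

class SGD:
    # W <- W - lr * grad
    def __init__(self, lr=0.01):
        self.lr = lr
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]

class Momentum:
    # v <- momentum * v - lr * grad;  W <- W + v
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr, self.momentum, self.v = lr, momentum, None
    def update(self, params, grads):
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

class AdaGrad:
    # per-parameter step size shrinks with the accumulated squared gradients
    def __init__(self, lr=0.01):
        self.lr, self.h = lr, None
    def update(self, params, grads):
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

class Adam:
    # Momentum-style first moment plus an RMSProp-style second moment, with bias correction
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr, self.beta1, self.beta2 = lr, beta1, beta2
        self.iter, self.m, self.v = 0, None, None
    def update(self, params, grads):
        if self.m is None:
            self.m = {key: np.zeros_like(val) for key, val in params.items()}
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        self.iter += 1
        # fold both bias corrections into one effective learning rate
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)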
2. Comparing Training Speed on the MNIST Dataset
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from common.optimizer import SGD, Momentum, AdaGrad, Adam
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from dataset.mnist import load_mnist

# 0. Load the MNIST data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1. Set up one network per optimizer
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2. Train: every optimizer sees the same mini-batch at each iteration
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)

    # report the current loss every 100 iterations
    if i % 100 == 0:
        print("=========== iteration: " + str(i) + " ===========")
        for key in optimizers.keys():
            loss = networks[key].loss(x_batch, t_batch)
            print(key + ": " + str(loss))

# 3. Plot the (smoothed) training loss curves
markers = {'SGD': 'o', 'Momentum': 'x', 'AdaGrad': 's', 'Adam': 'D'}
x = np.arange(max_iterations)
for key in optimizers.keys():
    plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 1)
plt.legend()
plt.show()
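smooth_curve, imported from common.util, smooths the noisy per-iteration loss before plotting. Its exact implementation is not shown here; a hypothetical stand-in that keeps the output the same length as the input (so it still lines up with np.arange(max_iterations)) could be a simple moving average:

import numpy as np

def smooth_curve(x, window=10):
    # hypothetical stand-in: smooth a 1-D loss history with a simple moving average
    x = np.asarray(x, dtype=float)
    kernel = np.ones(window) / window
    # mode='same' keeps len(output) == len(input)
    return np.convolve(x, kernel, mode='same')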
Code reference: github.com/WegraLee/deep-learning-from-scratch