
Running the CNN on a GPU in Colab
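Before anything else, it helps to confirm that the Colab runtime actually has a GPU attached (Runtime > Change runtime type > GPU). A minimal check, assuming the TF 1.x API used throughout this post:

import tensorflow as tf

# Prints a device name such as '/device:GPU:0' when a GPU is attached,
# and an empty string otherwise.
print(tf.test.gpu_device_name())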

%reset

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

# Raw Data Loading
df = pd.read_csv('/content/drive/My Drive/MachineLearning/train.csv')
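# Note: the path above assumes Google Drive is already mounted in this Colab
# session. If it is not, mount it first (before the read_csv call):
# from google.colab import drive
# drive.mount('/content/drive')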

# No missing values or outliers in this data!
# Data split (separate the train data from the test data)
x_data_train, x_data_test, t_data_train, t_data_test = \
train_test_split(df.drop('label', axis=1, inplace=False),
                 df['label'],
                 test_size=0.3,
                 random_state=0)

# Min-Max Normalization
scaler = MinMaxScaler()
scaler.fit(x_data_train)
x_data_train_norm = scaler.transform(x_data_train)
x_data_test_norm = scaler.transform(x_data_test)
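# A quick sanity check of what MinMaxScaler does: each column is mapped
# through (x - min) / (max - min), so pixel columns spanning the full 0-255
# range are effectively divided by 255. (demo is an illustrative array,
# not MNIST data.)
demo = np.array([[0.], [127.5], [255.]])
print(MinMaxScaler().fit_transform(demo).ravel())   # [0.  0.5 1. ]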

del x_data_train, x_data_test

### TensorFlow implementation ###
sess = tf.Session()

t_data_train_onehot = sess.run(tf.one_hot(t_data_train, depth=10))
t_data_test_onehot = sess.run(tf.one_hot(t_data_test, depth=10))
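# For reference, tf.one_hot turns each integer label into a length-10
# indicator vector; e.g. label 3 becomes:
print(sess.run(tf.one_hot([3], depth=10)))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]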

# Placeholder
X = tf.placeholder(shape=[None,784], dtype=tf.float32)
T = tf.placeholder(shape=[None,10], dtype=tf.float32)
drop_rate = tf.placeholder(dtype=tf.float32)

# Convolution

# First, reshape the input data into image form (-1 infers the image count)
x_img = tf.reshape(X,[-1, 28, 28, 1])   # (number of images, height, width, channels)

# convolution layer 1
W1 = tf.Variable(tf.random.normal([3,3,1,32]))  # (filter height,
                                                #  filter width,
                                                #  filter channels,
                                                #  number of filters)
L1 = tf.nn.conv2d(x_img,W1, strides=[1,1,1,1], padding='SAME')
L1 = tf.nn.relu(L1)   # result => activation map (None,28,28,32)

# pooling layer 1
L1 = tf.nn.max_pool(L1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
print('L1 shape : {}'.format(L1.shape))

# convolution layer 2
W2 = tf.Variable(tf.random.normal([3,3,32,64]))  # (filter height,
                                                 #  filter width,
                                                 #  filter channels,
                                                 #  number of filters)
L2 = tf.nn.conv2d(L1,W2, strides=[1,1,1,1], padding='SAME')
L2 = tf.nn.relu(L2)   # result => activation map

# pooling layer 2
L2 = tf.nn.max_pool(L2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
print('L2 shape : {}'.format(L2.shape))  # (?, 7, 7, 64)
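# Shape check: with padding='SAME' and stride 1, each 3x3 convolution keeps
# the spatial size, while each 2x2 max pool with stride 2 halves it, so the
# image goes 28x28 -> 14x14 -> 7x7. L2 is therefore (?, 7, 7, 64), i.e.
# 7*7*64 = 3136 features per image once flattened.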

# The result now has to go through a fully connected (FC) network for training,
# so flatten the data to 1-D before feeding it into the FC layer.

L2 = tf.reshape(L2, [-1,7*7*64])

# Weight & bias
W3 = tf.get_variable('weight3', shape=[7*7*64,256],
                     initializer=tf.contrib.layers.variance_scaling_initializer())
b3 = tf.Variable(tf.random.normal([256]))

_layer3 = tf.nn.relu(tf.matmul(L2,W3) + b3)
layer3 = tf.nn.dropout(_layer3, rate=drop_rate)
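# Note: the rate argument of tf.nn.dropout (added in later TF 1.x releases,
# replacing the older keep_prob) is the probability of *dropping* a unit, and
# survivors are scaled by 1/(1 - rate). So drop_rate:0.4 drops about 40% of
# the units during training, and drop_rate:0 disables dropout at evaluation.
# A quick check (roughly 40% zeros; surviving ones scaled to 1/0.6 ≈ 1.67):
print(sess.run(tf.nn.dropout(tf.ones([1, 4]), rate=0.4)))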

W4 = tf.get_variable('weight4', shape=[256,10],
                     initializer=tf.contrib.layers.variance_scaling_initializer())
b4 = tf.Variable(tf.random.normal([10]))

# Hypothesis
logit = tf.matmul(layer3,W4) + b4
H = tf.nn.softmax(logit)

# loss
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit,
                                                                 labels=T))

# train
train = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

# parameter
num_of_epoch = 200
batch_size = 100

# Training
def run_train(sess, train_x, train_t):
    print('### Starting Training ###')
    # initialize all variables
    sess.run(tf.global_variables_initializer())
    
    for step in range(num_of_epoch):
        total_batch = int(train_x.shape[0] / batch_size)

        for i in range(total_batch):
            batch_x = train_x[i*batch_size:(i+1)*batch_size]
            batch_t = train_t[i*batch_size:(i+1)*batch_size]
            _, loss_val = sess.run([train,loss], feed_dict={X:batch_x, 
                                                            T:batch_t, 
                                                            drop_rate:0.4})

        if step % 20 == 0:
            print('Loss : {}'.format(loss_val))
    print('### End Training ###')
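# One possible refinement (not part of the original code): the loop above
# visits the mini-batches in the same fixed order every epoch. A small helper
# that could be called at the top of each epoch to reshuffle the training set:
def shuffle_epoch(x, t):
    # Reorder x and t by one shared random permutation
    # (x, t: numpy arrays with the same number of rows).
    idx = np.random.permutation(x.shape[0])
    return x[idx], t[idx]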


    
# Accuracy
predict = tf.argmax(H,1)

# Performance evaluation with sklearn's classification_report
run_train(sess,x_data_train_norm,t_data_train_onehot)
target_names=['num 0','num 1','num 2','num 3','num 4','num 5','num 6','num 7','num 8','num 9']
print(classification_report(t_data_test,
                            sess.run(predict,
                                     feed_dict={X:x_data_test_norm, 
                                                drop_rate:0 }),
                            target_names=target_names))                                                
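# Alternatively (a sketch reusing the tensors defined above), accuracy can be
# computed inside the graph instead of through sklearn:
correct = tf.equal(predict, tf.argmax(T, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
print('Accuracy : {}'.format(sess.run(accuracy,
                                      feed_dict={X: x_data_test_norm,
                                                 T: t_data_test_onehot,
                                                 drop_rate: 0})))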

 

Output:

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
L1 shape : (?, 14, 14, 32)
L2 shape : (?, 7, 7, 64)
WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

### Starting Training ###
Loss : 0.24228335916996002
Loss : 0.0082903066650033
Loss : 0.000761454866733402
Loss : 0.004933550488203764
Loss : 0.0020892054308205843
Loss : 4.406860898598097e-05
Loss : 1.3708174719795352e-06
Loss : 3.082882903981954e-05
Loss : 0.0
Loss : 7.27174054304669e-08
### End Training ###
              precision    recall  f1-score   support

       num 0       0.99      0.99      0.99      1242
       num 1       0.99      0.99      0.99      1429
       num 2       0.98      0.99      0.99      1276
       num 3       0.99      0.99      0.99      1298
       num 4       0.99      0.97      0.98      1236
       num 5       0.99      0.99      0.99      1119
       num 6       0.99      0.99      0.99      1243
       num 7       0.99      0.98      0.99      1334
       num 8       0.99      0.99      0.99      1204
       num 9       0.97      0.99      0.98      1219

    accuracy                           0.99     12600
   macro avg       0.99      0.99      0.99     12600
weighted avg       0.99      0.99      0.99     12600