Wednesday, 19 February 2020

CNN, Training Data and Test Data

CNN Application
In [230]:
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import math
In [231]:
DATA_DIR = "img_new"   # training images
TEST_DIR = "Data_Uji"  # test images


TRAIN_TEST_SPLIT = 0.7  # intended train fraction (not applied below)
IM_WIDTH = IM_HEIGHT = 198
ID_TIPE_MAP = {0: 'Phospor', 1: 'Kalium', 2: 'Nitrogen', 3: 'Normal'}
TIPE_ID_MAP = dict((r, i) for i, r in ID_TIPE_MAP.items())

# The test set uses the same label maps as the training set.
UJI_ID_TIPE_MAP = {0: 'Phospor', 1: 'Kalium', 2: 'Nitrogen', 3: 'Normal'}
UJI_TIPE_ID_MAP = dict((r, i) for i, r in UJI_ID_TIPE_MAP.items())

ID_TIPE_MAP, TIPE_ID_MAP
Out[231]:
({0: 'Phospor', 1: 'Kalium', 2: 'Nitrogen', 3: 'Normal'},
 {'Phospor': 0, 'Kalium': 1, 'Nitrogen': 2, 'Normal': 3})
In [232]:
def uk(img):
    # Print the type and dimensions of an image array
    # (rows x cols, plus channels when present).
    uk1 = len(img)
    uk2 = len(img[0])
    try:
        uk3 = "x" + str(len(img[0][0]))
    except TypeError:
        uk3 = ""

    print(type(img))
    print(str(uk1) + "x" + str(uk2) + uk3)
    return "OK"

def parse_filepath(filepath):
    # Filenames look like "0_pho.001.jpg": the digit before "_" is the label id.
    try:
        path, filename = os.path.split(filepath)
        filename, ext = os.path.splitext(filename)
        tipe, _ = filename.split("_")

        return ID_TIPE_MAP[int(tipe)]
    except Exception as e:
        print('None:', filepath)
        return None
    
def uji_parse_filepath(filepath):
    # Same parsing rule, applied to the test-set filenames in Data_Uji.
    try:
        path, filename = os.path.split(filepath)
        filename, ext = os.path.splitext(filename)
        tipe, _ = filename.split("_")

        return UJI_ID_TIPE_MAP[int(tipe)]
    except Exception as e:
        print('None:', filepath)
        return None

def uji_parse_filepath0(filepath):
    # Variant that assigns every file label 0 (defined but not used below).
    try:
        path, filename = os.path.split(filepath)
        filename, ext = os.path.splitext(filename)
        uji_tipe = 0
        return UJI_ID_TIPE_MAP[uji_tipe]
    except Exception as e:
        print('None:', filepath)
        return None
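As a quick check of the parser: filenames in img_new follow the pattern "<label-id>_<abbr>.<seq>.jpg" (see the table below), so the leading digit selects the label. A minimal sketch, using one of the files listed further down:

# Hypothetical sanity check of parse_filepath on a filename from img_new.
sample = os.path.join("img_new", "0_pho.001.jpg")
print(parse_filepath(sample))  # -> 'Phospor' (label id 0)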
In [233]:
files = glob.glob(os.path.join(DATA_DIR, "*.jpg"))
attributes = list(map(parse_filepath, files))
df = pd.DataFrame(attributes)
df['file'] = files
df.columns = ['tipe', 'file']
df = df.dropna()

df.head()
df.describe()
df['tipe'].describe()
df.groupby(by=['tipe']).count().plot(kind='bar')

p = np.random.permutation(len(df))
df['tipe_id'] = df['tipe'].map(lambda ztipe: TIPE_ID_MAP[ztipe])
print('Number of training images:', len(p))
all_idx = p
df.head()
#df.tail()
Number of training images: 100
Out[233]:
tipe file tipe_id
0 Phospor img_new\0_pho.001.jpg 0
1 Phospor img_new\0_pho.002.jpg 0
2 Phospor img_new\0_pho.003.jpg 0
3 Phospor img_new\0_pho.004.jpg 0
4 Phospor img_new\0_pho.005.jpg 0
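Note that TRAIN_TEST_SPLIT is defined above but never applied: the whole permutation p becomes all_idx. A minimal sketch of how a 70/30 split could be taken from p if a held-out set were wanted (train_idx and holdout_idx are hypothetical names, not used elsewhere in this notebook):

# Hypothetical 70/30 split of the permuted indices using TRAIN_TEST_SPLIT.
cut = int(len(p) * TRAIN_TEST_SPLIT)
train_idx, holdout_idx = p[:cut], p[cut:]
print(len(train_idx), len(holdout_idx))  # 70 30 for the 100 images here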
Building the Image Matrices
In [235]:
from keras.utils import to_categorical
from PIL import Image

def get_data_generator(df, indices, for_training, batch_size=16):
    # Yields (images, [labels]) batches; loops over the data forever when
    # for_training=True, and stops after one pass when for_training=False.
    images, ltipe = [], []
    while True:
        for i in indices:
            r = df.iloc[i]
            file, t = r['file'], r['tipe_id']
            im = Image.open(file)
            im = im.resize((IM_WIDTH, IM_HEIGHT))
            im = np.array(im) / 255.0          # scale pixels to [0, 1]
            images.append(im)
            ltipe.append(to_categorical(t, len(TIPE_ID_MAP)))
            if len(images) >= batch_size:
                yield np.array(images), [np.array(ltipe)]
                images, ltipe = [], []
        if not for_training:
            break
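Note that the generator wraps the label array in a one-element list, which is why the shape checks later print "1 x N". A minimal sketch of drawing one small batch (gen is a hypothetical name; shapes assume 3-channel JPEGs):

# Pull a single batch of 4 images and inspect the shapes.
gen = get_data_generator(df, all_idx, for_training=False, batch_size=4)
batch_x, batch_y = next(gen)
print(batch_x.shape, batch_y[0].shape)  # (4, 198, 198, 3) (4, 4)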

Convolutional Model

In [236]:
from keras.layers import Input, Dense, BatchNormalization, Conv2D, MaxPool2D, GlobalMaxPool2D, Dropout
from keras.optimizers import SGD
from keras.models import Model

def conv_block(inp, filters=32, bn=True, pool=True):
    # 3x3 convolution + ReLU, optionally followed by batch norm and 2x2 max pooling.
    _ = Conv2D(filters=filters, kernel_size=3, activation='relu')(inp)
    if bn:
        _ = BatchNormalization()(_)
    if pool:
        _ = MaxPool2D()(_)
    return _

input_layer = Input(shape=(IM_HEIGHT, IM_WIDTH, 3))
_ = conv_block(input_layer, filters=32, bn=False, pool=False)
_ = conv_block(_, filters=32*2)
_ = conv_block(_, filters=32*3)
_ = conv_block(_, filters=32*4)
_ = conv_block(_, filters=32*5)
_ = conv_block(_, filters=32*6)
bottleneck = GlobalMaxPool2D()(_)

# for tipe prediction
_ = Dense(units=128, activation='relu')(bottleneck)
tipe_output = Dense(units=len(TIPE_ID_MAP), activation='softmax', name='tipe_output')(_)


model = Model(inputs=input_layer, outputs=[tipe_output])
model.compile(optimizer='rmsprop', 
              loss={'tipe_output': 'categorical_crossentropy'},
              loss_weights={'tipe_output': 1.5},
              metrics={'tipe_output': 'accuracy'})
model.summary()
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_5 (InputLayer)         (None, 198, 198, 3)       0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 196, 196, 32)      896       
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 194, 194, 64)      18496     
_________________________________________________________________
batch_normalization_21 (Batc (None, 194, 194, 64)      256       
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 97, 97, 64)        0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 95, 95, 96)        55392     
_________________________________________________________________
batch_normalization_22 (Batc (None, 95, 95, 96)        384       
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 47, 47, 96)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 45, 45, 128)       110720    
_________________________________________________________________
batch_normalization_23 (Batc (None, 45, 45, 128)       512       
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 22, 22, 128)       0         
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 20, 20, 160)       184480    
_________________________________________________________________
batch_normalization_24 (Batc (None, 20, 20, 160)       640       
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 10, 10, 160)       0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 8, 8, 192)         276672    
_________________________________________________________________
batch_normalization_25 (Batc (None, 8, 8, 192)         768       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 4, 4, 192)         0         
_________________________________________________________________
global_max_pooling2d_5 (Glob (None, 192)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               24704     
_________________________________________________________________
tipe_output (Dense)          (None, 4)                 516       
=================================================================
Total params: 674,436
Trainable params: 673,156
Non-trainable params: 1,280
_________________________________________________________________
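The Param # column follows directly from the layer shapes: a 3x3 Conv2D carries (3*3*in_channels + 1)*filters weights, the +1 being the per-filter bias. A minimal check of a few rows of the summary (conv_params is a hypothetical helper):

# Reproduce some entries of the summary's Param # column by hand.
def conv_params(in_ch, filters, k=3):
    return (k * k * in_ch + 1) * filters  # +1 bias per filter

print(conv_params(3, 32))   # 896    -> conv2d_25
print(conv_params(32, 64))  # 18496  -> conv2d_26
print((192 + 1) * 128)      # 24704  -> dense_5 (GlobalMaxPool2D output is 192-d)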
Modeling the Training Data

In [237]:
from keras.callbacks import ModelCheckpoint

data_size = len(all_idx)
all_gen = get_data_generator(df, all_idx, for_training=True, batch_size=data_size)
# NOTE: validation draws from the same indices as training, so the val_*
# numbers measure fit to the training data, not generalization.
valid_gen = get_data_generator(df, all_idx, for_training=True, batch_size=data_size)

callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]
print(all_gen)
print(valid_gen)


GENERATE = model.fit_generator(all_gen,
                               steps_per_epoch=len(all_idx) / data_size,
                               epochs=5,
                               callbacks=callbacks,
                               validation_data=valid_gen,
                               validation_steps=len(all_idx) / data_size)
#GENERATE = model.fit_generator(all_gen,steps_per_epoch=1,epochs=1,callbacks=callbacks,validation_data=valid_gen,validation_steps=1)

print(model.metrics_names)
print(GENERATE)
<generator object get_data_generator at 0x0000021D01042138>
<generator object get_data_generator at 0x0000021D01042048>
Epoch 1/5
4/4 [==============================] - 86s 22s/step - loss: 6.8969 - acc: 0.3300 - val_loss: 1.4379 - val_acc: 0.6700
Epoch 2/5
4/4 [==============================] - 80s 20s/step - loss: 1.5445 - acc: 0.6200 - val_loss: 1.9955 - val_acc: 0.5400
Epoch 3/5
4/4 [==============================] - 78s 20s/step - loss: 1.0087 - acc: 0.7200 - val_loss: 1.1763 - val_acc: 0.6700
Epoch 4/5
4/4 [==============================] - 74s 19s/step - loss: 1.3623 - acc: 0.6800 - val_loss: 1.1182 - val_acc: 0.7100
Epoch 5/5
4/4 [==============================] - 74s 18s/step - loss: 0.5274 - acc: 0.8500 - val_loss: 0.5452 - val_acc: 0.8500
['loss', 'acc']
<keras.callbacks.History object at 0x0000021D089E0C18>
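GENERATE is a Keras History object; its .history dict holds the per-epoch numbers printed above (keys 'loss', 'acc', 'val_loss', 'val_acc' in this Keras version, matching model.metrics_names). A minimal sketch of plotting the curves with the matplotlib import from the top of the notebook:

# Plot training vs. validation accuracy per epoch from the History object.
plt.plot(GENERATE.history['acc'], label='acc')
plt.plot(GENERATE.history['val_acc'], label='val_acc')
plt.xlabel('epoch')
plt.legend()
plt.show()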
In [238]:
MM = model.evaluate_generator(all_gen, steps=11)  # average the metrics over 11 batches
dict(zip(model.metrics_names, MM))
Out[238]:
{'loss': 0.5612496300177141, 'acc': 0.8436363718726418}
In [240]:
x_test, tipe_true = next(all_gen)
tipe_pred = model.predict_on_batch(x_test)
print(len(tipe_true), 'x', len(tipe_true[0]))  # 1 x 25 <class 'list'>
print(len(tipe_pred), 'x', len(tipe_pred[0]))  # 25 x 4 <class 'numpy.ndarray'>
print(len(x_test), 'x', len(x_test[0]))        # 25 x 198 <class 'numpy.ndarray'>
1 x 25
25 x 4
25 x 198
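The next two cells convert the one-hot truths and softmax outputs to class ids with argmax over the last axis. A minimal illustration of what that does to a row:

# argmax(axis=-1) maps each row (one-hot or probabilities) to its class id.
demo = np.array([[0.0, 0.0, 0.0, 1.0],     # one-hot for class 3
                 [0.1, 0.7, 0.1, 0.1]])    # softmax row, most mass on class 1
print(demo.argmax(axis=-1))  # [3 1]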
In [241]:
LT1 = tipe_true[0].tolist()
AT1 = np.asarray(tipe_true[0], dtype=np.float32)

LT2 = tipe_pred.tolist()
AT2 = tipe_pred
In [242]:
tipe_true = AT1.argmax(axis=-1)
tipe_pred = AT2.argmax(axis=-1)
In [243]:
from sklearn.metrics import classification_report
print("Olah Data:")
print(classification_report(tipe_true,tipe_pred))
Training data report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.88      1.00      0.93         7
           2       0.83      1.00      0.91         5
           3       1.00      0.67      0.80         6

    accuracy                           0.92        25
   macro avg       0.93      0.92      0.91        25
weighted avg       0.93      0.92      0.92        25
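The report aggregates per class; scikit-learn's confusion_matrix shows exactly which classes are confused with which. A minimal sketch on the same arrays:

# Rows are true labels, columns are predicted labels (class ids 0..3).
from sklearn.metrics import confusion_matrix
print(confusion_matrix(tipe_true, tipe_pred))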

In [244]:
training_n = len(tipe_true)  # number of samples in the evaluated batch
random_indices = np.random.permutation(training_n)

benar = 0
no = 0
for i, img_idx in enumerate(random_indices):
    no = no + 1
    T = tipe_true[img_idx]
    P = tipe_pred[img_idx]
    V = 'No'
    if T == P:
        V = 'Yes'
        benar = benar + 1
    print(str(no), ' ', str(img_idx), ' ', str(T), ' ', str(P), ' ', str(V))

print('Correct:', str(benar), ' of ', str(no), ' training samples')
acc = benar / no
print('Acc:', str(acc))
1   0   0   0   Yes
2   1   0   0   Yes
3   21   0   0   Yes
4   7   1   1   Yes
5   20   0   0   Yes
6   14   0   0   Yes
7   24   1   1   Yes
8   10   3   3   Yes
9   12   0   0   Yes
10   8   2   2   Yes
11   17   0   0   Yes
12   2   3   3   Yes
13   13   3   3   Yes
14   16   1   1   Yes
15   23   1   1   Yes
16   3   2   2   Yes
17   11   1   1   Yes
18   15   3   3   Yes
19   22   3   2   No
20   18   1   1   Yes
21   5   2   2   Yes
22   4   3   1   No
23   19   2   2   Yes
24   9   1   1   Yes
25   6   2   2   Yes
Correct: 23  of  25  training samples
Acc: 0.92
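The counting loop above can be collapsed into a single NumPy expression; a minimal equivalent for this batch (23/25 = 0.92):

# Element-wise compare, then average the booleans = fraction correct.
acc = (tipe_true == tipe_pred).mean()
print(acc)  # 0.92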
In [261]:
# Show training images with true vs. predicted labels
plt.clf()
n_cols = 5
n_rows = math.ceil(training_n / n_cols)
no=0
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 20))
for i, img_idx in enumerate(random_indices):
    no=no+1
    T=tipe_true[img_idx]
    P=tipe_pred[img_idx]
    
    ax = axes.flat[i]
    ax.imshow(x_test[img_idx])
    #ax.set_title('{}'.format(ID_TIPE_MAP[P]))
    ax.set_xlabel('{}.{} ? {}'.format(str(no), ID_TIPE_MAP[T],ID_TIPE_MAP[P]))
    
    ax.set_xticks([])
    ax.set_yticks([])  
[Figure: 5x5 grid of the 25 training images, each captioned "no. true-label ? predicted-label"]

Modeling the Test Data

In [249]:
uji_files = glob.glob(os.path.join(TEST_DIR, "*.jpg"))
uji_attributes = list(map(uji_parse_filepath, uji_files))
uji_df = pd.DataFrame(uji_attributes)
uji_df['file'] = uji_files
uji_df.columns = ['tipe', 'file']
uji_df = uji_df.dropna()

uji_df.head()
uji_df.describe()
uji_df['tipe'].describe()
uji_df.groupby(by=['tipe']).count().plot(kind='bar')

uji_p = np.random.permutation(len(uji_df))
uji_df['tipe_id'] = uji_df['tipe'].map(lambda xtipe: UJI_TIPE_ID_MAP[xtipe])
print('Number of test images:', len(uji_p))
uji_idx = uji_p
uji_df.head()
#uji_df.tail()
Number of test images: 37
Out[249]:
tipe file tipe_id
0 Phospor Data_Uji\0_pho.001.jpg 0
1 Phospor Data_Uji\0_pho.002.jpg 0
2 Kalium Data_Uji\1_kal.006.jpg 1
3 Kalium Data_Uji\1_kal.007.jpg 1
4 Nitrogen Data_Uji\2_nit.026.jpg 2
In [250]:
# Evaluate on the test data.
# NOTE: fit_generator also updates the weights on this test batch
# (one step here; the commented-out call would train for 5 epochs).
# For a pure evaluation without weight updates, see the sketch after this cell.
uji_data_size = len(uji_idx)
uji_gen = get_data_generator(uji_df, uji_idx, for_training=True, batch_size=uji_data_size)
#GENERATE_UJI = model.fit_generator(uji_gen,steps_per_epoch=len(uji_idx)/uji_data_size,epochs=5,callbacks=callbacks,validation_data=valid_gen,validation_steps= len(uji_idx)/uji_data_size)
GENERATE_UJI = model.fit_generator(uji_gen, steps_per_epoch=1, epochs=1,
                                   callbacks=callbacks,
                                   validation_data=valid_gen, validation_steps=1)
Epoch 1/1
1/1 [==============================] - 33s 33s/step - loss: 0.5503 - acc: 0.8649 - val_loss: 4.4078 - val_acc: 0.3200
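Since fit_generator updates the weights, the cell above lets the model see the test batch once before it is scored. For a pure held-out evaluation, the generator's for_training=False mode (one pass, then stop) can be paired with evaluate_generator; a minimal sketch (eval_gen is a hypothetical name):

# Evaluation-only pass over Data_Uji: no weight updates.
eval_gen = get_data_generator(uji_df, uji_idx, for_training=False,
                              batch_size=uji_data_size)
scores = model.evaluate_generator(eval_gen, steps=1)
print(dict(zip(model.metrics_names, scores)))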
In [252]:
print(model.metrics_names)
print(uji_gen)


UJI_M = model.evaluate_generator(uji_gen, steps=11)  # average the metrics over 11 batches
dict(zip(model.metrics_names, UJI_M))
['loss', 'acc']
<generator object get_data_generator at 0x0000021D01042318>
Out[252]:
{'loss': 2.061514139175415, 'acc': 0.7297297120094299}
In [253]:
uji_x_test, uji_tipe_true = next(uji_gen)
uji_tipe_pred = model.predict_on_batch(uji_x_test)
print(len(uji_tipe_true), 'x', len(uji_tipe_true[0]))  # 1 x 37 <class 'list'>
print(len(uji_tipe_pred), 'x', len(uji_tipe_pred[0]))  # 37 x 4 <class 'numpy.ndarray'>
print(len(uji_x_test), 'x', len(uji_x_test[0]))        # 37 x 198 <class 'numpy.ndarray'>

LT1 = uji_tipe_true[0].tolist()
AT1 = np.asarray(uji_tipe_true[0], dtype=np.float32)

LT2 = uji_tipe_pred.tolist()
AT2 = uji_tipe_pred

uji_tipe_true = AT1.argmax(axis=-1)
uji_tipe_pred = AT2.argmax(axis=-1)

from sklearn.metrics import classification_report
print("Olah Data Uji:")
print(classification_report(uji_tipe_true,uji_tipe_pred))
1 x 37
37 x 4
37 x 198
Test data report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         5
           3       0.77      0.96      0.86        28

    accuracy                           0.73        37
   macro avg       0.19      0.24      0.21        37
weighted avg       0.58      0.73      0.65        37

C:\Users\USER\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
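The UndefinedMetricWarning fires because classes 0-2 never appear among the predictions, so their precision has a zero denominator. Newer scikit-learn releases (0.22 and later) let the report silence this explicitly; a minimal sketch, assuming such a version is installed:

# zero_division=0 pins ill-defined precision/F-scores to 0 without warning.
print(classification_report(uji_tipe_true, uji_tipe_pred, zero_division=0))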
In [254]:
uji_n = uji_data_size  # number of test samples (= len(uji_tipe_true))
uji_random_indices = np.random.permutation(uji_n)

benar = 0
no = 0
for i, uji_img_idx in enumerate(uji_random_indices):
    no = no + 1
    T = uji_tipe_true[uji_img_idx]
    P = uji_tipe_pred[uji_img_idx]
    V = 'No'
    if T == P:
        V = 'Yes'
        benar = benar + 1
    print(str(no), ' ', str(uji_img_idx), ' ', str(T), ' ', str(P), ' ', str(V))

print('Correct:', str(benar), ' of ', str(no), ' test samples')
acc = benar / no
print('Test acc:', str(acc))
1   10   3   3   Yes
2   19   2   3   No
3   24   3   3   Yes
4   17   3   3   Yes
5   9   3   3   Yes
6   31   3   3   Yes
7   23   3   3   Yes
8   7   3   3   Yes
9   36   3   3   Yes
10   30   0   3   No
11   4   3   3   Yes
12   29   3   3   Yes
13   18   3   3   Yes
14   14   3   3   Yes
15   6   0   1   No
16   28   3   3   Yes
17   21   2   3   No
18   11   3   3   Yes
19   8   3   3   Yes
20   20   3   3   Yes
21   22   3   3   Yes
22   33   2   3   No
23   15   3   3   Yes
24   32   2   3   No
25   5   1   3   No
26   35   3   3   Yes
27   25   3   3   Yes
28   1   3   3   Yes
29   2   3   3   Yes
30   13   3   3   Yes
31   16   2   3   No
32   34   3   3   Yes
33   27   3   3   Yes
34   0   3   1   No
35   26   1   3   No
36   3   3   3   Yes
37   12   3   3   Yes
Correct: 27  of  37  test samples
Test acc: 0.7297297297297297
Note that 27/37 = 0.7297, which matches the evaluate_generator accuracy reported above.
In [260]:
# Show test images with true vs. predicted labels
plt.clf()
n_cols = 5
n_rows = math.ceil(uji_n / n_cols)
no=0
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 20))
for i, uji_img_idx in enumerate(uji_random_indices):
    no=no+1
    T=uji_tipe_true[uji_img_idx]
    P=uji_tipe_pred[uji_img_idx]
    ax = axes.flat[i]
    ax.imshow(uji_x_test[uji_img_idx])
    #ax.set_title('{}'.format(UJI_ID_TIPE_MAP[P]))
    ax.set_xlabel('{}.{} ? {}'.format(str(no), UJI_ID_TIPE_MAP[T],UJI_ID_TIPE_MAP[P]))
    
    ax.set_xticks([])
    ax.set_yticks([])  
[Figure: 5-column grid of the 37 test images, each captioned "no. true-label ? predicted-label"]