ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • [신경망] 뉴스 기사 분류 (Multiclass classification)
    머신러닝 & 딥러닝 2021. 12. 23. 17:19
    • 케라스에서 제공하는 로이터 데이터셋을 이용하여 총 46개의 토픽으로 분류하기
    from keras.datasets import reuters
    
    # Load the Reuters newswire dataset; num_words=10000 keeps only the
    # 10,000 most frequent words, so every word index is < 10000.
    (train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)
    Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
    2113536/2110848 [==============================] - 0s 0us/step
    2121728/2110848 [==============================] - 0s 0us/step
    
    train_data
    array([list([1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]),
           list([1, 3267, 699, 3434, 2295, 56, 2, 7511, 9, 56, 3906, 1073, 81, 5, 1198, 57, 366, 737, 132, 20, 4093, 7, 2, 49, 2295, 2, 1037, 3267, 699, 3434, 8, 7, 10, 241, 16, 855, 129, 231, 783, 5, 4, 587, 2295, 2, 2, 775, 7, 48, 34, 191, 44, 35, 1795, 505, 17, 12]),
           list([1, 53, 12, 284, 15, 14, 272, 26, 53, 959, 32, 818, 15, 14, 272, 26, 39, 684, 70, 11, 14, 12, 3886, 18, 180, 183, 187, 70, 11, 14, 102, 32, 11, 29, 53, 44, 704, 15, 14, 19, 758, 15, 53, 959, 47, 1013, 15, 14, 19, 132, 15, 39, 965, 32, 11, 14, 147, 72, 11, 180, 183, 187, 44, 11, 14, 102, 19, 11, 123, 186, 90, 67, 960, 4, 78, 13, 68, 467, 511, 110, 59, 89, 90, 67, 1390, 55, 2678, 92, 617, 80, 1274, 46, 905, 220, 13, 4, 346, 48, 235, 629, 5, 211, 5, 1118, 7, 2, 81, 5, 187, 11, 15, 9, 1709, 201, 5, 47, 3615, 18, 478, 4514, 5, 1118, 7, 232, 2, 71, 5, 160, 63, 11, 9, 2, 81, 5, 102, 59, 11, 17, 12]),
           ...,
           list([1, 141, 3890, 387, 81, 8, 16, 1629, 10, 340, 1241, 850, 31, 56, 3890, 691, 9, 1241, 71, 9, 5985, 2, 2, 699, 2, 2, 2, 699, 244, 5945, 4, 49, 8, 4, 656, 850, 33, 2993, 9, 2139, 340, 3371, 1493, 9, 2, 22, 2, 1094, 687, 83, 35, 15, 257, 6, 57, 9190, 7, 4, 5956, 654, 5, 2, 6191, 1371, 4, 49, 8, 16, 369, 646, 6, 1076, 7, 124, 407, 17, 12]),
           list([1, 53, 46, 957, 26, 14, 74, 132, 26, 39, 46, 258, 3614, 18, 14, 74, 134, 5131, 18, 88, 2321, 72, 11, 14, 1842, 32, 11, 123, 383, 89, 39, 46, 235, 10, 864, 728, 5, 258, 44, 11, 15, 22, 753, 9, 42, 92, 131, 728, 5, 69, 312, 11, 15, 22, 222, 2, 3237, 383, 48, 39, 74, 235, 10, 864, 276, 5, 61, 32, 11, 15, 21, 4, 211, 5, 126, 1072, 42, 92, 131, 46, 19, 352, 11, 15, 22, 710, 220, 9, 42, 92, 131, 276, 5, 59, 61, 11, 15, 22, 10, 455, 7, 1172, 137, 336, 1325, 6, 1532, 142, 971, 6463, 43, 359, 5, 4, 326, 753, 364, 17, 12]),
           list([1, 227, 2406, 91, 2, 125, 2855, 21, 4, 3976, 76, 7, 4, 757, 481, 3976, 790, 5259, 5654, 9, 111, 149, 8, 7, 10, 76, 223, 51, 4, 417, 8, 1047, 91, 6917, 1688, 340, 7, 194, 9411, 6, 1894, 21, 127, 2151, 2394, 1456, 6, 3034, 4, 329, 433, 7, 65, 87, 1127, 10, 8219, 1475, 290, 9, 21, 567, 16, 1926, 24, 4, 76, 209, 30, 4033, 6655, 5654, 8, 4, 60, 8, 4, 966, 308, 40, 2575, 129, 2, 295, 277, 1071, 9, 24, 286, 2114, 234, 222, 9, 4, 906, 3994, 8519, 114, 5758, 1752, 7, 4, 113, 17, 12])],
          dtype=object)
    
    train_data.shape
    (8982,)
    

    데이터 인코딩

    import numpy as np
    
    def vectorize_sequences(sequences, dimension=10000):
      """Multi-hot encode integer sequences into a 2-D float array.

      Row r of the result has 1.0 at every column whose index appears in
      sequences[r]; all other entries remain 0.0.
      """
      encoded = np.zeros((len(sequences), dimension))
      for row, word_indices in enumerate(sequences):
        for idx in word_indices:
          encoded[row, idx] = 1.
      return encoded
    
    # Multi-hot encode both splits: shape becomes (num_samples, 10000).
    x_train = vectorize_sequences(train_data)
    x_test =  vectorize_sequences(test_data)
    x_train
    array([[0., 1., 1., ..., 0., 0., 0.],
           [0., 1., 1., ..., 0., 0., 0.],
           [0., 1., 1., ..., 0., 0., 0.],
           ...,
           [0., 1., 1., ..., 0., 0., 0.],
           [0., 1., 1., ..., 0., 0., 0.],
           [0., 1., 1., ..., 0., 0., 0.]])
    
    x_train.shape
    (8982, 10000)
    
    from keras.utils.np_utils import to_categorical
    
    # Method 1: one-hot encode the integer topic labels (46 classes)
    # so they can be used with categorical_crossentropy.
    one_hot_train_labels = to_categorical(train_labels)
    one_hot_test_labels = to_categorical(test_labels)

    모델 정의

    • 입력 데이터가 10000개
    • 은닉층은 2개, 활성화 함수 relu
    • 출력층 노드는 46개 (멀티 분류로 46개 클래스이므로) 및 활성화함수 소프트맥스
    from keras import models, layers
    
    # Two 64-unit relu hidden layers; 46-way softmax output to produce a
    # probability distribution over the 46 topics.
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(10000, )))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(46, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Hold out the first 1,000 samples for validation; train on the rest.
    X_val = x_train[:1000]
    partial_X_train = x_train[1000:]
    
    y_val = one_hot_train_labels[:1000]
    partial_y_train = one_hot_train_labels[1000:]
    # 20 epochs to observe where overfitting begins on the validation set.
    history = model.fit(partial_X_train,
                        partial_y_train,
                        epochs=20,
                        batch_size=512,
                        validation_data=(X_val, y_val))
    Epoch 1/20
    16/16 [==============================] - 2s 30ms/step - loss: 2.5903 - accuracy: 0.5362 - val_loss: 1.6930 - val_accuracy: 0.6510
    Epoch 2/20
    16/16 [==============================] - 0s 15ms/step - loss: 1.3925 - accuracy: 0.7066 - val_loss: 1.3201 - val_accuracy: 0.7160
    Epoch 3/20
    16/16 [==============================] - 0s 15ms/step - loss: 1.0530 - accuracy: 0.7776 - val_loss: 1.1446 - val_accuracy: 0.7540
    Epoch 4/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.8317 - accuracy: 0.8246 - val_loss: 1.0492 - val_accuracy: 0.7770
    Epoch 5/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.6651 - accuracy: 0.8619 - val_loss: 0.9888 - val_accuracy: 0.7930
    Epoch 6/20
    16/16 [==============================] - 0s 14ms/step - loss: 0.5327 - accuracy: 0.8913 - val_loss: 0.9592 - val_accuracy: 0.8020
    Epoch 7/20
    16/16 [==============================] - 0s 16ms/step - loss: 0.4320 - accuracy: 0.9121 - val_loss: 0.9257 - val_accuracy: 0.8130
    Epoch 8/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.3507 - accuracy: 0.9293 - val_loss: 0.9133 - val_accuracy: 0.8170
    Epoch 9/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.2909 - accuracy: 0.9385 - val_loss: 0.9251 - val_accuracy: 0.8100
    Epoch 10/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.2454 - accuracy: 0.9429 - val_loss: 0.9413 - val_accuracy: 0.8210
    Epoch 11/20
    16/16 [==============================] - 0s 16ms/step - loss: 0.2124 - accuracy: 0.9483 - val_loss: 0.9434 - val_accuracy: 0.8100
    Epoch 12/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1870 - accuracy: 0.9518 - val_loss: 0.9364 - val_accuracy: 0.8150
    Epoch 13/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1703 - accuracy: 0.9541 - val_loss: 0.9762 - val_accuracy: 0.8040
    Epoch 14/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1513 - accuracy: 0.9541 - val_loss: 0.9907 - val_accuracy: 0.8120
    Epoch 15/20
    16/16 [==============================] - 0s 14ms/step - loss: 0.1392 - accuracy: 0.9545 - val_loss: 1.0371 - val_accuracy: 0.7940
    Epoch 16/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1390 - accuracy: 0.9559 - val_loss: 1.0448 - val_accuracy: 0.8040
    Epoch 17/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1290 - accuracy: 0.9563 - val_loss: 1.0308 - val_accuracy: 0.8080
    Epoch 18/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1206 - accuracy: 0.9579 - val_loss: 1.0909 - val_accuracy: 0.8010
    Epoch 19/20
    16/16 [==============================] - 0s 15ms/step - loss: 0.1148 - accuracy: 0.9577 - val_loss: 1.1429 - val_accuracy: 0.7930
    Epoch 20/20
    16/16 [==============================] - 0s 14ms/step - loss: 0.1142 - accuracy: 0.9599 - val_loss: 1.0771 - val_accuracy: 0.8020
    
    import matplotlib.pyplot as plt
    
    # Plot training vs. validation loss per epoch ('bo' = blue dots for
    # training, 'b' = solid blue line for validation).
    history_dict = history.history
    loss = history_dict['loss']
    val_loss = history_dict['val_loss']
    
    epochs = range(1, len(loss)+1)
    
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Clear the figure, then plot training vs. validation accuracy the
    # same way.
    plt.clf()
    history_dict = history.history
    acc = history_dict['accuracy']
    val_acc = history_dict['val_accuracy']
    
    plt.plot(epochs, acc, 'bo', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    # Retrain a FRESH network for 9 epochs (the point where the validation
    # curves above showed overfitting beginning), then evaluate on the
    # test set.
    # BUG FIX: the original read `modle = models.Sequential()` — the typo
    # discarded the new network, so the three .add() calls below stacked
    # extra layers onto the already-trained `model`; that is why the
    # second training run started at loss ~3.54 instead of ~2.6.
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(10000, )))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(46, activation='softmax'))
    
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(partial_X_train,
              partial_y_train,
              epochs=9,
              batch_size=512,
              validation_data=(X_val, y_val))
    
    # result is [test_loss, test_accuracy]
    result = model.evaluate(x_test, one_hot_test_labels)
    result
    Epoch 1/9
    16/16 [==============================] - 1s 30ms/step - loss: 3.5425 - accuracy: 0.5353 - val_loss: 3.2796 - val_accuracy: 0.6300
    Epoch 2/9
    16/16 [==============================] - 0s 15ms/step - loss: 2.9231 - accuracy: 0.7256 - val_loss: 2.6189 - val_accuracy: 0.6490
    Epoch 3/9
    16/16 [==============================] - 0s 15ms/step - loss: 2.1800 - accuracy: 0.7308 - val_loss: 1.9657 - val_accuracy: 0.6460
    Epoch 4/9
    16/16 [==============================] - 0s 16ms/step - loss: 1.5601 - accuracy: 0.7310 - val_loss: 1.5861 - val_accuracy: 0.6510
    Epoch 5/9
    16/16 [==============================] - 0s 16ms/step - loss: 1.2235 - accuracy: 0.7313 - val_loss: 1.4586 - val_accuracy: 0.6510
    Epoch 6/9
    16/16 [==============================] - 0s 16ms/step - loss: 1.0531 - accuracy: 0.7364 - val_loss: 1.4527 - val_accuracy: 0.6610
    Epoch 7/9
    16/16 [==============================] - 0s 16ms/step - loss: 0.9534 - accuracy: 0.7518 - val_loss: 1.4573 - val_accuracy: 0.6680
    Epoch 8/9
    16/16 [==============================] - 0s 16ms/step - loss: 0.8911 - accuracy: 0.7543 - val_loss: 1.4101 - val_accuracy: 0.6710
    Epoch 9/9
    16/16 [==============================] - 0s 16ms/step - loss: 0.8471 - accuracy: 0.7562 - val_loss: 1.4161 - val_accuracy: 0.6730
    71/71 [==============================] - 0s 3ms/step - loss: 1.5025 - accuracy: 0.6545
    
    [1.502501130104065, 0.6544969081878662]
    

     

    댓글