
Trying Out Pokemon Classification

by 코낄2 2024. 1. 14.
 

Two Kaggle datasets are used:

- Pokemon Generation One: "Gotta train 'em all!" (www.kaggle.com)
- Complete Pokemon Image Dataset: 2,500+ clean labeled images, all official art, for Generations 1 through 8. (www.kaggle.com)

import os

# Kaggle API credentials (fill in your own)
os.environ['KAGGLE_USERNAME'] = 'your_username'
os.environ['KAGGLE_KEY'] = 'your_issued_key'

# Download the datasets
!kaggle datasets download -d thedagger/pokemon-generation-one
!kaggle datasets download -d hlrhegemony/pokemon-image-dataset

# Unzip the archives
!unzip -q pokemon-generation-one.zip
!unzip -q pokemon-image-dataset.zip
# Rename the dataset directory to train
!mv dataset train
# Rename the images directory to validation
!mv images validation

 

- The train folder contains another nested dataset folder (a duplicate).

- Remove it with the rm command. The -r option deletes directories recursively, and -f forces the removal so no error is raised even if the path doesn't exist.

!rm -rf train/dataset
train_labels = os.listdir('train')  # list the directories inside train
print(train_labels)       # ['Shellder', 'Squirtle', 'Muk', 'Charmeleon'...'Nidoqueen', 'Kabuto']
print(len(train_labels))  # 149
val_labels = os.listdir('validation')
print(val_labels)         # ['Binacle', 'Fennekin', 'Golett' ... 'Huntail', 'Cutiefly']
print(len(val_labels))    # 898

# train: 149 classes, validation: 898 classes
# Remove every validation directory whose class does not exist in train

import shutil

for val_label in val_labels:
    if val_label not in train_labels:
        shutil.rmtree(os.path.join('validation',val_label))

val_labels = os.listdir('validation')
len(val_labels)  # 147

# validation is down to 147 classes; two classes present in train are missing

# Find out which classes are missing, create folders for them, and add photos

for train_label in train_labels:
    if train_label not in val_labels:
        print(train_label)
        os.makedirs(os.path.join('validation', train_label), exist_ok=True)

# => MrMime
# => Farfetchd

I found images for the newly created MrMime and Farfetchd folders and added them by hand (a scripted alternative is sketched below).
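For reference, this step can also be scripted. A minimal sketch, assuming you are willing to borrow a few images from the matching train folders (note the caveat in the comment):

import shutil

# Sketch: seed the two empty validation folders with a few train images.
# Caveat: borrowing from train leaks those images into validation, so
# hand-picked separate images (as done above) are the cleaner option.
for label in ['MrMime', 'Farfetchd']:
    src_dir = os.path.join('train', label)
    for fname in sorted(os.listdir(src_dir))[:5]:  # first 5 images per class
        shutil.copy(os.path.join(src_dir, fname),
                    os.path.join('validation', label, fname))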

val_labels = os.listdir('validation')
len(val_labels)  # 149
# Import the modules we need
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
# Check whether a GPU is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)  # cpu
# Apply image augmentation
# data_transforms
# train / validation
data_transforms = {
    'train': transforms.Compose([  # Compose: chain transforms so they run together
        transforms.Resize((224, 224)),
        # (degrees (first positional argument, so the keyword is omitted), shear, scale range)
        transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
        # Random horizontal flip
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ]),
    'validation': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()])
}
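Before building the datasets, it can help to see what the train-time augmentation actually produces. A minimal sketch that applies the same random transform to one image several times (the sample path is a hypothetical example):

from PIL import Image

# Each call re-samples the random affine/flip, so every panel looks different
sample_img = Image.open('train/Pikachu/0.jpg').convert('RGB')  # hypothetical path
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
for ax in axes:
    augmented = data_transforms['train'](sample_img)
    ax.imshow(augmented.permute(1, 2, 0))
    ax.axis('off')
plt.show()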
# Create the dataset objects
# image_datasets
# train / validation

image_datasets = {
    # A dataset object is created under each key name
    'train': datasets.ImageFolder('train', data_transforms['train']),
    'validation': datasets.ImageFolder('validation', data_transforms['validation'])
}
print(len(image_datasets['train']), len(image_datasets['validation']))
# => 10657 663
# Create the dataloaders
# dataloaders
# batch_size = 32
# shuffle=True

dataloaders = {
    'train' : DataLoader(
        image_datasets['train'],
        batch_size=32,
        shuffle=True
    ),
    'validation': DataLoader(
        image_datasets['validation'],
        batch_size=32,
        shuffle=False
    )
}
# Display a 4x8 grid of images

imgs, labels = next(iter(dataloaders['train']))

fig, axes = plt.subplots(4, 8, figsize=(20, 10))

for img, label, ax in zip(imgs, labels, axes.flatten()):
    ax.imshow(img.permute(1, 2, 0))  # CHW -> HWC for matplotlib
    ax.set_title(image_datasets['train'].classes[label])  # class name instead of the raw label index
    ax.axis('off')

 

- Trying out the EfficientNetB4 model

# Pretrained EfficientNetB4 model
# Normally none of this patching is needed, but the efficientnet weight download
# kept erroring, so we work around it by patching get_state_dict
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
from torchvision.models._api import WeightsEnum
from torch.hub import load_state_dict_from_url

def get_state_dict(self, *args, **kwargs):
    kwargs.pop("check_hash")
    return load_state_dict_from_url(self.url, *args, **kwargs)
WeightsEnum.get_state_dict = get_state_dict

efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)  # triggers the (patched) weight download
model = efficientnet_b4(weights="DEFAULT").to(device)
# Modify the FC layer
# print(model)

for param in model.parameters():
    param.requires_grad = False  # freeze the pretrained parameters (W, b) so they are not updated

model.classifier = nn.Sequential(
    nn.Linear(1792, 256),
    nn.ReLU(),
    nn.Linear(256, 149)
).to(device)
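A quick sanity check (my addition, not in the original post) that the backbone is frozen and only the new head will be trained:

# Count trainable vs. total parameters; only the classifier head should be trainable
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f'trainable: {trainable:,} / total: {total:,}')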

 

# Verify the change

print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
            (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivation(
            (0): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      ...
  (classifier): Sequential(
    (0): Linear(in_features=1792, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=149, bias=True)
  )
)
# Run train and validation in a single loop
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
epochs = 10
for epoch in range(epochs):
    for phase in ['train', 'validation']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        sum_losses = 0
        sum_accs = 0
        for x_batch, y_batch in dataloaders[phase]:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            y_pred = model(x_batch)
            loss = nn.CrossEntropyLoss()(y_pred, y_batch)
            if phase == 'train':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            y_prob = nn.Softmax(1)(y_pred)
            y_pred_index = torch.argmax(y_prob, axis=1)
            acc = (y_batch == y_pred_index).float().sum() / len(y_batch) * 100

            sum_losses = sum_losses + loss.item()  # .item() detaches the value so the autograd graph is freed
            sum_accs = sum_accs + acc.item()
        avg_loss = sum_losses / len(dataloaders[phase])
        avg_acc = sum_accs / len(dataloaders[phase])
        print(f'{phase:10s}: Epoch {epoch+1:4d}/{epochs} Loss: {avg_loss:.4f} Accuracy: {avg_acc:.2f}%')
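To see curves instead of just the printed lines, a sketch, assuming you append each phase's avg_loss and avg_acc to the (hypothetical) history dict below from inside the loop above:

# Hypothetical history dict; fill it inside the training loop, e.g.
#   history[phase]['loss'].append(avg_loss)
#   history[phase]['acc'].append(avg_acc)
history = {'train': {'loss': [], 'acc': []},
           'validation': {'loss': [], 'acc': []}}

# ... after training has filled `history` ...
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
for phase in ['train', 'validation']:
    ax1.plot(history[phase]['loss'], label=phase)
    ax2.plot(history[phase]['acc'], label=phase)
ax1.set_title('Loss per epoch')
ax2.set_title('Accuracy per epoch')
ax1.legend()
ax2.legend()
plt.show()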

✔️ Saving the trained model

# Save the trained model to a file
torch.save(model.state_dict(), 'model.pth')  # TensorFlow equivalent: model.h5

✔️ Loading it back

model = models.efficientnet_b4()

model.classifier = nn.Sequential(
    nn.Linear(1792, 256),
    nn.ReLU(),
    nn.Linear(256, 149)
)
model.load_state_dict(torch.load('/content/model.pth'))
# map_location=torch.device('cpu') is needed when loading a GPU-trained model on a CPU
model.eval()
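The map_location note in code form, a minimal sketch for loading the GPU-trained checkpoint on a CPU-only machine:

# Remap GPU-saved tensors to CPU storage while loading
state_dict = torch.load('/content/model.pth', map_location=torch.device('cpu'))
model.load_state_dict(state_dict)
model.eval()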

 

# Check the trained model's predictions

from PIL import Image

img1 = Image.open('/content/validation/Ditto/0.jpg')
img2 = Image.open('/content/validation/Charmander/0.jpg')

fig, axes = plt.subplots(1,2, figsize = (12, 6))
axes[0].imshow(img1)
axes[0].axis('off')
axes[1].imshow(img2)
axes[1].axis('off')
plt.show()

img1_input = data_transforms['validation'](img1)
img2_input = data_transforms['validation'](img2)
print(img1_input.shape)  # torch.Size([3, 224, 224])
print(img2_input.shape)  # torch.Size([3, 224, 224])

test_batch = torch.stack([img1_input, img2_input])
test_batch = test_batch.to(device)
test_batch.shape  # torch.Size([2, 3, 224, 224])

 

# torch.topk returns the top k values from the input tensor along with their indices.

y_pred = model(test_batch)
y_prob = nn.Softmax(1)(y_pred)

probs, idx = torch.topk(y_prob, k=3)

print(probs)
# => tensor([[0.9906, 0.0028, 0.0019],
#            [0.8180, 0.1767, 0.0020]], grad_fn=<TopkBackward0>)

print(idx)
# => tensor([[ 22,  18,  28],
#            [ 14, 127,  60]])
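A tiny standalone example of what torch.topk returns:

# topk returns (values, indices), sorted by value in descending order
values, indices = torch.topk(torch.tensor([0.1, 0.7, 0.2]), k=2)
print(values)   # tensor([0.7000, 0.2000])
print(indices)  # tensor([1, 2])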
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
axes[0].set_title('{:.2f}% {}, {:.2f}% {}, {:.2f}% {}'.format(
    probs[0, 0] * 100,
    image_datasets['validation'].classes[idx[0, 0]],
    probs[0, 1] * 100,
    image_datasets['validation'].classes[idx[0, 1]],
    probs[0, 2] * 100,
    image_datasets['validation'].classes[idx[0, 2]],
))
axes[0].imshow(img1)
axes[0].axis('off')
axes[1].set_title('{:.2f}% {}, {:.2f}% {}, {:.2f}% {}'.format(
    probs[1, 0] * 100,
    image_datasets['validation'].classes[idx[1, 0]],
    probs[1, 1] * 100,
    image_datasets['validation'].classes[idx[1, 1]],
    probs[1, 2] * 100,
    image_datasets['validation'].classes[idx[1, 2]],
))
axes[1].imshow(img2)
axes[1].axis('off')
plt.show()

It classified both Ditto and Charmander correctly.

As a bonus, feed in a personal photo and display the reference image of one of the top predicted Pokemon next to it.


my_img = Image.open('/content/chung.jpg')
my_img_input = data_transforms['validation'](my_img)
my_batch = torch.stack([my_img_input])
my_batch = my_batch.to(device)
my_pred = model(my_batch)
my_prob = nn.Softmax(1)(my_pred)
val, idx = torch.topk(my_prob, 3)
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
axes[0].imshow(my_img)
axes[0].axis('off')
axes[1].set_title('{:.2f}% {}, {:.2f}% {}, {:.2f}% {}'.format(
    val[0, 0] * 100,
    image_datasets['validation'].classes[idx[0, 0]],
    val[0, 1] * 100,
    image_datasets['validation'].classes[idx[0, 1]],
    val[0, 2] * 100,
    image_datasets['validation'].classes[idx[0, 2]],
))
src = image_datasets['validation'].classes[idx[0, 1]]
i = Image.open(f'validation/{src}/0.jpg')
axes[1].imshow(i)
axes[1].axis('off')
plt.show()
