DAY 1 : EDA
210823
import os

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as img
import cv2

# Load the training metadata table from the CSV shipped with the dataset.
# (The original export had fused `import os` and this assignment into one
# invalid statement: `import osdata = pd.read_csv(...)`.)
data = pd.read_csv('./input/data/train/train.csv')

# Bare expression: in a notebook cell this renders the DataFrame.
data


개선점
Last updated
210823
import os

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as img
import cv2

# Load the training metadata table from the CSV shipped with the dataset.
# (The original export had fused `import os` and this assignment into one
# invalid statement: `import osdata = pd.read_csv(...)`.)
data = pd.read_csv('./input/data/train/train.csv')

# Bare expression: in a notebook cell this renders the DataFrame.
data


Last updated
# Print column names, non-null counts and dtypes of the metadata table.
data.info()
# Output recorded from the original run:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 2700 entries, 0 to 2699
# Data columns (total 5 columns):
#  #   Column  Non-Null Count  Dtype
# ---  ------  --------------  -----
#  0   id      2700 non-null   object
#  1   gender  2700 non-null   object
#  2   race    2700 non-null   object
#  3   age     2700 non-null   int64
#  4   path    2700 non-null   object
# dtypes: int64(1), object(4)
# memory usage: 105.6+ KB

# Summary statistics for every column (numeric and object alike).
data.describe(include='all')

# Number of rows for each (gender, age) pair, ordered by gender then age so
# that the per-gender slices taken later are sorted by age.
group = data.groupby('gender')['age'].value_counts().sort_index()
# Two bar charts side by side (male left, female right) showing how many rows
# exist for each age; sharey=True keeps both panels on the same count scale.
fig, axes = plt.subplots(1, 2, figsize=(15, 7), sharey=True)
axes[0].bar(group['male'].index, group['male'], color='royalblue')
axes[1].bar(group['female'].index, group['female'], color='tomato')
plt.show()

# Root directory of the training images; each value in data['path'] is joined
# onto it to locate one folder of image files.
DATA_DIR = './input/data/train/images/'
# File names expected inside each per-person image folder.
FILES = ['mask1.jpg', 'mask2.jpg', 'mask3.jpg', 'mask4.jpg', 'mask5.jpg', 'incorrect_mask.jpg', 'normal.jpg']

# Grid of 5 randomly sampled rows (one per grid row) x 7 image files (one per
# grid column).
fig, axes = plt.subplots(5, 7, figsize=(20, 20), dpi=150)
for row_axes in axes:
    # Draw one random row from the metadata and resolve its image folder.
    sample = data.sample(1)
    path = sample['path'].values[0]
    sample_path = os.path.join(DATA_DIR, path)
    for ax, name in zip(row_axes, FILES):
        image_path = os.path.join(sample_path, name)
        image = img.imread(image_path)
        ax.imshow(image)
        ax.axis('off')
        # Title each panel with the file name minus its extension.
        ax.set_title(os.path.splitext(name)[0])
plt.show()