%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


## 基本設定

# Global seaborn theme for the plots below: render text with the
# 'IPAPMincho' font and enlarge all font sizes by a factor of 1.8.
# NOTE(review): presumably this font was chosen so CJK glyphs render — confirm.
sns.set(
    font_scale=1.8,
    font='IPAPMincho',
)


## Histogram

# Sample data for the histogram demo: 1000 draws from the standard normal
# distribution (unseeded, so the values differ on every run).
data = np.random.standard_normal(1000)
# Peek at the first ten samples (shown as the cell result in a notebook).
data[:10]

array([-0.53267554,  0.03851161, -0.16072742, -0.70889663,  0.23085979,
-1.61295347, -0.46508874,  0.60112507,  0.42017249, -0.73656917])

seaborn 是建立在 matplotlib 之上, 因此 matplotlib 也可以直接拿來跟 seaborn 產生的圖互動

# Compare two seaborn styles side by side on a single matplotlib figure.
plt.figure(figsize=(10, 5))

# Left panel: the style currently in effect, histogram with a KDE curve.
plt.subplot(1, 2, 1)
plt.title('Default style with kde')
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot /
# displot replace it); kept to match the seaborn version this file targets.
sns.distplot(data, kde=True)

# Right panel: switch the style BEFORE creating the axes. Style settings are
# applied through matplotlib rcParams, which are read when the axes are built,
# so changing the style after plt.subplot() would not restyle this panel.
sns.set_style('dark')
plt.subplot(1, 2, 2)
plt.title('Dark style without kde')
# Trailing semicolon suppresses the returned object's repr in a notebook.
sns.distplot(data, kde=False);


## Scatter plot

# Scatter-plot demo data: two independent columns, 'x' then 'y', each holding
# 100 standard-normal samples.
df = pd.DataFrame({axis: np.random.randn(100) for axis in ('x', 'y')})

x y
0 -2.863752 -1.066424
1 -0.779238 0.862169
2 0.016786 -0.016519
3 0.948504 0.298314
4 2.029428 1.211997

# Re-apply seaborn's default theme; called with no arguments, so the 'dark'
# style and the custom font / font_scale set earlier no longer apply.
sns.set()


• 一般用lmplot畫, 然後設定 fit_reg=False 就可以讓 regression line 消失. 有時候有沒有那條線影響圖很大
• 一樣先 x, 再 y
# Draw the same scatter plot twice: once with the fitted regression line and
# once without it, to show how much the line changes the reading of the plot.
for fit_reg in (True, False):
    # x / y are passed by keyword: this works on old seaborn releases and is
    # required by newer ones, which reject positional data variables.
    # The diamond marker goes through lmplot's own `markers` argument.
    # NOTE(review): regplot appears to overwrite scatter_kws['marker'] with
    # its own marker argument, so setting it there has no effect — confirm
    # against the installed seaborn version.
    sns.lmplot(x='x', y='y',
               data=df,
               fit_reg=fit_reg,
               markers='D',
               scatter_kws={'s': 100})
    title = 'Show regression line' if fit_reg else 'Without regression line'
    # lmplot creates a new figure per call; title its current axes.
    plt.title(title)


## Correlation matrix / Heatmap

# Correlation demo data: three independent columns ('x1', 'x2', 'x3'), each
# holding 100 standard-normal samples.
df = pd.DataFrame(
    {column: np.random.randn(100) for column in ('x1', 'x2', 'x3')}
)

x1 x2 x3
0 1.269566 0.349083 -0.000743
1 -1.634587 0.072568 0.042596
2 -0.581238 -0.337935 -0.412084
3 -0.080881 -1.376481 1.361046
4 -0.609886 -1.061285 0.265788

# Pairwise correlation matrix of the columns, after casting every column to
# float. 'pearson' is pandas' default method, spelled out here for clarity.
corr = df.astype(float).corr(method='pearson')
# Display the matrix (shown as the cell result in a notebook).
corr

x1 x2 x3
x1 1.000000 -0.034731 0.032407
x2 -0.034731 1.000000 -0.192169
x3 0.032407 -0.192169 1.000000
# Use a slightly larger font scale for the heatmap labels and annotations.
sns.set(font_scale=1.5)
# Render the correlation matrix as an annotated heatmap on the 'Blues'
# colormap; both axes are labelled with the matrix's column names.
# Trailing semicolon suppresses the returned object's repr in a notebook.
sns.heatmap(
    corr,
    annot=True,
    annot_kws={'size': 15},
    cmap='Blues',
    xticklabels=corr.columns.values,
    yticklabels=corr.columns.values,
);