绘图实例

摘自 Kaggle 入门竞赛「泰坦尼克」(Titanic)和「房价预测」(House Prices)的 notebook

房价预测

Scatter

https://blog.csdn.net/Irving_zhang/article/details/78561105

# Re-draw the GrLivArea vs. SalePrice scatter plot to inspect it again.
fig, ax = plt.subplots()
ax.scatter(x=train['GrLivArea'], y=train['SalePrice'])
# Label through the Axes object that was just created.
ax.set_xlabel('GrLivArea', fontsize=13)
ax.set_ylabel('SalePrice', fontsize=13)
plt.show()

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Scatter plot of SalePrice against one feature column, using pandas plotting.
# `var` is the name of the feature column used for the x-axis.
var = 'GrLivArea'
# Put the two columns side by side in one frame so DataFrame.plot can use them.
data = pd.concat([train['SalePrice'], train[var]], axis=1)
# ylim fixes the y-axis range to 0..800000; the trailing semicolon suppresses
# the Axes repr when this runs as the last line of a notebook cell.
data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000));

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Keep only the rows of the combined frame whose SalePrice is not null.
tmp = train_test[train_test['SalePrice'].notnull()]

# Scatter TotalBsmtSF against SalePrice for rows where TotalBsmtSF > 0.
# The filtered frame is built once and reused for both axes.
positive_bsmt = tmp[tmp['TotalBsmtSF'] > 0]
plt.scatter(positive_bsmt['TotalBsmtSF'], positive_bsmt['SalePrice'])

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Grid of pairwise scatter plots (with per-column distributions on the
# diagonal) for SalePrice and a hand-picked set of feature columns.
sns.set()
cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
# `height` is the per-facet size in inches; seaborn renamed it from `size`
# in 0.9 and newer releases no longer accept `size`.
sns.pairplot(train[cols], height=2.5)
plt.show()





Heatmap

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Annotated heatmap of the matrix `cm`, with `cols` as tick labels on both axes.
# NOTE(review): `cm` is not defined in this snippet; presumably it is the
# correlation matrix of train[cols] (e.g. from np.corrcoef) -- confirm.
sns.set(font_scale=1.5)
# Create the figure first so the heatmap is drawn on a 20x12 inch canvas.
plt.subplots(figsize=(20, 12))
# list(...) works whether `cols` is a plain list or a pandas Index, whereas
# `cols.values` only exists on the latter.
tick_labels = list(cols)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=tick_labels, xticklabels=tick_labels)
plt.show()




# Hist & Curve ![https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price](https://github.com/Cooper111/kaggle/raw/dev/static/images/competitions/getting-started/house-price/output_25_1.png)
# Two histograms side by side on one 12x6 inch figure:
# left = raw SalePrice, right = log1p(SalePrice).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
ax1.hist(train['SalePrice'])
ax2.hist(np.log1p(train['SalePrice']))

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Distribution of SalePrice with a fitted normal curve overlaid, followed by
# a probability plot on a separate figure.
sale_price = train['SalePrice']
# NOTE(review): sns.distplot is deprecated since seaborn 0.11; histplot /
# displot are the suggested replacements, but they have no `fit=` option.
sns.distplot(sale_price, fit=norm)
# New figure so the probability plot does not draw over the histogram.
fig = plt.figure()
res = stats.probplot(sale_price, plot=plt)

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Line plot of test_scores against params on the current axes.
axes = plt.gca()
axes.plot(params, test_scores)
axes.set_title('n_estimators vs CV Error')
plt.show()

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price

# Plot the error curves: mean training loss and mean cross-validation loss
# against the fraction of x_train used.
size_fraction = train_sizes / len(x_train)
for loss_mean, colour, tag in (
    (train_loss_mean, 'r', 'Training'),
    (test_loss_mean, 'g', 'Cross-Validation'),
):
    plt.plot(size_fraction, loss_mean, 'o-', color=colour, label=tag)

plt.xlabel('Training data size')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()

泰坦尼克

只列出上文(房价预测部分)没有出现过的新图表类型。
https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/titanic

# Mean of Survived for each Pclass value, drawn as a bar chart.
survived_by_pclass = train[['Pclass', 'Survived']].groupby(['Pclass']).mean()
survived_by_pclass.plot.bar()
# Without `.bar` (i.e. plain `.plot()`) the result is a continuous line.

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/titanic

# One facet per distinct value of Survived, each holding a 40-bin histogram of Age.
# `height` is the per-facet size in inches; seaborn renamed it from `size`
# in 0.9 and newer releases no longer accept `size`.
g = sns.FacetGrid(train, col='Survived', height=5)
g.map(plt.hist, 'Age', bins=40)

https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/titanic

# Row counts per Embarked value, split into separate bars by Survived.
# The column name is passed as `x=`: current seaborn takes `data` as the first
# positional parameter, so a positional column name clashes with `data=train`.
sns.countplot(x='Embarked', hue='Survived', data=train)