注:摘自泰坦尼克和房价预测
房价预测
Scatter
# Check the graphic again: scatter of living area against sale price.
# `train` and `plt` are expected to be defined by the surrounding notebook.
fig, ax = plt.subplots()
ax.scatter(train['GrLivArea'], train['SalePrice'])
plt.ylabel('SalePrice', fontsize=13)
plt.xlabel('GrLivArea', fontsize=13)
plt.show()
# Scatter of one feature column against SalePrice using the pandas plot API.
# The column name lives in `var` so the snippet can be reused for other
# features by changing a single line.
var = 'GrLivArea'
data = pd.concat([train['SalePrice'], train[var]], axis=1)
# The y axis is capped at 800000 to keep the plot readable.
data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000))
# Scatter of basement area against sale price.
# Keep only rows that have a SalePrice (in the combined train+test frame the
# rows without a price are presumably the test rows -- confirm with caller).
tmp = train_test[train_test['SalePrice'].notnull()]
# Rows with TotalBsmtSF == 0 are dropped so they do not pile up on the axis.
with_basement = tmp[tmp['TotalBsmtSF'] > 0]
plt.scatter(with_basement['TotalBsmtSF'], with_basement['SalePrice'])
# Pairwise scatter/histogram grid for SalePrice and a handful of features.
sns.set()
cols = [
    'SalePrice',
    'OverallQual',
    'GrLivArea',
    'GarageCars',
    'TotalBsmtSF',
    'FullBath',
    'YearBuilt',
]
# NOTE: seaborn >= 0.9 renamed the `size` argument to `height`; the old
# spelling is rejected by current releases.
sns.pairplot(train[cols], height=2.5)
plt.show()
Heatmap
# Annotated heatmap of a correlation matrix.
# `cm` (the matrix) and `cols` (its labels) are defined elsewhere; `cols` must
# expose `.values`, so it is presumably a pandas Index rather than a plain
# list -- TODO confirm against the cell that builds it.
sns.set(font_scale=1.5)
plt.subplots(figsize=(20, 12))  # enlarge the canvas before drawing
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    annot_kws={'size': 10},
    yticklabels=cols.values,
    xticklabels=cols.values,
)
plt.show()
# Hist & Curve ![https://github.com/Cooper111/kaggle/tree/dev/competitions/getting-started/house-price](https://github.com/Cooper111/kaggle/raw/dev/static/images/competitions/getting-started/house-price/output_25_1.png)
# Side-by-side histograms: raw SalePrice (left) and log1p(SalePrice) (right).
fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
ax1.hist(train.SalePrice)
ax2.hist(np.log1p(train.SalePrice))
# Distribution of SalePrice with a fitted normal curve, followed by a
# probability plot in a separate figure.
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases; it is
# kept here because `fit=norm` has no direct equivalent in `histplot`.
sns.distplot(train['SalePrice'], fit=norm)
fig = plt.figure()
res = stats.probplot(train['SalePrice'], plot=plt)
# Line plot of cross-validation error for each candidate n_estimators value.
# `params` and `test_scores` are produced by a search loop defined elsewhere.
plt.plot(params, test_scores)
plt.title('n_estimators vs CV Error')
plt.show()
# Plot the error curves: training loss and cross-validation loss against the
# training-set size, expressed as a fraction of len(x_train).
size_fraction = train_sizes / len(x_train)
plt.plot(size_fraction, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(size_fraction, test_loss_mean, 'o-', color='g',
         label='Cross-Validation')
plt.xlabel('Training data size')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()
泰坦尼克
只上新
# Bar chart of the mean of Survived for each Pclass.
# Without ".bar" (plain .plot()) the result is drawn as a continuous line.
survival_by_class = train[['Pclass', 'Survived']].groupby(['Pclass']).mean()
survival_by_class.plot.bar()
# Age histograms, one facet per value of Survived.
# NOTE: seaborn >= 0.9 renamed FacetGrid's `size` argument to `height`.
g = sns.FacetGrid(train, col='Survived', height=5)
g.map(plt.hist, 'Age', bins=40)
# Count of passengers per Embarked value, split by Survived.
# The column is passed as `x=` because current seaborn treats the first
# positional argument as `data`, which would clash with `data=train`.
sns.countplot(x='Embarked', hue='Survived', data=train)