import re # for regular expressions
import nltk # for text manipulation
import string
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the white-wine quality data; the file uses ';' as its field separator.
df = pd.read_csv("winequality-white.csv", sep=";")

# First and last rows (5 is the pandas default row count).
df.head(n=5)
df.tail(n=5)

# Dimensions, column dtypes / non-null counts, and summary statistics.
df.shape
df.info()
df.describe()

# Distinct values of the 'quality' column and how often each occurs.
df["quality"].unique()
df["quality"].value_counts()

# Pairwise correlation between the columns.
df.corr()
# Correlation matrix of the DataFrame columns, drawn as a heatmap.
corr = df.corr()

# Colour scale is pinned to [-1, 1] and centred on 0.
diverging_cmap = sns.diverging_palette(20, 220, n=200)
ax = sns.heatmap(
    corr,
    vmin=-1,
    vmax=1,
    center=0,
    cmap=diverging_cmap,
    square=True,
    linewidths=0.5,
)

# Re-apply the existing x tick labels rotated by 45 degrees and right-aligned.
# The trailing ';' is kept from the notebook, where it hides the cell's echo.
xtick_labels = ax.get_xticklabels()
ax.set_xticklabels(xtick_labels, rotation=45, horizontalalignment='right');
# Vertical boxplot for each of three selected columns.
# Each plot is given its own figure and an explicit target Axes: when no `ax`
# is passed, seaborn draws on the current Axes, so back-to-back calls would
# all be painted on top of one another (and on any plot drawn just before).
for column in ('fixed acidity', 'volatile acidity', 'alcohol'):
    _, column_ax = plt.subplots()
    # As before, b_plot ends up referring to the last plot ('alcohol').
    b_plot = sns.boxplot(y=column, data=df, width=0.5, ax=column_ax)
# Grid of vertical boxplots, one subplot per DataFrame column.
l = df.columns.values
number_of_columns = 12

# Number of grid rows needed to hold every column (integer ceiling division,
# at least one row). The previous expression `12-1/number_of_columns` was
# evaluated as 12 - (1/12), a float near 11.92, which is not a valid subplot
# grid size and inflated the figure height roughly twelvefold.
number_of_rows = max(1, (len(l) + number_of_columns - 1) // number_of_columns)

# Apply the style before any Axes exist so that every subplot picks it up.
sns.set_style('whitegrid')
plt.figure(figsize=(number_of_columns, 10 * number_of_rows))

for i, column_name in enumerate(l):
    grid_ax = plt.subplot(number_of_rows, number_of_columns, i + 1)
    # Passing the data as `y` makes the box vertical without relying on how
    # a positional Series combined with `orient` is interpreted.
    sns.boxplot(y=df[column_name], color='green', ax=grid_ax)

# Lay out once, after all subplots have been created.
plt.tight_layout()