import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Apply seaborn's default plot settings before anything is drawn.
sns.set()

# Load the "tips" dataset from seaborn's repository of standard datasets;
# the resulting data frame is bound to the `tips` variable.
tips = sns.load_dataset("tips")

# Preview the first rows of the loaded data frame.
tips.head()

# Distribution of the values of the `total_bill` attribute, using distplot
# with its default settings.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (histplot / displot /
# kdeplot are the replacements) - confirm which seaborn version this exercise targets.
g = sns.distplot(tips["total_bill"])

# Same attribute, histogram only: the density approximation (KDE) is switched
# off and the values are discretized into 40 intervals.
g = sns.distplot(tips["total_bill"], bins=40, kde=False)

# Density approximation only: no histogram, with the individual data points
# drawn as well (rug).
g = sns.distplot(tips["total_bill"], hist=False, kde=True, rug=True)

# Draw a KDE curve for every attribute in the list, one kdeplot call each.
for col in ("tip", "total_bill"):
    sns.kdeplot(tips[col])

# Scatter plot sourced from the `tips` data frame: `tip` values on the x-axis,
# `total_bill` values on the y-axis.
g = sns.scatterplot(data=tips, x="tip", y="total_bill")

# Same scatter plot, with the `hue` parameter set to the `smoker` attribute so
# that the points are distinguished by color.
g = sns.scatterplot(data=tips, x="tip", y="total_bill", hue="smoker")

# Same scatter plot again, additionally passing the `size` attribute as `size`
# and the `sex` attribute as `style`.
g = sns.scatterplot(
    data=tips,
    x="tip",
    y="total_bill",
    hue="smoker",
    size="size",
    style="sex",
)

# Regression plot for the same pair of variables as in the scatter plot
# examples: `tip` on the x-axis, `total_bill` on the y-axis.
g = sns.regplot(data=tips, x="tip", y="total_bill")

# Analogous to the scatter plot example: the `hue` parameter distinguishes the
# graph according to the values of the `smoker` attribute.
g = sns.lmplot(data=tips, x="tip", y="total_bill", hue="smoker")

# Joint plot: the combination of the `tip` and `total_bill` values is drawn as
# a scatter plot (kind="scatter") and supplemented with visualizations of the
# value distribution of each attribute.
g = sns.jointplot(data=tips, x="tip", y="total_bill", kind="scatter")

# YOUR CODE HERE

# Columns of the data frame for which the pair plot is drawn.
col = ["tip", "total_bill", "size"]

# pairplot receives only the selected columns of `tips`.
g = sns.pairplot(data=tips[col])

# YOUR CODE HERE

# Count plot over the values of the `smoker` attribute.
g = sns.countplot(data=tips, x="smoker")

### YOUR CODE HERE

# Bar plot visualizing the average tip amount for men and women.
g = sns.barplot(data=tips, x="sex", y="tip")

# YOUR CODE HERE

# Box plot of the distribution of the tip amount (`tip` attribute) by gender.
g = sns.boxplot(data=tips, x="sex", y="tip")

# Box plot of the tip amount per day, with `hue` splitting every day by the
# `smoker` attribute (smokers / non-smokers).
g = sns.boxplot(data=tips, x="day", y="tip", hue="smoker")

# YOUR CODE HERE

# YOUR CODE HERE

# Strip plot sourced from the `tips` dataset: values of the `day` attribute on
# the x-axis, values of the `tip` attribute on the y-axis.
g = sns.stripplot(data=tips, x="day", y="tip")

# Tip amount for men and women (hue="sex") by individual days of the week.
# The jitter of the points is set manually to 0.3 and the color palette is
# set to coolwarm.
g = sns.stripplot(
    data=tips,
    x="day",
    y="tip",
    hue="sex",
    jitter=0.3,
    palette="coolwarm",
)

# YOUR CODE HERE

# Add a new column holding the tip's share of the total bill, in percent.
tips["tip_pct"] = (100 * tips["tip"]) / tips["total_bill"]

# Build a grid with one row per value of `sex` and one column per value of `time` ...
g = sns.FacetGrid(tips, col="time", row="sex")
# ... and draw a matplotlib histogram of `tip_pct` into the grid.
g.map(plt.hist, "tip_pct")

# Bar plots (kind="bar") of `tip` per `day`, distinguished by `sex` via `hue`,
# with the `smoker` attribute passed as the column variable.
g = sns.catplot(
    data=tips,
    kind="bar",
    x="day",
    y="tip",
    hue="sex",
    col="smoker",
)

# Correlation table of the numeric attributes of the tips data frame, computed
# once and reused by all three heatmaps below.
# The non-numeric (categorical) columns are excluded explicitly: pandas >= 2.0
# no longer drops them silently inside DataFrame.corr() and raises instead.
corr = tips.select_dtypes(include="number").corr()

# Plain heatmap of the correlation table.
g = sns.heatmap(corr)

# Same heatmap with the coolwarm color map and the values annotated in the cells.
g = sns.heatmap(corr, cmap="coolwarm", annot=True)

# Boolean mask with the same shape as the correlation table. The builtin `bool`
# is used as dtype because the `np.bool` alias was removed in NumPy 1.24.
mask = np.zeros_like(corr, dtype=bool)
# triu_indices_from() returns the indices of the upper triangle (diagonal
# included); the mask is set to True on them so that those cells are hidden.
mask[np.triu_indices_from(mask)] = True
# Heatmap drawn with the mask applied, annotated, with square cells.
g = sns.heatmap(corr, mask=mask, annot=True, square=True)

# Pivot table of the `total_bill` values, indexed by the size of the group
# (`size`) with one column per `day`; pivot_table's default aggregation is used.
heatmap_data = pd.pivot_table(
    tips,
    index=["size"],
    columns="day",
    values="total_bill",
)
# Annotated heatmap of the pivot table in the YlGnBu color map, without a color bar.
g = sns.heatmap(heatmap_data, cmap="YlGnBu", annot=True, cbar=False)

# Load the Titanic dataset from the repository of standard datasets.
titanic = sns.load_dataset("titanic")
# Heatmap of the elements of the data frame that are missing (isnull());
# the color bar is not drawn.
g = sns.heatmap(titanic.isnull(), cbar=False)

# Distribution of `tip` drawn with the currently active style.
g = sns.distplot(tips["tip"])

# Same plot, with the 'darkgrid' style applied through the axes_style()
# context manager for the duration of the `with` block.
with sns.axes_style("darkgrid"):
    g = sns.distplot(tips["tip"])

# Distribution of `tip`, followed by despine() with its default arguments.
g = sns.distplot(tips["tip"])
# despine() has no return value, so its result is not bound to `g`
# (binding it would replace the Axes handle with None).
sns.despine()

# Same plot, with despine() called with explicit settings: offset=5, trim=True,
# left=True and bottom=False.
g = sns.distplot(tips["tip"])
sns.despine(offset=5, trim=True, left=True, bottom=False)

# Switch the plotting context to 'talk' with a font scale of 1.3, then draw.
sns.set_context("talk", font_scale=1.3)
g = sns.distplot(tips["tip"])

# Within the 'whitegrid' style, draw the plot and set its axis labels
# through the set() method of the object returned by distplot.
with sns.axes_style("whitegrid"):
    g = sns.distplot(tips["tip"])
    g.set(ylabel='distribúcia', xlabel='deň')

g = sns.distplot(tips["tip"])
# Seaborn can be combined with matplotlib: here matplotlib's title() function
# renders the graph header.
g = plt.title('Distribution plot example', fontweight='bold', fontsize=14)

# Set the "Dark2" color palette and draw the plot again.
sns.set_palette("Dark2")
g = sns.distplot(tips["tip"])

Visualizations using the Seaborn library - exercise 1¶

Loading dataset¶

Visualizations of value distribution - numerical attributes¶

Visualizations of the dependence of two numerical variables¶

Scatter plots¶

Regression plots¶

Combined visualizations - Joint plot¶

Task 4.1¶

Combined visualizations - Pair graph¶

Task 4.2¶

Visualizations of value distribution - categorical attributes¶

Task 4.3¶

Visualizations of interdependence of 2 variables of different types¶

Bar chart¶

Task 4.4¶

Box plot¶

Task 4.5¶

Scatter plots for categorical variables¶

Strip plot¶

Swarm plot¶

Task 4.6¶

Combined visualizations - Facet grids¶

Facet histograms¶

Plotting dependence with a third categorical variable¶

Heat maps¶

Style and rendering settings¶

Task 4.7¶