EDA Airbnb
Um método de Machine Learning que automatiza o processo de análise exploratória de dados (EDA).
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
%matplotlib inline
# Load the CSV at `airbnb_url` and take a first look at it: shape, top rows by
# price and by review count, mean price, a price boxplot, and a duplicate check.
airbnb_url = 'https://raw.githubusercontent.com/ManarOmar/New-York-Airbnb-2019/master/AB_NYC_2019.csv'

# Keep the frame exactly as downloaded in `airbnb_ori` and explore on an
# independent copy. A chained `df = airbnb_ori = ...` would bind both names to
# the same object, so every sort/drop below would also alter the "original".
airbnb_ori = pd.read_csv(airbnb_url)
df = airbnb_ori.copy()
airbnb_ori.head()
df.shape

# Most expensive listings first.
df = df.sort_values('price', ascending=False)
df.head()

# Most reviewed listings first; this is the row order `df` keeps afterwards.
df = df.sort_values('number_of_reviews', ascending=False)
print(df.head())

df.price.mean()
df.boxplot(column='price', vert=False)

# Listings priced above the threshold.
# NOTE(review): 152 looks like the mean price computed just above, truncated
# to an integer -- confirm before deriving it from the data instead.
price_threshold = 152
df[df.price > price_threshold]

# Rows that repeat an earlier row in every column.
# NOTE(review): `id` is still part of the frame here; if `id` is unique per
# listing this check can never flag anything -- confirm whether it should run
# without that column.
duplicados = df[df.duplicated()]
print(duplicados)

# `id` is not used by the analysis that follows in this section.
df = df.drop(columns=['id'])
# Violin plot of the listing prices on an 8x8-inch figure.
plt.figure(figsize=(8, 8))
# Pass the series by keyword so the violin is drawn along the x axis on every
# seaborn release; positionally it is read as `x` by older versions and as
# `data` by newer ones, which changes the orientation (and may warn).
sns.violinplot(x=df.price)
plt.title('Gráfico de Violino')
plt.show()

# Percentage histograms (10 bins, KDE overlay) for the review counts and the
# prices, each shown as its own figure with its own title.
for hist_column, hist_title in (
    ("number_of_reviews", 'percentual dos comentários'),
    ("price", 'percentual dos preços'),
):
    sns.histplot(data=df, x=hist_column, bins=10, stat="percent", kde=True)
    plt.title(hist_title)
    plt.show()