Open In Colab

Perguntas que a análise irá validar

  • Quais são as regiões mais caras e as mais baratas?
  • Quais são as características dos imóveis com valor de aluguel mais alto?
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
%matplotlib inline
# CSV with the Airbnb listings (file AB_NYC_2019.csv, hosted on GitHub).
airbnb_url = 'https://raw.githubusercontent.com/ManarOmar/New-York-Airbnb-2019/master/AB_NYC_2019.csv'
# Keep the raw download untouched in `airbnb_ori` and work on an independent
# copy. Later cells sort and drop columns of `df` in place; when both names
# were bound to the same object those operations also altered `airbnb_ori`.
airbnb_ori = pd.read_csv(airbnb_url)
df = airbnb_ori.copy()
# Preview the first rows of the raw data.
airbnb_ori.head()
id name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365
0 2539 Clean & quiet apt home by the park 2787 John Brooklyn Kensington 40.64749 -73.97237 Private room 149 1 9 2018-10-19 0.21 6 365
1 2595 Skylit Midtown Castle 2845 Jennifer Manhattan Midtown 40.75362 -73.98377 Entire home/apt 225 1 45 2019-05-21 0.38 2 355
2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 Elisabeth Manhattan Harlem 40.80902 -73.94190 Private room 150 3 0 NaN NaN 1 365
3 3831 Cozy Entire Floor of Brownstone 4869 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 Entire home/apt 89 1 270 2019-07-05 4.64 1 194
4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 Laura Manhattan East Harlem 40.79851 -73.94399 Entire home/apt 80 10 9 2018-11-19 0.10 1 0
# (rows, columns) of the working DataFrame.
df.shape
(48895, 16)
# Reorder the working frame in place, most expensive listings first,
# then show the top five rows.
df.sort_values(by='price', ascending=False, inplace=True)
df.head(5)
id name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365
9151 7003697 Furnished room in Astoria apartment 20582832 Kathrine Queens Astoria 40.76810 -73.91651 Private room 10000 100 2 2016-02-13 0.04 1 0
29238 22436899 1-BR Lincoln Center 72390391 Jelena Manhattan Upper West Side 40.77213 -73.98665 Entire home/apt 10000 30 0 NaN NaN 1 83
17692 13894339 Luxury 1 bedroom apt. -stunning Manhattan views 5143901 Erin Brooklyn Greenpoint 40.73260 -73.95739 Entire home/apt 10000 5 5 2017-07-27 0.16 1 0
40433 31340283 2br - The Heart of NYC: Manhattans Lower East ... 4382127 Matt Manhattan Lower East Side 40.71980 -73.98566 Entire home/apt 9999 30 0 NaN NaN 1 365
6530 4737930 Spanish Harlem Apt 1235070 Olson Manhattan East Harlem 40.79264 -73.93898 Entire home/apt 9999 5 1 2015-01-02 0.02 1 0
# Reorder the working frame in place, most-reviewed listings first,
# and print the top five rows.
df.sort_values(by='number_of_reviews', ascending=False, inplace=True)
print(df.head(5))
             id  ... availability_365
11759   9145202  ...              333
2031     903972  ...              293
2030     903947  ...              342
2015     891117  ...              339
13495  10101135  ...              173

[5 rows x 16 columns]
# Average of the `price` column across all listings.
df['price'].mean()
152.7206871868289
# Horizontal box plot of the `price` column.
df.boxplot(column='price', vert=False)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff98bbe4150>
# Listings priced above the average. The threshold is computed from the data
# rather than hard-coding a truncated copy of the mean, so the filter stays
# correct if the dataset changes.
df[df.price > df.price.mean()]
name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365
4870 Private brownstone studio Brooklyn 12949460 Asa Brooklyn Park Slope 40.67926 -73.97711 Entire home/apt 160 1 488 2019-07-01 8.14 1 269
2163 TriBeCa 2500 Sq Ft w/ Priv Elevator 273174 Jon Manhattan Tribeca 40.71927 -74.00453 Entire home/apt 575 1 447 2019-07-01 5.89 3 207
1547 NYC 1st Shipping Container Home 3587751 Janet-David Brooklyn Williamsburg 40.70995 -73.95536 Entire home/apt 220 1 404 2019-06-25 4.90 2 341
398 ☆Massive DUPLEX☆ 2BR & 2BTH East Village 9+ Gu... 627217 Seith Manhattan East Village 40.72939 -73.98857 Entire home/apt 189 2 403 2019-07-07 4.10 3 201
5382 Bright, quiet, cozy 1BR by C Park! 20116872 Michael Manhattan Upper West Side 40.77571 -73.97757 Entire home/apt 195 1 401 2019-06-30 6.76 1 178
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45818 Charming E. Village 2 Bedroom: Steps to the Park! 263427276 Helene Manhattan East Village 40.72564 -73.98030 Entire home/apt 300 4 0 NaN NaN 1 297
46743 Amazing 3 BEDROOMS Brooklyn 217807790 Jessica Brooklyn Williamsburg 40.71647 -73.94657 Entire home/apt 500 3 0 NaN NaN 1 354
39356 SHOW STOPPER/BEST APARTMENT IN HARLEM 203982404 Maxime C/Armande C Manhattan East Harlem 40.80701 -73.94041 Entire home/apt 500 2 0 NaN NaN 6 173
30872 The Madison - A One Bedroom Apartment 179634496 Cristian Manhattan East Harlem 40.79983 -73.94481 Entire home/apt 300 2 0 NaN NaN 1 0
47816 True 1-Bedroom in Waterfront Luxury Building 9618786 Natalia Brooklyn Williamsburg 40.72003 -73.96242 Entire home/apt 350 4 0 NaN NaN 1 9

14879 rows × 15 columns


# Rows that are exact repeats of an earlier row (all columns equal).
duplicados = df.loc[df.duplicated()]
print(duplicados)
Empty DataFrame
Columns: [id, name, host_id, host_name, neighbourhood_group, neighbourhood, latitude, longitude, room_type, price, minimum_nights, number_of_reviews, last_review, reviews_per_month, calculated_host_listings_count, availability_365]
Index: []
# Remove the `id` column from the working frame in place.
# errors='ignore' makes the cell safe to re-run: without it a second
# execution raises KeyError because the column is already gone.
df.drop(columns=['id'], inplace=True, errors='ignore')


# Violin plot of the price distribution on an 8x8 inch figure.
plt.figure(figsize=(8, 8))
# Pass the series as the keyword argument `x`: seaborn deprecated positional
# data vectors (FutureWarning) and rejects them from version 0.12 onwards.
sns.violinplot(x=df.price)
plt.title('Gráfico de Violino')
plt.show()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning

# Histogram of review counts: 10 bins, bar heights as percentages,
# with a kernel density estimate overlaid.
sns.histplot(
    data=df,
    x="number_of_reviews",
    bins=10,
    stat="percent",
    kde=True,
)
plt.title('percentual dos comentários')
plt.show()

# Histogram of prices: 10 bins, bar heights as percentages,
# with a kernel density estimate overlaid.
sns.histplot(
    data=df,
    x="price",
    bins=10,
    stat="percent",
    kde=True,
)
plt.title('percentual dos preços')
plt.show()