EDA Climate Change Delhi
Objetivo: investigar se a umidade afeta o clima local.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
# Load the raw daily climate series.
df = pd.read_csv("daily_serie_train.csv")

# First look at the raw table: sample rows, dtypes and missing-value counts.
df.head()
df.info()
df.isna().sum()

# Parse the date column and promote it to the index, so the date-based
# slicing and resampling further down operate on a DatetimeIndex.
df["date"] = pd.DatetimeIndex(df["date"].values)
df = df.set_index("date")

# Inspect the table again now that it is indexed by date.
df.head()
df.describe()
def remove_outliers(df, col, k=1.5):
    """Return the rows of ``df`` whose ``col`` value lies within Tukey's fences.

    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR``, where ``Q1`` and
    ``Q3`` are the 25th and 75th percentiles of ``df[col]`` and
    ``IQR = Q3 - Q1``. Values lying exactly on a fence are kept, so a column
    whose IQR is zero (e.g. a constant column) keeps its rows instead of
    being emptied. Rows where ``col`` is NaN are dropped, because NaN fails
    both comparisons.

    Args:
        df: DataFrame to filter; it is not modified.
        col: Name of the numeric column to test.
        k: Multiplier applied to the IQR to place the fences.

    Returns:
        A new DataFrame containing only the rows inside the fences.
    """
    q1 = df[col].quantile(0.25)
    q3 = df[col].quantile(0.75)
    iqr = q3 - q1
    lower = q1 - k * iqr
    upper = q3 + k * iqr
    # Inclusive bounds: only values strictly beyond a fence count as outliers.
    inside = (df[col] >= lower) & (df[col] <= upper)
    return df[inside]
# Strip IQR outliers one column at a time; each pass filters the frame
# produced by the previous pass.
for outlier_col in ("wind_speed", "meanpressure"):
    df = remove_outliers(df, outlier_col)

# Box plot of mean pressure after filtering, as a visual check.
df[["meanpressure"]].boxplot()
plt.show()
# Keep only the predictors and the target.
selected_features = ['humidity', 'wind_speed']
target = ['meantemp']
df = df[selected_features + target]

# Chronological split. Copy the slices so that adding columns below writes
# to independent frames rather than to views of ``df``.
split_date = "2016-08-01"
train = df[df.index < split_date].copy()
test = df[df.index >= split_date].copy()

# Mean temperature per calendar month, computed on the training split only
# so that no information from the test period is used.
train['month'] = train.index.month
test['month'] = test.index.month
monthly_mean = train.groupby('month')['meantemp'].mean().rename('meantemp_month')

# ``join(on='month')`` matches the caller's ``month`` column against the
# *index* of the other object, so ``monthly_mean`` must stay indexed by month.
# (Resetting that index first would pair each month with a 0-based row
# position, i.e. the following month's mean.)
train = train.join(monthly_mean, on='month').drop(columns='month')
test = test.join(monthly_mean, on='month').drop(columns='month')
from sklearn.preprocessing import MinMaxScaler
# Min-max scale all columns. The scaler is fitted on the training split only
# and then reused, unchanged, for the test split.
min_max = MinMaxScaler()
# The scaler returns plain arrays by default, so pass the original index
# along with the column names; otherwise the frames lose their dates.
train = pd.DataFrame(
    data=min_max.fit_transform(train),
    columns=train.columns,
    index=train.index,
)
test = pd.DataFrame(
    data=min_max.transform(test),
    columns=test.columns,
    index=test.index,
)
train.head()
# For every column, draw a box plot and a histogram (with KDE) side by side.
# The loop index was never used, so iterate over the column names directly.
for column in df.columns:
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    sns.boxplot(df[column], ax=axs[0])
    sns.histplot(df[column], ax=axs[1], kde=True)
    plt.show()
# Monthly averages: preview the table, then plot the temperature column.
monthly_avg = df.resample("M").mean()
monthly_avg.head()
monthly_avg["meantemp"].plot()

# Hourly upsampling, filling the new rows forward and then backward.
# The results are only evaluated for display; nothing is stored.
df.resample("H").ffill().head()
df.resample("H").bfill()

# One panel per column over the whole period.
df.plot(subplots=True, figsize=(15, 20))
# Add the previous row's humidity as a lagged feature. The assignment must
# target ``df`` itself: assigning into ``df.corr()`` only writes to a
# temporary correlation matrix that is thrown away immediately.
df['humidity_lag'] = df['humidity'].shift()
# The first row has no predecessor, so its lag is NaN; drop it.
df.dropna(inplace=True)

# Correlation matrix of all columns, including the new lag feature.
corr = df.corr()
sns.heatmap(data=corr, annot=True)

# Pairwise scatter plots. The title is set on the figure: ``plt.title``
# would only label the last subplot of the grid.
sns.pairplot(df)
plt.suptitle('Dispersão de pares', y=1.02)
from statsmodels.tsa.seasonal import seasonal_decompose
# Decompose the temperature series using a cycle length of 365 observations.
# ``period`` is the current name of this argument; the older ``freq``
# keyword is rejected by recent statsmodels releases.
result = seasonal_decompose(df.meantemp, period=365)
# ``result.plot()`` returns a matplotlib Figure, which is what gets resized.
decomposition_fig = result.plot()
decomposition_fig.set_size_inches(15, 20)
plt.show()
# Log-transform the temperature, then compute its 7-observation rolling
# mean and rolling standard deviation.
temp_log = np.log(df['meantemp'])
rolmean_log = temp_log.rolling(window=7).mean()
rolstd = temp_log.rolling(window=7).std()

# Plot the log series together with both rolling statistics on one axes.
fig, ax = plt.subplots(figsize=(15, 10))
orig = ax.plot(temp_log, color='blue', label='Transformação Logarítmica')
mean = ax.plot(rolmean_log, color='red', label='Média Móvel de Transformação')
desvio = ax.plot(rolstd, color='black', label='Desvio Padrão Móvel')
ax.legend(loc='best')
ax.set_title('Estatísticas de rolagem - Log')

# Treat the x axis as dates and slant the tick labels so they do not overlap.
ax.xaxis_date()
fig.autofmt_xdate()
plt.show(block=False)
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
# Augmented Dickey-Fuller test on the raw temperature series.
print('Resultados: Dickey Fuller test: ')
adftest = adfuller(df['meantemp'])

# The first four entries of the result go into a labelled Series ...
summary_labels = ['Teste', 'p-valor', 'Lags', 'Numero de observações usadas']
out = pd.Series(adftest[:4], index=summary_labels)

# ... and the fifth entry, a dict of critical values, is appended to it.
critical_values = adftest[4]
for level, threshold in critical_values.items():
    out['Valor Crítico (%s): ' % level] = threshold
print(out)
# Detrend the log series by subtracting its rolling mean. Positions without
# a full rolling window are NaN in the result, so drop them.
log_menos_media = temp_log - rolmean_log
log_menos_media.dropna(inplace=True)

# Rolling statistics of the detrended series. The names from the previous
# figure are rebound here, as in the original code.
rolmean_log = log_menos_media.rolling(window=7).mean()
rolstd = log_menos_media.rolling(window=7).std()

fig, ax = plt.subplots(figsize=(15, 10))
# Label and title describe the series actually plotted (log minus rolling
# mean); previously they repeated the text of the plain log figure.
orig = plt.plot(log_menos_media, color='blue', label='Log menos Média Móvel')
mean = plt.plot(rolmean_log, color='red', label='Média Móvel de Transformação')
desvio = plt.plot(rolstd, color='black', label='Desvio Padrão Móvel')
plt.legend(loc='best')
plt.title('Estatísticas de rolagem - Log menos Média Móvel')

# Treat the x axis as dates and slant the tick labels so they do not overlap.
ax.xaxis_date()
fig.autofmt_xdate()
plt.show(block=False)
# Augmented Dickey-Fuller test on the detrended log series, with the lag
# length chosen by AIC.
print('Resultados: Dickey Fuller test: ')
adftest = adfuller(log_menos_media, autolag='AIC')

# The first four entries of the result go into a labelled Series ...
summary_labels = ['Teste', 'p-valor', '#Lags', 'Numero de observações usadas']
dfoutput = pd.Series(adftest[:4], index=summary_labels)

# ... and the fifth entry, a dict of critical values, is appended to it.
critical_values = adftest[4]
for level, threshold in critical_values.items():
    dfoutput['Valor Crítico (%s): ' % level] = threshold
print(dfoutput)
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Autocorrelation and partial autocorrelation plots of the temperature
# series, each drawn by its own statsmodels helper.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(df["meantemp"])
plt.show()
Conclusão
- A umidade pode ser usada como variável do modelo, pois possui correlação, ainda que fraca, com a temperatura média.