Como manipular dados acessando API no GitHub
Resolvendo exercícios com pandas
# Notebook shell setup: clone the exercise repository, change into it,
# and list its contents. The CSV path read below ('datasets/...') is
# relative, so it is resolved against the directory entered here.
! git clone https://github.com/Mario-RJunior/data-manipulation-exercises
cd data-manipulation-exercises/
ls
import pandas as pd

# Load the pipe-delimited users dataset into a single frame, `df`.
# Every statement in this section uses that one name; the original mixed
# in an undefined `users`, which raised NameError.
df = pd.read_csv('datasets/users_dataset.csv', sep='|')

# Peek at both ends of the table.
df.head(15)
df.tail(10)

# Basic structure: (rows, columns), column labels, and per-column dtypes.
df.shape
df.columns
df.dtypes

# The 'occupation' column: raw values, its distinct values, and how many
# distinct values there are.
df['occupation']
print(df['occupation'].unique())
len(df['occupation'].unique())

# value_counts() sorts by frequency (descending), so head(1) is the most
# common occupation together with its count.
df['occupation'].value_counts().head(1)

# Mean of the 'age' column, then summary statistics for the frame.
df['age'].mean()
df.describe()
# Notebook shell setup for the crime-rates section: clone the exercise
# repository, change into it, and list its contents.
! git clone https://github.com/Mario-RJunior/data-manipulation-exercises
cd data-manipulation-exercises/
ls
import pandas as pd

# Load the crime-rates table and inspect the column dtypes.
crime = pd.read_csv('datasets/US_Crime_Rates_1960_2014.csv')
crime.dtypes
print(crime['Year'].head())

# Convert the 'Year' column to datetime. '%Y' is the four-digit-year
# directive; the previous '%M' is the *minutes* directive and cannot parse
# year values.
# NOTE(review): assumes 'Year' holds four-digit years (the file name and
# the '1978' lookup below suggest so) - confirm against the CSV.
crime['Year'] = pd.to_datetime(crime['Year'], format='%Y')
crime['Year'].dtypes

# Use the parsed year as the row index.
crime.set_index('Year', inplace=True)
crime.head()

# Drop the 'Total' column and show the remaining columns.
crime.drop(columns='Total', inplace=True)
crime.columns

# 'Vehicle_Theft' for the row(s) whose index equals '1978'.
crime.loc[crime.index == '1978', 'Vehicle_Theft']

# Row(s) where 'Murder' is at its maximum.
crime[crime['Murder'] == crime['Murder'].max()]

# 'Murder' for the row(s) where 'Vehicle_Theft' is at its minimum.
crime.loc[crime['Vehicle_Theft'] == crime['Vehicle_Theft'].min(), 'Murder']
# Notebook shell setup for the group-by section: clone the exercise
# repository, list the current directory, change into the clone, and list
# its contents.
! git clone https://github.com/Mario-RJunior/data-manipulation-exercises
ls
cd data-manipulation-exercises/
ls
import pandas as pd

# Load the pipe-delimited users dataset.
users = pd.read_csv('datasets/users_dataset.csv', sep='|')

# Mean age per occupation.
users.groupby('occupation')['age'].mean()

# Minimum and maximum age per occupation.
users.groupby('occupation').agg({'age': ['min', 'max']})

# Mean age per (occupation, gender) pair.
users.groupby(['occupation', 'gender'])['age'].mean()

# Count of each gender within each occupation; the result is indexed by
# (occupation, gender).
df2 = users.groupby('occupation')['gender'].value_counts()

# Share (in percent) of each gender within its occupation.
# transform('sum') broadcasts each occupation's total back onto df2's own
# index, so the division is element-wise and the result keeps exactly the
# (occupation, gender) index. The previous groupby(...).apply(lambda ...)
# form produced an index whose shape depends on the pandas version (the
# group key can be prepended a second time).
occupation_totals = df2.groupby(level=0).transform('sum')
porcentagem = 100 * df2 / occupation_totals
porcentagem