import pandas as pd
# Parser options shared by both reads: pandas' built-in list of NA markers
# is switched off, so only an empty field is read as a missing value.
_na_options = {"keep_default_na": False, "na_values": [""]}

# Surveys table
surveys_df = pd.read_csv("data/surveys.csv", **_na_options)
surveys_df  # bare expression: shown in an interactive session, no effect in a script

# Species table
species_df = pd.read_csv("data/species.csv", **_na_options)
species_df
# First 10 rows of the surveys table
survey_sub = surveys_df.head(n=10)

# Last 10 rows, renumbered from 0 so that the second DataFrame lines up
# with the first when the two are combined. drop=True discards the old
# index values instead of keeping them as a new column.
survey_sub_last10 = surveys_df.tail(n=10).reset_index(drop=True)
survey_sub_last10

# Row-wise concatenation: the second DataFrame goes underneath the first
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis="index")
vertical_stack

# Column-wise concatenation: the DataFrames are placed next to each other,
# with rows matched on their index labels
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis="columns")
horizontal_stack.head()
# Load the 2001 and 2002 survey tables. Only empty fields are treated as
# missing values (pandas' default NA markers are disabled).
df2001 = pd.read_csv('data/surveys2001.csv', keep_default_na=False, na_values=[""])
df2001.head()
df2002 = pd.read_csv('data/surveys2002.csv', keep_default_na=False, na_values=[""])
df2002.head()

# Stack the two years into one table. Each input keeps its own row labels,
# so labels may repeat in the result.
vertical_df = pd.concat([df2001, df2002], axis=0)
vertical_df

# Mean weight for every (year, sex) combination.
# NOTE: despite their names, these two variables are grouped by year and
# sex (not site) and hold a mean (not a count). The names are kept so that
# any code referring to them keeps working.
by_site_sex = vertical_df.groupby(['year', 'sex'])
site_sex_count = by_site_sex['weight'].mean()
site_sex_count

# Move the inner index level (sex) into columns: one row per year,
# one column per sex.
spc = site_sex_count.unstack()

# The plotted values are means, so the title says so.
s_plot = spc.plot(kind='bar', stacked=True, title="Mean weight by year and sex")
s_plot.set_ylabel("Weight")
s_plot.set_xlabel("Year")

# The year lives in the index of `spc`; writing with index=False would
# drop it and leave rows that cannot be attributed to a year. Keep the
# index in the file and restore it when reading the file back.
spc.to_csv('data/weightbyyearsex.csv')
spcread = pd.read_csv('data/weightbyyearsex.csv', index_col='year')
spcread
# Reload both tables from disk; only empty fields are read as missing values
surveys_df = pd.read_csv(
    "data/surveys.csv", keep_default_na=False, na_values=[""]
)
surveys_df.head()

species_df = pd.read_csv(
    "data/species.csv", keep_default_na=False, na_values=[""]
)
species_df.head()

# Join each survey record to its species row using the species_id column
# present in both tables (default join type: inner)
merged = surveys_df.merge(species_df, on='species_id')
merged.head()
# Number of records with a non-missing taxa value in each plot.
merged_taxaplot = merged.groupby(['plot_id'])
plot_taxa_count = merged_taxaplot['taxa'].count()
# A single Series has nothing to stack, so a plain bar chart is drawn.
plot_taxa_count.plot(kind='bar', title="Taxa by plot")

# Same count, now split by sex within each plot.
merged_taxaplot = merged.groupby(['sex', 'plot_id'])
plot_taxa_count = merged_taxaplot['taxa'].count()
# Plotting the (sex, plot_id) Series directly gives one flat bar per pair
# and nothing gets stacked. Moving sex into columns gives one bar per plot
# with one stacked segment per sex. A (sex, plot) pair with no records is
# a count of 0.
taxa_by_plot_and_sex = plot_taxa_count.unstack('sex', fill_value=0)
taxa_by_plot_and_sex.plot(kind='bar', stacked=True, title="Taxa by sex and plot")