Lab3-4_BA¶

Berent Aldikacti¶

09/12/20¶

import pandas as pd
# Read the survey table from data/surveys.csv into a DataFrame.
surveys_df = pd.read_csv("data/surveys.csv")

Challenge 1¶

a = [1, 2, 3, 4, 5]
# List indexing starts at 0, so a[0] is the first element.
a[0]

1

# The list has five elements (valid indices 0-4), so index 5 raises IndexError.
a[5]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-11-4a84d856522b> in <module>
----> 1 a[5]

IndexError: list index out of range

The expression above raises an IndexError because the list has only five elements (valid indices 0 to 4), so there is no element at index 5.

# len(a) is 5, which is one past the last valid index (4), so this raises
# IndexError for the same reason a[5] does.
a[len(a)]

Challenge 2¶

# Reload the survey data from the same CSV file, then preview the first rows
# (head() returns the first five rows by default).
surveys_df = pd.read_csv("data/surveys.csv")
surveys_df.head()

# Position-based selection: rows at positions 0-2 and columns at positions 1-3
# (the end of an iloc slice is exclusive).
surveys_df.iloc[0:3, 1:4]

# Label-based selection: the rows labelled 0 and 10, with every column.
surveys_df.loc[[0, 10], :]

# A single row label plus a list of column labels returns a Series.
surveys_df.loc[0, ['species_id', 'plot_id', 'weight']]

species_id     NL
plot_id         2
weight        NaN
Name: 0, dtype: object

# .loc with a list of labels requires every label to exist in the index;
# 35549 is not a row label here, so this raises KeyError.
surveys_df.loc[[0, 10, 35549], :]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-16-b2913a4edc78> in <module>
----> 1 surveys_df.loc[[0, 10, 35549], :]

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    871                     # AttributeError for IntervalTree get_value
    872                     pass
--> 873             return self._getitem_tuple(key)
    874         else:
    875             # we by definition only have the 0th axis

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
   1053             return self._multi_take(tup)
   1054 
-> 1055         return self._getitem_tuple_same_dim(tup)
   1056 
   1057     def _get_label(self, label, axis: int):

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup)
    748                 continue
    749 
--> 750             retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
    751             # We should never have retval.ndim < self.ndim, as that should
    752             #  be handled by the _getitem_lowerdim call above.

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1097                     raise ValueError("Cannot index with multidimensional key")
   1098 
-> 1099                 return self._getitem_iterable(key, axis=axis)
   1100 
   1101             # nested tuple slicing

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
   1035 
   1036         # A collection of keys
-> 1037         keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
   1038         return self.obj._reindex_with_indexers(
   1039             {axis: [keyarr, indexer]}, copy=True, allow_dups=True

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1252             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1253 
-> 1254         self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
   1255         return keyarr, indexer
   1256 

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1314                 with option_context("display.max_seq_items", 10, "display.width", 80):
   1315                     raise KeyError(
-> 1316                         "Passing list-likes to .loc or [] with any missing labels "
   1317                         "is no longer supported. "
   1318                         f"The following labels were missing: {not_found}. "

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([35549], dtype='int64'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

# Plain slicing on a DataFrame selects rows by position: just the first row.
surveys_df[0:1]

# The first four rows (positions 0-3).
surveys_df[:4]

# Every row except the last one.
surveys_df[:-1]

# Rows at positions 0-3 and columns at positions 1-3.
surveys_df.iloc[0:4, 1:4]

# Raises TypeError: .loc is label-based and the column labels are not
# integers, so the integer slice 1:4 cannot be used on the column axis.
surveys_df.loc[0:4, 1:4]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-21-aed7dd61eef6> in <module>
----> 1 surveys_df.loc[0:4, 1:4]

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    871                     # AttributeError for IntervalTree get_value
    872                     pass
--> 873             return self._getitem_tuple(key)
    874         else:
    875             # we by definition only have the 0th axis

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
   1053             return self._multi_take(tup)
   1054 
-> 1055         return self._getitem_tuple_same_dim(tup)
   1056 
   1057     def _get_label(self, label, axis: int):

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup)
    748                 continue
    749 
--> 750             retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
    751             # We should never have retval.ndim < self.ndim, as that should
    752             #  be handled by the _getitem_lowerdim call above.

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1086         if isinstance(key, slice):
   1087             self._validate_key(key, axis)
-> 1088             return self._get_slice_axis(key, axis=axis)
   1089         elif com.is_bool_indexer(key):
   1090             return self._getbool_axis(key, axis=axis)

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
   1121         labels = obj._get_axis(axis)
   1122         indexer = labels.slice_indexer(
-> 1123             slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc"
   1124         )
   1125 

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind)
   4960         slice(1, 3, None)
   4961         """
-> 4962         start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
   4963 
   4964         # return a slice

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
   5161         start_slice = None
   5162         if start is not None:
-> 5163             start_slice = self.get_slice_bound(start, "left", kind)
   5164         if start_slice is None:
   5165             start_slice = 0

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
   5073         # For datetime indices label may be a string that has to be converted
   5074         # to datetime boundary according to its resolution.
-> 5075         label = self._maybe_cast_slice_bound(label, side, kind)
   5076 
   5077         # we need to look up the label

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _maybe_cast_slice_bound(self, label, side, kind)
   5025         # this is rejected (generally .loc gets you here)
   5026         elif is_integer(label):
-> 5027             self._invalid_indexer("slice", label)
   5028 
   5029         return label

~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _invalid_indexer(self, form, key)
   3262         """
   3263         raise TypeError(
-> 3264             f"cannot do {form} indexing on {type(self).__name__} with these "
   3265             f"indexers [{key}] of type {type(key).__name__}"
   3266         )

TypeError: cannot do slice indexing on Index with these indexers [1] of type int

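`iloc` selects by integer position, whereas `loc` selects by label. The column labels of this DataFrame are strings, so the integer slice `1:4` is valid for `iloc` but raises a TypeError when used for the columns in `loc`.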

Challenge 3 - Queries¶

# Preview the table before querying it.
surveys_df.head()

# Rows recorded in 1999 whose weight is at most 8.
surveys_df.loc[surveys_df['year'].eq(1999) & surveys_df['weight'].le(8)]

# Rows whose species_id is one of the codes listed in `test`.
# The filter is evaluated once; only the last expression of a cell is shown.
test = ['DM', 'NL']
surveys_df[surveys_df['species_id'].isin(test)]

# Rows with a weight greater than or equal to 0 (missing weights never match).
surveys_df[surveys_df['weight'] >= 0]

# Every row whose sex is not 'M' (this keeps 'F' and missing values).
surveys_df[surveys_df['sex'] != 'M']

# Every row whose sex is not 'F' (this keeps 'M' and missing values).
surveys_df[surveys_df['sex'] != 'F']

Challenge - Putting it all together¶

# Keep only the observations whose sex is neither 'F' nor 'M'.
new_df = surveys_df[~surveys_df['sex'].isin(['F', 'M'])]
new_df.head()

# Boolean mask: True wherever the sex value in the subset is missing.
x = pd.isnull(new_df['sex'])
# Summing the mask counts the True entries, i.e. the null values.
# len(x) would return the number of rows whether or not they are null.
x.sum()

2511

# Observations that are female or male AND have a weight greater than 0.
# The sex test needs its own parentheses: `&` binds more tightly than `|`,
# so without them every female row is kept regardless of its weight.
new_df2 = surveys_df[((surveys_df.sex == 'F') | (surveys_df.sex == 'M')) & (surveys_df.weight > 0)]
new_df2

# Group the filtered observations by plot and sex.
by_site_sex = new_df2.groupby(['plot_id', 'sex'])
# NOTE: despite its name, site_sex_count holds the MEAN weight per group.
site_sex_count = by_site_sex['weight'].mean()
# Move sex from the row index into columns so each plot gets one bar segment per sex.
spc = site_sex_count.unstack()
# The plotted values are averages, so the title says "Average" rather than "Total".
s_plot = spc.plot(kind='bar', stacked=True, title="Average weight by site and sex")
s_plot.set_ylabel("Weight")
s_plot.set_xlabel("Plot")

Text(0.5, 0, 'Plot')

	month	day	year
0	7	16	1977
1	7	16	1977
2	7	16	1977

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
0	1	7	16	1977	2	NL	M	32.0	NaN
1	2	7	16	1977	3	NL	M	33.0	NaN
2	3	7	16	1977	2	DM	F	37.0	NaN
3	4	7	16	1977	7	DM	M	36.0	NaN
4	5	7	16	1977	3	DM	M	35.0	NaN
...	...	...	...	...	...	...	...	...	...
35543	35544	12	31	2002	15	US	NaN	NaN	NaN
35544	35545	12	31	2002	15	AH	NaN	NaN	NaN
35545	35546	12	31	2002	15	AH	NaN	NaN	NaN
35546	35547	12	31	2002	10	RM	F	15.0	14.0
35547	35548	12	31	2002	7	DO	M	36.0	51.0

	month	day	year
0	7	16	1977
1	7	16	1977
2	7	16	1977
3	7	16	1977

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
29082	29083	1	16	1999	21	RM	M	16.0	8.0
29196	29197	2	20	1999	18	RM	M	18.0	8.0
29421	29422	3	15	1999	16	RM	M	15.0	8.0
29903	29904	10	10	1999	4	PP	M	20.0	7.0
29905	29906	10	10	1999	4	PP	M	21.0	4.0

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
0	1	7	16	1977	2	NL	M	32.0	NaN
1	2	7	16	1977	3	NL	M	33.0	NaN
2	3	7	16	1977	2	DM	F	37.0	NaN
3	4	7	16	1977	7	DM	M	36.0	NaN
4	5	7	16	1977	3	DM	M	35.0	NaN
...	...	...	...	...	...	...	...	...	...
35532	35533	12	31	2002	14	DM	F	36.0	48.0
35533	35534	12	31	2002	14	DM	M	37.0	56.0
35534	35535	12	31	2002	14	DM	M	37.0	53.0
35535	35536	12	31	2002	14	DM	F	35.0	42.0
35536	35537	12	31	2002	14	DM	F	36.0	46.0

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
62	63	8	19	1977	3	DM	M	35.0	40.0
63	64	8	19	1977	7	DM	M	37.0	48.0
64	65	8	19	1977	4	DM	F	34.0	29.0
65	66	8	19	1977	4	DM	F	35.0	46.0
66	67	8	19	1977	7	DM	M	35.0	36.0
...	...	...	...	...	...	...	...	...	...
35540	35541	12	31	2002	15	PB	F	24.0	31.0
35541	35542	12	31	2002	15	PB	F	26.0	29.0
35542	35543	12	31	2002	15	PB	F	27.0	34.0
35546	35547	12	31	2002	10	RM	F	15.0	14.0
35547	35548	12	31	2002	7	DO	M	36.0	51.0