import pandas as pd
# Load the survey records from CSV into a DataFrame.
surveys_df = pd.read_csv("data/surveys.csv")
# Python lists are zero-indexed: the first element is at index 0.
a = [1, 2, 3, 4, 5]
a[0]
1
# The list has five elements (valid indices 0-4), so index 5 raises IndexError.
a[5]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-11-4a84d856522b> in <module> ----> 1 a[5] IndexError: list index out of range
# len(a) is 5 here, one past the last valid index, so this raises IndexError too.
a[len(a)]
# NOTE(review): repeats the earlier read_csv call; presumably a fresh reload
# for the DataFrame examples that follow.
surveys_df = pd.read_csv("data/surveys.csv")
# Preview the first five rows.
surveys_df.head()
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
# iloc is position-based and excludes the stop: rows 0-2, columns at positions 1-3.
surveys_df.iloc[0:3, 1:4]
month | day | year | |
---|---|---|---|
0 | 7 | 16 | 1977 |
1 | 7 | 16 | 1977 |
2 | 7 | 16 | 1977 |
# loc is label-based: select the rows labelled 0 and 10, with every column.
surveys_df.loc[[0, 10], :]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
10 | 11 | 7 | 16 | 1977 | 5 | DS | F | 53.0 | NaN |
# One row label plus a list of column names returns that row as a Series.
surveys_df.loc[0, ['species_id', 'plot_id', 'weight']]
species_id NL plot_id 2 weight NaN Name: 0, dtype: object
# Label 35549 is not in the index, so loc raises KeyError for the whole request.
surveys_df.loc[[0, 10, 35549], :]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-16-b2913a4edc78> in <module> ----> 1 surveys_df.loc[[0, 10, 35549], :] ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key) 871 # AttributeError for IntervalTree get_value 872 pass --> 873 return self._getitem_tuple(key) 874 else: 875 # we by definition only have the 0th axis ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup) 1053 return self._multi_take(tup) 1054 -> 1055 return self._getitem_tuple_same_dim(tup) 1056 1057 def _get_label(self, label, axis: int): ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup) 748 continue 749 --> 750 retval = getattr(retval, self.name)._getitem_axis(key, axis=i) 751 # We should never have retval.ndim < self.ndim, as that should 752 # be handled by the _getitem_lowerdim call above. ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1097 raise ValueError("Cannot index with multidimensional key") 1098 -> 1099 return self._getitem_iterable(key, axis=axis) 1100 1101 # nested tuple slicing ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis) 1035 1036 # A collection of keys -> 1037 keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) 1038 return self.obj._reindex_with_indexers( 1039 {axis: [keyarr, indexer]}, copy=True, allow_dups=True ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, 
axis, raise_missing) 1314 with option_context("display.max_seq_items", 10, "display.width", 80): 1315 raise KeyError( -> 1316 "Passing list-likes to .loc or [] with any missing labels " 1317 "is no longer supported. " 1318 f"The following labels were missing: {not_found}. " KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([35549], dtype='int64'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"
# [] with a slice selects rows by position; 0:1 yields only the first row.
surveys_df[0:1]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
# An omitted start defaults to 0: the first four rows.
surveys_df[:4]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
# A negative stop counts from the end: every row except the last one.
surveys_df[:-1]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35543 | 35544 | 12 | 31 | 2002 | 15 | US | NaN | NaN | NaN |
35544 | 35545 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35545 | 35546 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35546 | 35547 | 12 | 31 | 2002 | 10 | RM | F | 15.0 | 14.0 |
35547 | 35548 | 12 | 31 | 2002 | 7 | DO | M | 36.0 | 51.0 |
35548 rows × 9 columns
# Position-based, stop excluded: rows 0-3 and columns at positions 1-3.
surveys_df.iloc[0:4, 1:4]
month | day | year | |
---|---|---|---|
0 | 7 | 16 | 1977 |
1 | 7 | 16 | 1977 |
2 | 7 | 16 | 1977 |
3 | 7 | 16 | 1977 |
# loc looks up labels, and the column labels are strings, so the integer
# column slice 1:4 raises TypeError.
surveys_df.loc[0:4, 1:4]
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-21-aed7dd61eef6> in <module> ----> 1 surveys_df.loc[0:4, 1:4] ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key) 871 # AttributeError for IntervalTree get_value 872 pass --> 873 return self._getitem_tuple(key) 874 else: 875 # we by definition only have the 0th axis ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup) 1053 return self._multi_take(tup) 1054 -> 1055 return self._getitem_tuple_same_dim(tup) 1056 1057 def _get_label(self, label, axis: int): ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup) 748 continue 749 --> 750 retval = getattr(retval, self.name)._getitem_axis(key, axis=i) 751 # We should never have retval.ndim < self.ndim, as that should 752 # be handled by the _getitem_lowerdim call above. ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1086 if isinstance(key, slice): 1087 self._validate_key(key, axis) -> 1088 return self._get_slice_axis(key, axis=axis) 1089 elif com.is_bool_indexer(key): 1090 return self._getbool_axis(key, axis=axis) ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis) 1121 labels = obj._get_axis(axis) 1122 indexer = labels.slice_indexer( -> 1123 slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc" 1124 ) 1125 ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind) 4960 slice(1, 3, None) 4961 """ -> 4962 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) 4963 4964 # return a slice ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind) 5161 start_slice = None 5162 if start is not None: -> 5163 start_slice = 
self.get_slice_bound(start, "left", kind) 5164 if start_slice is None: 5165 start_slice = 0 ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind) 5073 # For datetime indices label may be a string that has to be converted 5074 # to datetime boundary according to its resolution. -> 5075 label = self._maybe_cast_slice_bound(label, side, kind) 5076 5077 # we need to look up the label ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _maybe_cast_slice_bound(self, label, side, kind) 5025 # this is rejected (generally .loc gets you here) 5026 elif is_integer(label): -> 5027 self._invalid_indexer("slice", label) 5028 5029 return label ~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _invalid_indexer(self, form, key) 3262 """ 3263 raise TypeError( -> 3264 f"cannot do {form} indexing on {type(self).__name__} with these " 3265 f"indexers [{key}] of type {type(key).__name__}" 3266 ) TypeError: cannot do slice indexing on Index with these indexers [1] of type int
# Preview the first five rows again.
surveys_df.head()
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
# & is the element-wise AND of two boolean masks. Each comparison is
# parenthesised because & binds tighter than == and <=.
surveys_df[(surveys_df.year == 1999) & (surveys_df.weight <= 8)]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
29082 | 29083 | 1 | 16 | 1999 | 21 | RM | M | 16.0 | 8.0 |
29196 | 29197 | 2 | 20 | 1999 | 18 | RM | M | 18.0 | 8.0 |
29421 | 29422 | 3 | 15 | 1999 | 16 | RM | M | 15.0 | 8.0 |
29903 | 29904 | 10 | 10 | 1999 | 4 | PP | M | 20.0 | 7.0 |
29905 | 29906 | 10 | 10 | 1999 | 4 | PP | M | 21.0 | 4.0 |
# isin() keeps the rows whose species_id equals any value in the list.
surveys_df[surveys_df['species_id'].isin(['DM','NL'])]
# Same filter, with the accepted values held in a variable.
test = ['DM','NL']
surveys_df[surveys_df['species_id'].isin(test)]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35532 | 35533 | 12 | 31 | 2002 | 14 | DM | F | 36.0 | 48.0 |
35533 | 35534 | 12 | 31 | 2002 | 14 | DM | M | 37.0 | 56.0 |
35534 | 35535 | 12 | 31 | 2002 | 14 | DM | M | 37.0 | 53.0 |
35535 | 35536 | 12 | 31 | 2002 | 14 | DM | F | 35.0 | 42.0 |
35536 | 35537 | 12 | 31 | 2002 | 14 | DM | F | 36.0 | 46.0 |
11848 rows × 9 columns
# | is the element-wise OR: weight > 0 or weight == 0. NaN fails both
# comparisons, so rows with a missing weight are left out.
surveys_df[(surveys_df.weight > 0) | (surveys_df.weight == 0)]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
62 | 63 | 8 | 19 | 1977 | 3 | DM | M | 35.0 | 40.0 |
63 | 64 | 8 | 19 | 1977 | 7 | DM | M | 37.0 | 48.0 |
64 | 65 | 8 | 19 | 1977 | 4 | DM | F | 34.0 | 29.0 |
65 | 66 | 8 | 19 | 1977 | 4 | DM | F | 35.0 | 46.0 |
66 | 67 | 8 | 19 | 1977 | 7 | DM | M | 35.0 | 36.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35540 | 35541 | 12 | 31 | 2002 | 15 | PB | F | 24.0 | 31.0 |
35541 | 35542 | 12 | 31 | 2002 | 15 | PB | F | 26.0 | 29.0 |
35542 | 35543 | 12 | 31 | 2002 | 15 | PB | F | 27.0 | 34.0 |
35546 | 35547 | 12 | 31 | 2002 | 10 | RM | F | 15.0 | 14.0 |
35547 | 35548 | 12 | 31 | 2002 | 7 | DO | M | 36.0 | 51.0 |
32283 rows × 9 columns
# ~ inverts the mask: rows whose sex is not 'M'. Rows where sex is NaN are
# included, because NaN == 'M' is False.
surveys_df[~(surveys_df.sex == 'M')]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
8 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN |
9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN |
10 | 11 | 7 | 16 | 1977 | 5 | DS | F | 53.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35543 | 35544 | 12 | 31 | 2002 | 15 | US | NaN | NaN | NaN |
35544 | 35545 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35545 | 35546 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35546 | 35547 | 12 | 31 | 2002 | 10 | RM | F | 15.0 | 14.0 |
35548 | 35549 | 12 | 31 | 2002 | 5 | NaN | NaN | NaN | NaN |
18201 rows × 9 columns
# Rows whose sex is not 'F'; rows where sex is NaN are included as well.
surveys_df[~(surveys_df.sex == 'F')]
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
5 | 6 | 7 | 16 | 1977 | 1 | PF | M | 14.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35543 | 35544 | 12 | 31 | 2002 | 15 | US | NaN | NaN | NaN |
35544 | 35545 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35545 | 35546 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
35547 | 35548 | 12 | 31 | 2002 | 7 | DO | M | 36.0 | 51.0 |
35548 | 35549 | 12 | 31 | 2002 | 5 | NaN | NaN | NaN | NaN |
19859 rows × 9 columns
# Observations whose sex is neither 'F' nor 'M' (NaN rows included).
is_female_or_male = surveys_df.sex.isin(['F', 'M'])
new_df = surveys_df[~is_female_or_male]
new_df.head()
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
13 | 14 | 7 | 16 | 1977 | 8 | DM | NaN | NaN | NaN |
18 | 19 | 7 | 16 | 1977 | 4 | PF | NaN | NaN | NaN |
33 | 34 | 7 | 17 | 1977 | 17 | DM | NaN | NaN | NaN |
56 | 57 | 7 | 18 | 1977 | 22 | DM | NaN | NaN | NaN |
76 | 77 | 8 | 19 | 1977 | 4 | SS | NaN | NaN | NaN |
# Count how many rows of new_df have a missing sex value.
# x is a boolean mask with one entry per row, so len(x) would only report the
# number of rows in new_df; summing the mask counts the True (null) entries.
x = pd.isnull(new_df['sex'])
x.sum()
2511
# Keep observations recorded as female or male AND with a weight above 0.
# & binds tighter than |, so the two sex tests are grouped explicitly; without
# the extra parentheses the weight condition applied to the 'M' rows only and
# 'F' rows with a missing weight slipped through.
# NOTE: re-run this cell to refresh any output captured from the old filter.
new_df2 = surveys_df[((surveys_df.sex == 'F') | (surveys_df.sex == 'M')) & (surveys_df.weight > 0)]
new_df2
record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
---|---|---|---|---|---|---|---|---|---|
2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
8 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN |
9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN |
10 | 11 | 7 | 16 | 1977 | 5 | DS | F | 53.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35540 | 35541 | 12 | 31 | 2002 | 15 | PB | F | 24.0 | 31.0 |
35541 | 35542 | 12 | 31 | 2002 | 15 | PB | F | 26.0 | 29.0 |
35542 | 35543 | 12 | 31 | 2002 | 15 | PB | F | 27.0 | 34.0 |
35546 | 35547 | 12 | 31 | 2002 | 10 | RM | F | 15.0 | 14.0 |
35547 | 35548 | 12 | 31 | 2002 | 7 | DO | M | 36.0 | 51.0 |
32569 rows × 9 columns
# Stacked bar chart of mean weight per plot, split by sex.
by_site_sex = new_df2.groupby(['plot_id', 'sex'])
# Despite its name, site_sex_count holds the mean weight of each
# (plot_id, sex) group; the name is kept in case later cells refer to it.
site_sex_count = by_site_sex['weight'].mean()
# Move sex from the row index into columns so each plot gets one stacked bar.
spc = site_sex_count.unstack()
# The plotted values are means, so the title says "Mean" rather than "Total".
s_plot = spc.plot(kind='bar', stacked=True, title="Mean weight by site and sex")
s_plot.set_ylabel("Weight")
s_plot.set_xlabel("Plot")
Text(0.5, 0, 'Plot')