Lab5-2_BA

Berent Aldikacti

09/21/20

Exercise

In [4]:
import sqlite3

# Create a SQL connection to our SQLite database
con = sqlite3.connect("data/portal_mammals.sqlite")

cur = con.cursor()

# The result of a "cursor.execute" can be iterated over by row
for row in cur.execute('SELECT * FROM species;'):
    print(row)

# Be sure to close the connection
con.close()
('AB', 'Amphispiza', 'bilineata', 'Bird')
('AH', 'Ammospermophilus', 'harrisi', 'Rodent')
('AS', 'Ammodramus', 'savannarum', 'Bird')
('BA', 'Baiomys', 'taylori', 'Rodent')
('CB', 'Campylorhynchus', 'brunneicapillus', 'Bird')
('CM', 'Calamospiza', 'melanocorys', 'Bird')
('CQ', 'Callipepla', 'squamata', 'Bird')
('CS', 'Crotalus', 'scutalatus', 'Reptile')
('CT', 'Cnemidophorus', 'tigris', 'Reptile')
('CU', 'Cnemidophorus', 'uniparens', 'Reptile')
('CV', 'Crotalus', 'viridis', 'Reptile')
('DM', 'Dipodomys', 'merriami', 'Rodent')
('DO', 'Dipodomys', 'ordii', 'Rodent')
('DS', 'Dipodomys', 'spectabilis', 'Rodent')
('DX', 'Dipodomys', 'sp.', 'Rodent')
('EO', 'Eumeces', 'obsoletus', 'Reptile')
('GS', 'Gambelia', 'silus', 'Reptile')
('NL', 'Neotoma', 'albigula', 'Rodent')
('NX', 'Neotoma', 'sp.', 'Rodent')
('OL', 'Onychomys', 'leucogaster', 'Rodent')
('OT', 'Onychomys', 'torridus', 'Rodent')
('OX', 'Onychomys', 'sp.', 'Rodent')
('PB', 'Chaetodipus', 'baileyi', 'Rodent')
('PC', 'Pipilo', 'chlorurus', 'Bird')
('PE', 'Peromyscus', 'eremicus', 'Rodent')
('PF', 'Perognathus', 'flavus', 'Rodent')
('PG', 'Pooecetes', 'gramineus', 'Bird')
('PH', 'Perognathus', 'hispidus', 'Rodent')
('PI', 'Chaetodipus', 'intermedius', 'Rodent')
('PL', 'Peromyscus', 'leucopus', 'Rodent')
('PM', 'Peromyscus', 'maniculatus', 'Rodent')
('PP', 'Chaetodipus', 'penicillatus', 'Rodent')
('PU', 'Pipilo', 'fuscus', 'Bird')
('PX', 'Chaetodipus', 'sp.', 'Rodent')
('RF', 'Reithrodontomys', 'fulvescens', 'Rodent')
('RM', 'Reithrodontomys', 'megalotis', 'Rodent')
('RO', 'Reithrodontomys', 'montanus', 'Rodent')
('RX', 'Reithrodontomys', 'sp.', 'Rodent')
('SA', 'Sylvilagus', 'audubonii', 'Rabbit')
('SB', 'Spizella', 'breweri', 'Bird')
('SC', 'Sceloporus', 'clarki', 'Reptile')
('SF', 'Sigmodon', 'fulviventer', 'Rodent')
('SH', 'Sigmodon', 'hispidus', 'Rodent')
('SO', 'Sigmodon', 'ochrognathus', 'Rodent')
('SS', 'Spermophilus', 'spilosoma', 'Rodent')
('ST', 'Spermophilus', 'tereticaudus', 'Rodent')
('SU', 'Sceloporus', 'undulatus', 'Reptile')
('SX', 'Sigmodon', 'sp.', 'Rodent')
('UL', 'Lizard', 'sp.', 'Reptile')
('UP', 'Pipilo', 'sp.', 'Bird')
('UR', 'Rodent', 'sp.', 'Rodent')
('US', 'Sparrow', 'sp.', 'Bird')
('ZL', 'Zonotrichia', 'leucophrys', 'Bird')
('ZM', 'Zenaida', 'macroura', 'Bird')
In [11]:
import sqlite3

# Create a SQL connection to our SQLite database
con = sqlite3.connect("data/portal_mammals.sqlite")

cur = con.cursor()

# Return all results of query
cur.execute('SELECT plot_id FROM plots WHERE plot_type="Control"')
cur.fetchall()
Out[11]:
[(2,), (4,), (8,), (11,), (12,), (14,), (17,), (22,)]
In [12]:
# Return first result of query
cur.execute('SELECT species FROM species WHERE taxa="Bird"')
cur.fetchone()
Out[12]:
('bilineata',)
In [13]:
# Be sure to close the connection
con.close()
In [14]:
import pandas as pd
import sqlite3

# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("data/portal_mammals.sqlite")
df = pd.read_sql_query("SELECT * from surveys", con)

# Verify that result of SQL query is stored in the dataframe
print(df.head())

con.close()
   record_id  month  day  year  plot_id species_id sex  hindfoot_length  \
0          1      7   16  1977        2         NL   M             32.0   
1          2      7   16  1977        3         NL   M             33.0   
2          3      7   16  1977        2         DM   F             37.0   
3          4      7   16  1977        7         DM   M             36.0   
4          5      7   16  1977        3         DM   M             35.0   

   weight  
0     NaN  
1     NaN  
2     NaN  
3     NaN  
4     NaN  

Challenge - SQL

  1. Create a query that contains survey data collected between 1998 - 2001 for observations of sex “male” or “female” that includes observation’s genus and species and site type for the sample. How many records are returned?
In [15]:
import pandas as pd
import sqlite3

# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("data/portal_mammals.sqlite")
df = pd.read_sql_query("SELECT * from surveys", con)
print(df.head())
con.close()
   record_id  month  day  year  plot_id species_id sex  hindfoot_length  \
0          1      7   16  1977        2         NL   M             32.0   
1          2      7   16  1977        3         NL   M             33.0   
2          3      7   16  1977        2         DM   F             37.0   
3          4      7   16  1977        7         DM   M             36.0   
4          5      7   16  1977        3         DM   M             35.0   

   weight  
0     NaN  
1     NaN  
2     NaN  
3     NaN  
4     NaN  
In [45]:
df2 = df[(df.year >= 1998) & (df.year <= 2001)]
df2 = df2[['sex','year','plot_id','species_id','record_id','weight']]
print(df2)
        sex  year  plot_id species_id  record_id  weight
27413     F  1998       12         DM      27414    41.0
27414     M  1998       12         DM      27415    43.0
27415     F  1998       12         DM      27416    41.0
27416     F  1998       20         PM      27417    21.0
27417     M  1998       19         OT      27418    18.0
...     ...   ...      ...        ...        ...     ...
33315  None  2001       11       None      33316     NaN
33316  None  2001       13       None      33317     NaN
33317  None  2001       14       None      33318     NaN
33318  None  2001       15       None      33319     NaN
33319  None  2001       16       None      33320     NaN

[5907 rows x 6 columns]
  1. Create a dataframe that contains the total number of observations (count) made for all years, and sum of observation weights for each site, ordered by site ID.
In [48]:
df_yearcount = df2.groupby(['year'])['record_id'].count().reset_index(name='record_count')
print(df_yearcount)
df_plotsum = df2.groupby(['plot_id'])['weight'].sum().reset_index(name='weight_sum')
print(df_plotsum)
   year  record_count
0  1998          1610
1  1999          1135
2  2000          1552
3  2001          1610
    plot_id  weight_sum
0         1     11794.0
1         2     19149.0
2         3      9494.0
3         4     10509.0
4         5      1966.0
5         6      7075.0
6         7      1244.0
7         8     10535.0
8         9     10854.0
9        10       312.0
10       11      9043.0
11       12     17577.0
12       13      6995.0
13       14      9768.0
14       15      3661.0
15       16      1545.0
16       17     14438.0
17       18      5541.0
18       19      6704.0
19       20      6969.0
20       21      7999.0
21       22      9535.0
22       23       530.0
23       24      3962.0

Challenge - Saving your work

  1. For each of the challenges in the previous challenge block, modify your code to save the results to their own tables in the portal database.
In [52]:
con = sqlite3.connect("data/portal_mammals.sqlite")
df2.to_sql("surveys19982001", con, if_exists="replace")
df_yearcount.to_sql("surveys-yearcount", con, if_exists="replace")
df_plotsum.to_sql("surveys-weightsum", con, if_exists="replace")
con.close()