Created by: SmirkyGraphs. Code: Github. Source: RI Legislature Site.
Rhode Island House and Senate Floor Votes by Year¶
The final days of a session can get a bit crazy for legislatures. The last day of 2016 saw a whopping 212 votes for the House and 127 for the Senate. This notebook will take a look at how many votes occur each year, when the session ends and how many votes occur in the final days of a session compared to the year as a whole for both the House and Senate.
Number of House Votes and Sessions by Year¶
In [1]:
import pandas as pd
import numpy as np
In [2]:
df = pd.read_csv('../data/clean/house_vote_info_clean.csv')
In [3]:
# adding cols to make data prep easier
df['count'] = 1
df['session'] = df['id'].str[2:6]
# remove roll call as it wasnt present in earlier years
df = df[df['vote_category'] != 'ROLL CALL']
# dates to remove (a tour and a possible test?)
df = df[df['bill_date'] != '2003-11-12']
df = df[df['bill_date'] != '2008-11-25']
In [4]:
# getting count of total votes for each individual session day
h_df = df.groupby(['session', 'bill_date'])['count'].count().reset_index()
# getting running total of floor votes for each session
h_df['votes_total'] = h_df.groupby(h_df['session'].ne(h_df['session'].shift()).cumsum()).cumsum()
# getting a running total of number of days for each session
h_df['day_id'] = h_df.groupby(h_df['session'].ne(h_df['session'].shift()).cumsum()).cumcount().add(1)
In [5]:
# previewing the table
h_df.head(2)
Out[5]:
In [6]:
# day with most house votes
h_df[h_df['count'] == h_df['count'].max()]
Out[6]:
In [7]:
# getting table of final days for each session
house_votes = h_df.groupby('session').last()
# add fitting headers and removing index label
cols = ['last_day', 'last_day_votes', 'total_votes', 'total_sessions']
house_votes.columns = cols
house_votes.columns.name = house_votes.index.name
house_votes.index.name = None
In [8]:
house_votes
Out[8]:
[House] Total Votes Final 10 Days vs. All Other Days¶
In [9]:
# getting count of total votes for each individual session day
end_df = df.groupby(['session', 'bill_date'])['count'].count().reset_index()
In [10]:
# getting running total of floor votes for each session
end_df['votes_total'] = end_df.groupby(end_df['session'].ne(end_df['session'].shift()).cumsum()).cumsum()
# getting a REVERSED running total of number of days for each session
end_df['day_id'] = end_df.groupby(end_df['session'].ne(end_df['session'].shift()).cumsum()).cumcount(ascending=False).add(1)
In [11]:
end_df.loc[end_df['day_id'] <= 10, 'final_10_days'] = 'final_10_days'
end_df['final_10_days'] = end_df['final_10_days'].fillna('other_days')
In [12]:
end_df = end_df.groupby(['session', 'final_10_days'])['count'].sum().reset_index()
end_df = end_df.pivot_table(columns='final_10_days', index='session', values='count')
end_df['total'] = end_df['final_10_days'] + end_df['other_days']
end_df['%_final_10'] = end_df['final_10_days']/end_df['total']
# removing total and cleaning % format
house_end_df = end_df.drop(columns='total')
house_end_df['%_final_10'] = house_end_df['%_final_10'].map(lambda x: '{:,.1%}'.format(x))
# add fitting headers and removing index label
house_end_df.columns.name = house_end_df.index.name
house_end_df.index.name = None
In [13]:
house_end_df
Out[13]:
Number of Senate Votes and Sessions by Year¶
In [14]:
df = pd.read_csv('../data/clean/senate_vote_info_clean.csv')
In [15]:
# adding cols to make data prep easier
df['count'] = 1
df['session'] = df['id'].str[2:6]
# remove roll call for normalization (did so for house)
df = df[df['vote_category'] != 'ROLL CALL']
In [16]:
# getting count of total votes for each individual session day
s_df = df.groupby(['session', 'bill_date'])['count'].count().reset_index()
# getting running total of floor votes for each session
s_df['votes_total'] = s_df.groupby(s_df['session'].ne(s_df['session'].shift()).cumsum()).cumsum()
# getting a running total of number of days for each session
s_df['day_id'] = s_df.groupby(s_df['session'].ne(s_df['session'].shift()).cumsum()).cumcount().add(1)
In [17]:
# previewing the table
s_df.head(2)
Out[17]:
In [18]:
# day with most senate votes
s_df[s_df['count'] == s_df['count'].max()]
Out[18]:
In [19]:
# getting table of final days for each session
senate_votes = s_df.groupby('session').last()
# add fitting headers and removing index label
cols = ['last_day', 'last_day_votes', 'total_votes', 'total_sessions']
senate_votes.columns = cols
senate_votes.columns.name = senate_votes.index.name
senate_votes.index.name = None
In [20]:
senate_votes
Out[20]:
[Senate] Total Votes Final 10 Days vs. All Other Days¶
In [21]:
# getting count of total votes for each individual session day
end_df = df.groupby(['session', 'bill_date'])['count'].count().reset_index()
In [22]:
# getting running total of floor votes for each session
end_df['votes_total'] = end_df.groupby(end_df['session'].ne(end_df['session'].shift()).cumsum()).cumsum()
# getting a REVERSED running total of number of days for each session
end_df['day_id'] = end_df.groupby(end_df['session'].ne(end_df['session'].shift()).cumsum()).cumcount(ascending=False).add(1)
In [23]:
end_df.loc[end_df['day_id'] <= 10, 'final_10_days'] = 'final_10_days'
end_df['final_10_days'] = end_df['final_10_days'].fillna('other_days')
In [24]:
end_df = end_df.groupby(['session', 'final_10_days'])['count'].sum().reset_index()
end_df = end_df.pivot_table(columns='final_10_days', index='session', values='count')
end_df['total'] = end_df['final_10_days'] + end_df['other_days']
end_df['%_final_10'] = end_df['final_10_days']/end_df['total']
# removing total and cleaning % format
senate_end_df = end_df.drop(columns='total')
senate_end_df['%_final_10'] = senate_end_df['%_final_10'].map(lambda x: '{:,.1%}'.format(x))
# add fitting headers and removing index label
senate_end_df.columns.name = senate_end_df.index.name
senate_end_df.index.name = None
In [25]:
senate_end_df
Out[25]:
Creating Export Report for Tableau¶
In [26]:
# adding chamber label to house sets
house_votes['chamber'] = 'house'
house_end_df['chamber'] = 'house'
# adding chamber label to senate sets
senate_votes['chamber'] = 'senate'
senate_end_df['chamber'] = 'senate'
In [27]:
final_10_days = pd.concat([house_end_df, senate_end_df])
total_floor_votes = pd.concat([house_votes, senate_votes])
final_10_days.index.name = 'session'
total_floor_votes.index.name = 'session'
final_10_days = final_10_days.reset_index()
total_floor_votes = total_floor_votes.reset_index()
final_10_days.to_csv('../data/reports/total-floor-votes/final_10_days.csv', index=False)
total_floor_votes.to_csv('../data/reports/total-floor-votes/total_floor_votes.csv', index=False)