Created by SmirkyGraphs. Code: Github. Source: OpenPVD.
Providence Property Taxes 2019 Analysis¶
Providence posts a spreadsheet of their full property tax roll every year on OpenPVD. This notebook will be used to view the changes between 2018 and 2019 by 3 geographic regions using Zip codes, Wards, and Neighborhoods. The goal is to see how much taxes have increased or decreased for each region and how much they pay in taxes compared to one another.
This data has 356 parcels removed, due to them not being in the parcel shapefile
The ward, zip code and neighborhoods are based on the centroids of the parcel
In [1]:
import pandas as pd
import numpy as np
In [2]:
# load data (this dataset has 356 properties removed due to no location info)
df = pd.read_csv('./data/reports/2019_pvd_property_tax_clean.csv')
In [3]:
# get total taxes for 2018 -> $280,423,906.64
total_2018 = df.query('year == 2018 & taxes > 0')['taxes'].sum()
# get total taxes for 2019 -> $284,756,820.28
total_2019 = df.query('year == 2019 & taxes > 0')['taxes'].sum()
# get percent increase
change = round(((total_2019 - total_2018)/total_2018)*100, 2)
print(f'total taxes 2018: {total_2018}')
print(f'total taxes 2019: {total_2019}')
print(f' change: {change}%')
In [4]:
increase = df[(df['tax_chng'] > 0) & (df['year']==2019)].shape[0]
decrease = df[(df['tax_chng'] < 0) & (df['year']==2019)].shape[0]
same = df[(df['tax_chng'] == 0) & (df['year']==2019)].shape[0]
exempt = df[(df['assessment'] == df['exemption']) & (df['year']==2019)].shape[0]
print(f'increased: {increase}')
print(f'decreased: {decrease}')
print(f' same: {same}')
print(f' exempt: {exempt}')
In [5]:
# remove exempt properties from the dataset
df = df.query('taxes > 0')
In [6]:
# 2-5 Family and Single Family Homes make up ~45% of taxes and will be the focus of this
table = df.pivot_table(index='class', values='taxes', aggfunc='sum', columns='year')
table = table/table.sum()
table = table.sort_values(by=2019, ascending=False)
table[2018] = table[2018].map(lambda n: '{:,.1%}'.format(n))
table[2019] = table[2019].map(lambda n: '{:,.1%}'.format(n))
table
Out[6]:
From here down only single family and 2-5 family homes are used.¶
In [7]:
# filter for only single family and 2-5 family homes
df = df[(df['class'] == 'Single Family') | (df['class'] == '2 -5 Family')]
In [8]:
# this table will show the median assessment for each zip code
table = df.pivot_table(index='zip_codes', values='assessment', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[8]:
Median Assessment Change from 2018 to 2019 by Zip Code¶
In [9]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='zip_codes', values=['assess_chng', 'assess_%_chng'], aggfunc='median')
table = table.sort_values(by=('assess_%_chng'), ascending=False)
table['assess_%_chng'] = table['assess_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[9]:
Median Assessment per Square Feet by Zip Code¶
In [10]:
# this table shows the median assessment per square foot
table = df.pivot_table(index='zip_codes', values='assess_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[10]:
Median Assessment per Square Feet Change from 2018 to 2019 by Zip Code¶
In [11]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased (for square footage)
table = df.pivot_table(index='zip_codes', values=['assess_pr_sqrft_%_chng', 'assess_pr_sqrft_chng'], aggfunc='median')
table['assess_pr_sqrft_%_chng'] = table['assess_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['assess_pr_sqrft_chng'] = round(table['assess_pr_sqrft_chng'], 2)
table.sort_values(by=('assess_pr_sqrft_%_chng'), ascending=False)
Out[11]:
In [12]:
# getting table of % increase/decrease for 2019
table = df.query('year > 2018').copy()
# add a basic count
table['count'] = 1
# get increase, decrease or same
table.loc[table['tax_chng'] > 0, 'change'] = 'increase'
table.loc[table['tax_chng'] == 0, 'change'] = 'same'
table.loc[table['tax_chng'] < 0, 'change'] = 'decrease'
table = table.pivot_table(index='zip_codes', values='count', aggfunc='count', columns='change')
table = table.div(table.sum(axis=1), axis=0).sort_values(by='increase', ascending=False)
table['decrease'] = table['decrease'].map(lambda n: '{:,.1%}'.format(n))
table['increase'] = table['increase'].map(lambda n: '{:,.1%}'.format(n))
table.sort_values(by='increase', ascending=False)
Out[12]:
Total Property Tax Burden by Zip Code¶
In [13]:
# this table will show zip codes as a percent of total taxes payed
table = df.pivot_table(index='zip_codes', values='assessment', aggfunc='sum', columns='year')
# getting table as percentages
table = table/table.sum()
table = table.sort_values(by=2019, ascending=False)
table[2018] = table[2018].map(lambda n: '{:,.1%}'.format(n))
table[2019] = table[2019].map(lambda n: '{:,.1%}'.format(n))
table
Out[13]:
Median Taxes By Zip Code¶
In [14]:
# this table will show the median taxes due for each zip code
table = df.pivot_table(index='zip_codes', values='taxes', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[14]:
Median Taxes Change from 2018 to 2019 by Zip Code¶
In [15]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='zip_codes', values=['tax_chng', 'tax_%_chng'], aggfunc='median')
table = table.sort_values(by=('tax_%_chng'), ascending=False)
table['tax_%_chng'] = table['tax_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[15]:
Median Tax per Square Feet by Zip Code¶
In [16]:
# this table will show median tax per sqrft
table = df.pivot_table(index='zip_codes', values='tax_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[16]:
Median Tax per Square Feet Change from 2018 to 2019 by Zip Code¶
In [17]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='zip_codes', values=['taxes_pr_sqrft_%_chng', 'taxes_pr_sqrft_chng'], aggfunc='median')
table = table.sort_values(by=('taxes_pr_sqrft_%_chng'), ascending=False)
table['taxes_pr_sqrft_%_chng'] = table['taxes_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['taxes_pr_sqrft_chng'] = round(table['taxes_pr_sqrft_chng'], 2)
table
Out[17]:
In [18]:
# this table will show the median assessment for each zip code
table = df.pivot_table(index='ward', values='assessment', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[18]:
Median Assessment Change from 2018 to 2019 by Ward¶
In [19]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='ward', values=['assess_chng', 'assess_%_chng'], aggfunc='median')
table = table.sort_values(by=('assess_%_chng'), ascending=False)
table['assess_%_chng'] = table['assess_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[19]:
Median Assessment per Square Feet by Ward¶
In [20]:
# this table shows the median assessment per square foot
table = df.pivot_table(index='ward', values='assess_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[20]:
Median Assessment per Square Feet Change from 2018 to 2019 by Ward¶
In [21]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased (for square footage)
table = df.pivot_table(index='ward', values=['assess_pr_sqrft_%_chng', 'assess_pr_sqrft_chng'], aggfunc='median')
table['assess_pr_sqrft_%_chng'] = table['assess_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['assess_pr_sqrft_chng'] = round(table['assess_pr_sqrft_chng'], 2)
table.sort_values(by=('assess_pr_sqrft_%_chng'), ascending=False)
Out[21]:
In [22]:
# getting table of % increase/decrease for 2019
table = df.query('year > 2018').copy()
# add a basic count
table['count'] = 1
# get increase, decrease or same
table.loc[table['tax_chng'] > 0, 'change'] = 'increase'
table.loc[table['tax_chng'] == 0, 'change'] = 'same'
table.loc[table['tax_chng'] < 0, 'change'] = 'decrease'
table = table.pivot_table(index='ward', values='count', aggfunc='count', columns='change')
table = table.div(table.sum(axis=1), axis=0).sort_values(by='increase', ascending=False)
table['decrease'] = table['decrease'].map(lambda n: '{:,.1%}'.format(n))
table['increase'] = table['increase'].map(lambda n: '{:,.1%}'.format(n))
table.sort_values(by='increase', ascending=False)
Out[22]:
Total Property Tax Burden by Ward¶
In [23]:
# this table will show zip codes as a percent of total taxes payed
table = df.pivot_table(index='ward', values='assessment', aggfunc='sum', columns='year')
# getting table as percentages
table = table/table.sum()
table = table.sort_values(by=2019, ascending=False)
table[2018] = table[2018].map(lambda n: '{:,.1%}'.format(n))
table[2019] = table[2019].map(lambda n: '{:,.1%}'.format(n))
table
Out[23]:
Median Taxes By Ward¶
In [24]:
# this table will show the median taxes due for each zip code
table = df.pivot_table(index='ward', values='taxes', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[24]:
Median Taxes Change from 2018 to 2019 by Ward¶
In [25]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='ward', values=['tax_chng', 'tax_%_chng'], aggfunc='median')
table = table.sort_values(by=('tax_%_chng'), ascending=False)
table['tax_%_chng'] = table['tax_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[25]:
Median Tax per Square Feet by Ward¶
In [26]:
# this table will show median tax per sqrft
table = df.pivot_table(index='ward', values='tax_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[26]:
Median Tax per Square Feet Change from 2018 to 2019 by Ward¶
In [27]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='ward', values=['taxes_pr_sqrft_%_chng', 'taxes_pr_sqrft_chng'], aggfunc='median')
table = table.sort_values(by=('taxes_pr_sqrft_%_chng'), ascending=False)
table['taxes_pr_sqrft_%_chng'] = table['taxes_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['taxes_pr_sqrft_chng'] = round(table['taxes_pr_sqrft_chng'], 2)
table
Out[27]:
In [28]:
# this table will show the median assessment for each zip code
table = df.pivot_table(index='neighborhood', values='assessment', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[28]:
Median Assessment Change from 2018 to 2019 by Neighborhood¶
In [29]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='neighborhood', values=['assess_chng', 'assess_%_chng'], aggfunc='median')
table = table.sort_values(by=('assess_%_chng'), ascending=False)
table['assess_%_chng'] = table['assess_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[29]:
Median Assessment per Square Feet by Neighborhood¶
In [30]:
# this table shows the median assessment per square foot
table = df.pivot_table(index='neighborhood', values='assess_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[30]:
Median Assessment per Square Feet Change from 2018 to 2019 by Neighborhood¶
In [31]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased (for square footage)
table = df.pivot_table(index='neighborhood', values=['assess_pr_sqrft_%_chng', 'assess_pr_sqrft_chng'], aggfunc='median')
table['assess_pr_sqrft_%_chng'] = table['assess_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['assess_pr_sqrft_chng'] = round(table['assess_pr_sqrft_chng'], 2)
table.sort_values(by=('assess_pr_sqrft_%_chng'), ascending=False)
Out[31]:
In [32]:
# getting table of % increase/decrease for 2019
table = df.query('year > 2018').copy()
# add a basic count
table['count'] = 1
# get increase, decrease or same
table.loc[table['tax_chng'] > 0, 'change'] = 'increase'
table.loc[table['tax_chng'] == 0, 'change'] = 'same'
table.loc[table['tax_chng'] < 0, 'change'] = 'decrease'
table = table.pivot_table(index='neighborhood', values='count', aggfunc='count', columns='change')
table = table.div(table.sum(axis=1), axis=0).sort_values(by='increase', ascending=False)
table['decrease'] = table['decrease'].map(lambda n: '{:,.1%}'.format(n))
table['increase'] = table['increase'].map(lambda n: '{:,.1%}'.format(n))
table.sort_values(by='increase', ascending=False)
Out[32]:
Total Property Tax Burden by Neighborhood¶
In [33]:
# this table will show zip codes as a percent of total taxes payed
table = df.pivot_table(index='neighborhood', values='assessment', aggfunc='sum', columns='year')
# getting table as percentages
table = table/table.sum()
table = table.sort_values(by=2019, ascending=False)
table[2018] = table[2018].map(lambda n: '{:,.1%}'.format(n))
table[2019] = table[2019].map(lambda n: '{:,.1%}'.format(n))
table
Out[33]:
Median Taxes By Neighborhood¶
In [34]:
# this table will show the median taxes due for each zip code
table = df.pivot_table(index='neighborhood', values='taxes', aggfunc='median', columns='year')
table.sort_values(by=2019, ascending=False)
Out[34]:
Median Taxes Change from 2018 to 2019 by Neighborhood¶
In [35]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='neighborhood', values=['tax_chng', 'tax_%_chng'], aggfunc='median')
table = table.sort_values(by=('tax_%_chng'), ascending=False)
table['tax_%_chng'] = table['tax_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table
Out[35]:
Median Tax per Square Feet by Neighborhood¶
In [36]:
# this table will show median tax per sqrft
table = df.pivot_table(index='neighborhood', values='tax_pr_sqrft', aggfunc='median', columns='year')
table[2018] = round(table[2018], 2)
table[2019] = round(table[2019], 2)
table.sort_values(by=2019, ascending=False)
Out[36]:
Median Tax per Square Feet Change from 2018 to 2019 by Neighborhood¶
In [37]:
# this table shows the median difference between 2018 & 2019 and the median percentage increased
table = df.pivot_table(index='neighborhood', values=['taxes_pr_sqrft_%_chng', 'taxes_pr_sqrft_chng'], aggfunc='median')
table = table.sort_values(by=('taxes_pr_sqrft_%_chng'), ascending=False)
table['taxes_pr_sqrft_%_chng'] = table['taxes_pr_sqrft_%_chng'].map(lambda n: '{:,.1%}'.format(n))
table['taxes_pr_sqrft_chng'] = round(table['taxes_pr_sqrft_chng'], 2)
table
Out[37]: