import json
import math
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import os
import pandas as pd
import psycopg2
import requests
import gender_guesser.detector as gender
%matplotlib inline
plt.style.use('ggplot')
pd.options.display.float_format = '{:,.2f}'.format
def read_or_save(name, func):
path = 'pickles/' + name + '.pickle.gz'
if (os.path.isfile(path)):
return pd.read_pickle(path)
else:
result = func()
os.makedirs('pickles', exist_ok=True)
result.to_pickle(path)
return result
committee_id = "C00401224"
year_since = 2013
with open("config.json") as f:
conf = json.load(f)
conn = psycopg2.connect(
dbname=conf['dbname'],
user=conf['user'],
host=conf['host'],
password=conf['password']
)
conn.autocommit = True
Jan. 1, 2013 — now
filings_count = read_or_save(
'filings_count',
lambda: pd.read_sql("""
SELECT count(*)
FROM
(SELECT filing_id
FROM fec_filings
UNION SELECT filing_id
FROM fec_paper_filings) AS filings;
""", con=conn)
)
filings_count
Jan. 1, 2017 — now
transfer_committees = read_or_save(
'transfer_committees',
lambda: pd.read_sql("""
SELECT filer_committee_id_number,
fec_committees.name,
count,
sum
FROM
(SELECT filer_committee_id_number,
count(*) AS count,
sum(expenditure_amount) as sum
FROM fec_expenditures
JOIN fec_amended_filings using (filing_id)
WHERE
-- filer_committee_id_number != '""" + committee_id + """' AND
extract(YEAR
FROM expenditure_date) >= 2017
-- AND beneficiary_committee_fec_id IS NOT NULL
AND form_type = 'SB23'
GROUP BY filer_committee_id_number
ORDER BY COUNT DESC
LIMIT 10) as counts
LEFT JOIN fec_committees
ON fec_committees.id = filer_committee_id_number
""", con=conn)
)
transfer_committees
Jan. 1, 2013 — Sept. 30, 2014
def calc_candidate_unitemized_by_party(cycle):
return read_or_save(
'candidate_unitemized_by_party_' + str(cycle)[2:],
lambda: pd.read_sql("""
SELECT CASE
WHEN cand_pty_affiliation = 'DEM' THEN 'Democrat'
WHEN cand_pty_affiliation = 'DFL' THEN 'Democrat'
WHEN cand_pty_affiliation = 'REP' THEN 'Republican'
ELSE 'third party'
END AS party,
-- cand_office,
count(DISTINCT cand_id) as candidates,
sum(col_a_individual_contributions_itemized) AS itemized,
sum(col_a_individual_contributions_unitemized) AS unitemized,
sum(col_a_total_individual_contributions) AS indivs
FROM
(SELECT DISTINCT ON (cand_pcc) *
FROM fec_candidates
WHERE
cand_election_yr IN ('%(year_before)s','%(cycle)s') AND
cand_pcc IS NOT NULL
ORDER BY cand_pcc) AS cands
JOIN
(SELECT filing_id,
filer_committee_id_number,
col_a_individual_contributions_itemized,
col_a_individual_contributions_unitemized,
col_a_total_individual_contributions,
col_a_total_receipts,
col_a_total_disbursements,
coverage_from_date,
coverage_through_date
FROM fec_campaign_summaries
JOIN fec_amended_filings USING (filing_id)
WHERE filing_id >= 2000
UNION SELECT DISTINCT ON (filer_committee_id_number,
coverage_from_date,
coverage_through_date) filing_id,
filer_committee_id_number,
col_a_individual_contributions_itemized,
col_a_individual_contributions_unitemized,
col_a_total_individual_contributions,
col_a_total_receipts,
col_a_total_disbursements,
coverage_from_date,
coverage_through_date
FROM fec_paper_campaign_summaries
WHERE col_a_total_individual_contributions IS NOT NULL
ORDER BY filer_committee_id_number,
coverage_through_date,
coverage_from_date,
filing_id DESC) AS summaries ON cand_pcc = filer_committee_id_number
WHERE extract(YEAR
FROM coverage_through_date) >= %(year_before)s
AND coverage_through_date <= '%(cycle)s-09-30'::date
-- AND (col_a_total_receipts > 0
-- OR col_a_total_disbursements > 0)
GROUP BY -- cand_office,
party
ORDER BY indivs DESC
""", con=conn, params={"cycle":cycle,"year_before":cycle-1})
)
candidate_unitemized_by_party_14 = calc_candidate_unitemized_by_party(2014)
candidate_unitemized_by_party_14
Jan. 1, 2017 — Sept. 30, 2018
candidate_unitemized_by_party_18 = calc_candidate_unitemized_by_party(2018)
candidate_unitemized_by_party_18
candidate_unitemized_by_party_18[candidate_unitemized_by_party_18['party'] == 'Democrat']['unitemized'].sum()/candidate_unitemized_by_party_14[candidate_unitemized_by_party_14['party'] == 'Democrat']['unitemized'].sum()
Jan. 1, 2013 — Sept. 30, 2018
actblue_summaries = read_or_save(
'actblue_summaries',
lambda: pd.read_sql("""
SELECT *
FROM fec_pac_summaries
JOIN fec_amended_filings USING (filing_id)
WHERE filer_committee_id_number = '""" + committee_id + """'
AND extract(year from coverage_through_date) >= """ + str(year_since) + """
ORDER BY coverage_through_date DESC;
""", con=conn, parse_dates={
'coverage_from_date': '%Y-%m-%d',
'coverage_through_date': '%Y-%m-%d'
})
)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
display(actblue_summaries[[
'filing_id',
'coverage_from_date',
'coverage_through_date',
'report_code',
'col_a_total_contributions'
]])
Jan. 1, 2013 — now
def get_reports(committee_id):
base = "https://api.open.fec.gov/v1/"
url = base + "committee/" + committee_id + "/reports/?api_key=" + conf['fec_key'] + "&is_amended=False&per_page=100"
return pd.DataFrame(requests.get(url).json()['results'])
actblue_api_summaries = read_or_save('actblue_api_summaries', lambda: get_reports(committee_id))
pd.to_numeric(actblue_api_summaries['calendar_ytd'])
actblue_api_summaries = actblue_api_summaries[actblue_api_summaries['calendar_ytd'] >= year_since]
actblue_api_summaries[['file_number','calendar_ytd']]
pd.DataFrame(pd.concat([actblue_api_summaries['file_number'],
actblue_summaries['filing_id']]).drop_duplicates(keep=False))
Jan. 1, 2013 — now
def cycle(year):
return math.ceil(float(year) / 2.) * 2
assert (cycle(2013) == 2014 and
cycle(2014) == 2014 and
cycle(2015) == 2016 and
cycle(2016) == 2016 and
cycle(2017) == 2018 and
cycle(2018) == 2018),"unexpected cycle output"
actblue_contrib_count_by_year = read_or_save(
'actblue_contrib_count_by_year',
lambda: pd.read_sql("""
-- DROP MATERIALIZED VIEW actblue_contribs;
CREATE MATERIALIZED VIEW IF NOT EXISTS actblue_contribs AS
SELECT contribs.*,
fec_expenditures.election_code,
beneficiary_committee_fec_id,
beneficiary_candidate_fec_id,
beneficiary_candidate_state,
beneficiary_candidate_office,
coalesce(cand_comms.cand_id,cands.cand_id) AS cand_id,
coalesce(cand_comms.cand_pty_affiliation,cands.cand_pty_affiliation) AS cand_pty_affiliation,
coalesce(cand_comms.cand_office_st,cands.cand_office_st) AS cand_office_st,
coalesce(cand_comms.cand_office,cands.cand_office) AS cand_office,
coalesce(cand_comms.cand_pcc,cands.cand_pcc) AS cand_pcc
FROM
(SELECT fec_contributions.filing_id,
transaction_id,
contributor_last_name,
contributor_first_name,
contributor_street_1,
contributor_city,
contributor_state,
contributor_zip_code,
contribution_date,
contribution_amount,
contributor_employer,
contributor_occupation,
memo_text_description
FROM fec_contributions
WHERE filing_id IN
(SELECT filing_id
FROM fec_pac_summaries
JOIN fec_amended_filings USING (filing_id)
WHERE filer_committee_id_number = 'C00401224'
AND extract(YEAR
FROM coverage_through_date) >= 2013
ORDER BY coverage_through_date DESC)
AND form_type = 'SA11AI') AS contribs
LEFT JOIN fec_expenditures ON fec_expenditures.filing_id IN
(SELECT filing_id
FROM fec_pac_summaries
JOIN fec_amended_filings USING (filing_id)
WHERE filer_committee_id_number = 'C00401224'
AND extract(YEAR
FROM coverage_through_date) >= 2013
ORDER BY coverage_through_date DESC)
AND fec_expenditures.form_type = 'SB23'
AND 'SB23_' || replace(transaction_id,'SA11AI_','') = transaction_id_number
LEFT JOIN
(SELECT DISTINCT ON (cand_id) cand_id,
cand_pcc,
cand_pty_affiliation,
cand_office_st,
cand_office
FROM fec_candidates
ORDER BY cand_id,
cand_election_yr::int DESC) AS cands ON cands.cand_id = beneficiary_candidate_fec_id
AND beneficiary_committee_fec_id NOT IN
(SELECT id
FROM fec_committees
WHERE designation = 'J')
LEFT JOIN
(SELECT DISTINCT ON (cand_pcc) cand_id,
cand_pcc,
cand_pty_affiliation,
cand_office_st,
cand_office
FROM fec_candidates
ORDER BY cand_pcc,
cand_election_yr::int DESC) AS cand_comms ON cand_comms.cand_pcc = beneficiary_committee_fec_id;
GRANT ALL ON TABLE actblue_contribs TO redash_default;
GRANT ALL ON TABLE actblue_contribs TO politics;
-- REFRESH MATERIALIZED VIEW actblue_contribs;
-- ANALYZE actblue_contribs;
SELECT extract(YEAR
FROM contribution_date) AS YEAR,
count(*),
sum(contribution_amount)
FROM actblue_contribs
GROUP BY extract(YEAR
FROM contribution_date);
""", con=conn)
)
actblue_contrib_count_by_year['cycle'] = actblue_contrib_count_by_year[~actblue_contrib_count_by_year['year'].isna()]['year'].apply(cycle)
actblue_contrib_count_by_cycle = pd.DataFrame(
actblue_contrib_count_by_year.groupby(actblue_contrib_count_by_year['cycle'])['count','sum'].sum()
)
actblue_contrib_count_by_cycle
Jan. 1, 2013 — now
actblue_congress_contrib_count_by_year = read_or_save(
'actblue_congress_contrib_totals',
lambda: pd.read_sql("""
SELECT extract(YEAR
FROM contribution_date) AS YEAR,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE coalesce(beneficiary_candidate_office,cand_office) IN ('H','S')
GROUP BY extract(YEAR
FROM contribution_date);
""", con=conn)
)
actblue_congress_contrib_count_by_year['cycle'] = actblue_congress_contrib_count_by_year[~actblue_congress_contrib_count_by_year['year'].isna()]['year'].apply(cycle)
actblue_congress_contrib_count_by_cycle = pd.DataFrame(
actblue_congress_contrib_count_by_year.groupby(actblue_congress_contrib_count_by_year['cycle'])['count','sum'].sum()
)
actblue_congress_contrib_count_by_cycle
actblue_congress_contrib_count_by_cycle.to_csv('data/actblue_contribs_by_cycle.csv')
actblue_congress_contrib_totals_to_date = read_or_save(
'actblue_congress_contrib_totals_to_date',
lambda: pd.read_sql("""
SELECT extract(YEAR
FROM contribution_date) AS YEAR,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE coalesce(beneficiary_candidate_office,cand_office) IN ('H',
'S')
AND ((contribution_date >= '2017-01-01'
AND contribution_date <= '2018-09-30')
OR (contribution_date >= '2015-01-01'
AND contribution_date <= '2016-09-30')
OR (contribution_date >= '2013-01-01'
AND contribution_date <= '2014-09-30'))
GROUP BY extract(YEAR
FROM contribution_date);
""", con=conn)
)
actblue_congress_contrib_totals_to_date['cycle'] = actblue_congress_contrib_totals_to_date[~actblue_congress_contrib_totals_to_date['year'].isna()]['year'].apply(cycle)
actblue_congress_contrib_count_by_cycle_to_date = pd.DataFrame(
actblue_congress_contrib_totals_to_date.groupby(actblue_congress_contrib_totals_to_date['cycle'])['count','sum'].sum()
)
actblue_congress_contrib_count_by_cycle_to_date
Jan. 1, 2013 — now
contribs_over_time = actblue_summaries[['coverage_through_date','col_a_total_contributions']]
# https://scentellegher.github.io/programming/2017/05/24/pandas-bar-plot-with-formatted-dates.html
contribs_over_time.set_index('coverage_through_date',inplace=True)
fig, ax = plt.subplots(figsize=(12,7))
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
# https://matplotlib.org/examples/pylab_examples/custom_ticker1.html
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: '$%1.fM' % (x*1e-6)))
ax.bar(contribs_over_time.index, contribs_over_time['col_a_total_contributions'], width=5)
ax.set_title('Total contributions by reporting period')
actblue_contribs_by_day = read_or_save(
'actblue_contribs_by_day',
lambda: pd.read_sql("""
SELECT contribution_date,
sum(contribution_amount),
count(*)
FROM actblue_contribs
-- where extract(year from contribution_date) >= 2017 and
-- where cand_office in ('S','H')
GROUP BY contribution_date
""", con=conn)
)
actblue_contribs_by_day.head(10)
actblue_contribs_by_day.to_csv('data/actblue_contribs_by_day.csv')
Spreadsheet: actblue_contribs_by_day.csv
actblue_congress_contribs_by_day = read_or_save(
'actblue_congress_contribs_by_day',
lambda: pd.read_sql("""
SELECT contribution_date,
sum(contribution_amount),
count(*)
FROM actblue_contribs
-- where extract(year from contribution_date) >= 2017 and
where coalesce(beneficiary_candidate_office,cand_office) in ('S','H')
GROUP BY contribution_date
""", con=conn)
)
actblue_congress_contribs_by_day.head(10)
actblue_congress_contribs_by_day.to_csv('data/actblue_congress_contribs_by_day.csv')
Spreadsheet: actblue_congress_contribs_by_day.csv
contribs_by_day = actblue_congress_contribs_by_day[['contribution_date','sum']]
# https://scentellegher.github.io/programming/2017/05/24/pandas-bar-plot-with-formatted-dates.html
contribs_by_day.set_index('contribution_date',inplace=True)
fig, ax = plt.subplots(figsize=(12,7))
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
# https://matplotlib.org/examples/pylab_examples/custom_ticker1.html
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: '$%1.fM' % (x*1e-6)))
ax.bar(contribs_by_day.index, contribs_by_day['sum'], width=5)
ax.set_title('Congressional contribution totals by day')
Jan. 1, 2013 — now
actblue_summaries['cycle'] = actblue_summaries.coverage_through_date.dt.year.apply(cycle)
contribs_by_cycle = pd.DataFrame(
actblue_summaries.groupby(actblue_summaries['cycle'])['col_a_total_contributions'].sum()
)
contribs_by_cycle
actblue_summaries['cycle'] = actblue_summaries.coverage_through_date.dt.year.apply(cycle)
actblue_summaries_to_date = actblue_summaries[((actblue_summaries['coverage_through_date'] >= '2017-01-01') &
(actblue_summaries['coverage_through_date'] <= '2018-09-30')) |
((actblue_summaries['coverage_through_date'] >= '2015-01-01') &
(actblue_summaries['coverage_through_date'] <= '2016-09-30')) |
((actblue_summaries['coverage_through_date'] >= '2013-01-01') &
(actblue_summaries['coverage_through_date'] <= '2014-09-30'))]
contribs_by_cycle = pd.DataFrame(
actblue_summaries_to_date.groupby(actblue_summaries_to_date['cycle'])['col_a_total_contributions'].sum()
)
contribs_by_cycle
# https://scentellegher.github.io/programming/2017/05/24/pandas-bar-plot-with-formatted-dates.html
# contribs_by_year.set_index('coverage_through_date',inplace=True)
fig, ax = plt.subplots(figsize=(10,7))
ax.xaxis.set_major_locator(mdates.YearLocator())
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
# https://matplotlib.org/examples/pylab_examples/custom_ticker1.html
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: '$%1.fM' % (x*1e-6)))
ax.bar(contribs_by_cycle.index, contribs_by_cycle['col_a_total_contributions'], width=0.8)
ax.set_title('Total contributions by cycle')
Jan. 1, 2013 — now
actblue_transfers = read_or_save(
'actblue_transfers',
lambda: pd.read_sql("""
SELECT fec_committees.name,
totals.*
FROM
(SELECT beneficiary_committee_fec_id,
count(*),
sum(contribution_amount)
FROM actblue_contribs
GROUP BY beneficiary_committee_fec_id) AS totals
LEFT JOIN fec_committees ON beneficiary_committee_fec_id = fec_committees.id
""", con=conn)
)
actblue_transfers.sort_values(by=['sum'], ascending=False).head(20)
Jan. 1, 2013 — now
actblue_itself = read_or_save(
'actblue_itself',
lambda: pd.read_sql("""
SELECT count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE memo_text_description = 'Contribution to ActBlue'
""", con=conn)
)
actblue_itself
Jan. 1, 2017 — Sept. 30, 2018
actblue_proportion = read_or_save(
'actblue_proportion',
lambda: pd.read_sql("""
SELECT actblue,
count(*)
FROM
(SELECT cand_id,
cand_pcc,
CASE
WHEN COUNT IS NOT NULL THEN TRUE
ELSE FALSE
END AS actblue
FROM (SELECT DISTINCT ON (cand_id,
cand_pcc) *
FROM fec_candidates
WHERE cand_election_yr IN ('2018','2017')
AND cand_pcc IS NOT NULL
AND cand_pty_affiliation IN ('DFL',
'DEM')
ORDER BY cand_id,
cand_pcc) AS cands
LEFT JOIN
(SELECT fec_committees.name,
totals.*
FROM
(SELECT beneficiary_committee_fec_id,
count(*) AS COUNT,
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR
FROM contribution_date) >= 2017
GROUP BY beneficiary_committee_fec_id) AS totals
LEFT JOIN fec_committees ON beneficiary_committee_fec_id = fec_committees.id) AS actblue_committees ON cand_pcc = beneficiary_committee_fec_id) AS actblue_candidates
GROUP BY actblue
""", con=conn)
)
actblue_proportion['percent'] = actblue_proportion['count']/actblue_proportion['count'].sum()*100
actblue_proportion
def calc_actblue_proportion_amount(cycle):
return read_or_save(
'actblue_proportion_amount_' + str(cycle)[2:],
lambda: pd.read_sql('''
WITH cands AS
(SELECT DISTINCT ON (cand_pcc) *
FROM fec_candidates
WHERE cand_election_yr IN ('%(year_before)s',
'%(cycle)s')
AND cand_pty_affiliation IN ('DFL',
'DEM')
AND cand_pcc IS NOT NULL
ORDER BY cand_pcc),
candidate_totals AS
(SELECT cand_id,
cand_pcc,
cand_name,
cand_office,
cand_office_st,
cand_office_district,
cand_election_yr,
coverage_from_date,
coverage_through_date,
col_a_total_individual_contributions
FROM cands
JOIN fec_campaign_summaries ON cand_pcc = filer_committee_id_number
JOIN fec_amended_filings USING (filing_id)
WHERE extract(YEAR
FROM coverage_through_date) >= %(year_before)s
AND coverage_through_date <= '%(cycle)s-09-30'::date
AND filing_id >= 2000
UNION SELECT cand_id,
cand_pcc,
cand_name,
cand_office,
cand_office_st,
cand_office_district,
cand_election_yr,
coverage_from_date,
coverage_through_date,
col_a_total_individual_contributions
FROM cands
JOIN
(SELECT DISTINCT ON (filer_committee_id_number,
coverage_from_date,
coverage_through_date) filing_id,
filer_committee_id_number,
col_a_total_individual_contributions,
coverage_from_date,
coverage_through_date
FROM fec_paper_campaign_summaries
WHERE col_a_total_individual_contributions IS NOT NULL
ORDER BY filer_committee_id_number,
coverage_through_date,
coverage_from_date,
filing_id DESC) AS paper_totals ON cand_pcc = filer_committee_id_number
WHERE extract(YEAR
FROM coverage_through_date) >= %(year_before)s
AND coverage_through_date <= '%(cycle)s-09-30'::date)
SELECT cand_pcc,
max(cand_name) as cand_name,
max(cand_office) as cand_office,
max(cand_office_st) as cand_office_st,
max(cand_office_district) as cand_office_district,
max(cand_election_yr) as cand_election_yr,
array_agg(coverage_from_date) AS from_dates,
array_agg(coverage_through_date) AS through_dates,
sum(col_a_total_individual_contributions) AS committee_total,
sum(actblue_total) AS actblue_total
FROM
(SELECT candidate_totals.cand_pcc,
max(candidate_totals.cand_name) as cand_name,
max(candidate_totals.cand_office) as cand_office,
max(candidate_totals.cand_office_st) as cand_office_st,
max(candidate_totals.cand_office_district) as cand_office_district,
max(candidate_totals.cand_election_yr) as cand_election_yr,
coverage_from_date,
coverage_through_date,
col_a_total_individual_contributions,
sum(contribution_amount) AS actblue_total,
count(*) as actblue_count
FROM candidate_totals
LEFT JOIN actblue_contribs ON extract(YEAR
FROM coverage_through_date) >= %(year_before)s
AND coverage_through_date <= '%(cycle)s-09-30'::date
AND actblue_contribs.cand_pcc = candidate_totals.cand_pcc
AND coverage_from_date <= contribution_date
AND coverage_through_date >= contribution_date
GROUP BY candidate_totals.cand_pcc,
col_a_total_individual_contributions,
coverage_from_date,
coverage_through_date) AS totals
GROUP BY cand_pcc
''',con=conn,params={"cycle":cycle,"year_before":cycle-1})
)
actblue_proportion_amount = calc_actblue_proportion_amount(2018)
actblue_proportion_amount['percent'] = actblue_proportion_amount['actblue_total']/actblue_proportion_amount['committee_total']
str(round(actblue_proportion_amount['actblue_total'].sum()/actblue_proportion_amount['committee_total'].sum()*100,2)) + " percent of contributions from individuals to Democratic campaigns came through ActBlue in the 2018 cycle (Jan. 1, 2017 — Sept/. 30, 2018)"
actblue_proportion_amount_14 = calc_actblue_proportion_amount(2014)
str(round(actblue_proportion_amount_14['actblue_total'].sum()/actblue_proportion_amount_14['committee_total'].sum()*100,2)) + " percent of contributions from individuals to Democratic campaigns came through ActBlue in the 2014 cycle (Jan. 1, 2013 — Sept. 30, 2014)"
actblue_proportion_amount[(~actblue_proportion_amount['percent'].isna()) & (actblue_proportion_amount['committee_total'] > 30000)].sort_values(by=['actblue_total'],ascending=False).head(20).sort_values(by=['percent'],ascending=False)
actblue_proportion_amount[(~actblue_proportion_amount['percent'].isna()) & (actblue_proportion_amount['percent'] <= 1) & (actblue_proportion_amount['committee_total'] > 20000)].sort_values(by=['percent'],ascending=False).to_csv('data/actblue_proportion.csv')
actblue_cand_totals = read_or_save(
'actblue_cand_totals',
lambda: pd.read_sql("""
WITH cands AS
(SELECT DISTINCT ON (cand_id) *
FROM fec_candidates
WHERE cand_election_yr IN ('2017',
'2018')
AND cand_pcc IS NOT NULL
ORDER BY cand_id)
SELECT cands.*,
totals.*
FROM
(SELECT cand_id,
sum(CASE WHEN contributor_state != coalesce(cand_office_st,beneficiary_candidate_state) THEN contribution_amount ELSE 0 END) as out_of_state,
sum(contribution_amount) AS SUM,
count(*) AS COUNT
FROM actblue_contribs
WHERE coalesce(beneficiary_candidate_office,cand_office) IN ('S',
'H')
AND extract(YEAR
FROM contribution_date) >= 2017
GROUP BY cand_id) AS totals
LEFT JOIN cands USING (cand_id)
ORDER BY SUM DESC
""", con=conn)
)
actblue_cand_totals.head(10)
actblue_cand_totals.to_csv('data/actblue_cand_totals.csv')
actblue_cand_totals['percent_out'] = actblue_cand_totals['out_of_state']/actblue_cand_totals['sum']
actblue_cand_totals[actblue_cand_totals['sum'] > 1000000].sort_values(by='percent_out',ascending=False).head(100)
actblue_cand_totals.sort_values(by='out_of_state',ascending=False).head(10)
actblue_states = read_or_save(
'actblue_states',
lambda: pd.read_sql("""
SELECT contributor_state,
coalesce(cand_office_st,beneficiary_candidate_state) as recipient_state,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR FROM contribution_date) >= 2017
and coalesce(beneficiary_candidate_office,cand_office) in ('H','S')
GROUP BY coalesce(cand_office_st,beneficiary_candidate_state),
contributor_state
""", con=conn)
)
actblue_states.sort_values(by=['sum'], ascending=False).head(10)
actblue_states.to_csv('data/actblue_states.csv')
Spreadsheet: actblue_states.csv
Jan. 1, 2017 — Sept. 30, 2018
actblue_contributor_states = actblue_states.groupby(actblue_states['contributor_state']).agg({'sum': 'sum'})
actblue_contributor_states['percent'] = actblue_contributor_states['sum']/actblue_contributor_states['sum'].sum()*100
actblue_contributor_states.sort_values(by=['percent'], ascending=False).head(10)
actblue_contributor_states[actblue_contributor_states.index.isin(['CA','NY'])]['percent'].sum()
actblue_contributor_states.to_csv('data/actblue_contributor_states.csv')
Jan. 1, 2017 — Sept. 30, 2018
actblue_recipient_states = actblue_states.groupby(actblue_states['recipient_state']).agg({'sum': 'sum'})
actblue_recipient_states['percent'] = actblue_recipient_states['sum']/actblue_recipient_states['sum'].sum()*100
actblue_recipient_states.sort_values(by=['percent'], ascending=False)
Jan. 1, 2017 — Sept. 30, 2018
actblue_states['in'] = actblue_states['contributor_state'] == actblue_states['recipient_state']
actblue_states.groupby(actblue_states['in'])['sum'].sum()
actblue_states[actblue_states['in'] == False]['sum'].sum()/actblue_states['sum'].sum()*100
actblue_states_beto = read_or_save(
'actblue_states_beto',
lambda: pd.read_sql("""
SELECT contributor_state,
coalesce(cand_office_st,beneficiary_candidate_state) as recipient_state,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR FROM contribution_date) >= 2017
AND beneficiary_committee_fec_id = 'C00501197'
GROUP BY coalesce(cand_office_st,beneficiary_candidate_state),
contributor_state
""", con=conn)
)
actblue_states_beto['in'] = actblue_states_beto['contributor_state'] == actblue_states_beto['recipient_state']
actblue_states_beto.groupby(actblue_states_beto['in'])['sum'].sum()
actblue_states_beto.sort_values(by='sum', ascending=False).head(10)
actblue_states_beto[actblue_states_beto['in'] == False]['sum'].sum()/actblue_states_beto['sum'].sum()*100
actblue_states_mcgrath = read_or_save(
'actblue_states_mcgrath',
lambda: pd.read_sql("""
SELECT contributor_state,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR FROM contribution_date) >= 2017
AND beneficiary_committee_fec_id = 'C00646745'
GROUP BY contributor_state
""", con=conn)
)
actblue_states_mcgrath['in'] = (actblue_states_mcgrath['contributor_state'] == 'KY')
actblue_states_mcgrath.groupby(actblue_states_mcgrath['in'])['sum'].sum()
actblue_states_mcgrath.sort_values(by='sum', ascending=False).head(10)
mcgrath_itemized_states = read_or_save(
'mcgrath_itemized_states',
lambda: pd.read_sql("""
SELECT contributor_state,
count(*) AS COUNT,
sum(contribution_amount) AS SUM
FROM fec_contributions
JOIN fec_amended_filings USING (filing_id)
WHERE filer_committee_id_number = 'C00646745'
AND form_type = 'SA11AI'
AND extract(YEAR
FROM contribution_date) >= 2017
AND (memo_code IS NULL
OR memo_code != 'X')
GROUP BY contributor_state
ORDER BY SUM DESC
""", con=conn)
)
mcgrath_itemized_states.sort_values(by='sum',ascending=False).head(10)
actblue_mcgrath_q3 = read_or_save(
'actblue_mcgrath_q3',
lambda: pd.read_sql("""
SELECT count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE contribution_date >= '2018-07-01'
AND contribution_date <= '2018-09-30'
AND beneficiary_committee_fec_id = 'C00646745'
""", con=conn)
)
actblue_mcgrath_q3
cruz_itemized_states = read_or_save(
'cruz_itemized_states',
lambda: pd.read_sql("""
SELECT contributor_state,
count(*) AS COUNT,
sum(contribution_amount) AS SUM
FROM fec_contributions
JOIN fec_amended_filings USING (filing_id)
WHERE filer_committee_id_number = 'C00492785'
AND form_type = 'SA11AI'
AND extract(YEAR
FROM contribution_date) >= 2017
AND (memo_code IS NULL
OR memo_code != 'X')
GROUP BY contributor_state
ORDER BY SUM DESC
""", con=conn)
)
cruz_itemized_states['percent'] = cruz_itemized_states['sum']/cruz_itemized_states['sum'].sum()*100
cruz_itemized_states.sort_values(by='sum',ascending=False).head(10)
Jan. 1, 2017 — Sept. 30, 2018
pres_votes = pd.read_csv('data/presvotes.csv')
pres_votes['red'] = pres_votes['clinton'] < pres_votes['trump']
pres_votes['blue'] = pres_votes['clinton'] > pres_votes['trump']
# pres_votes
pres_votes
pres_votes['margin'] = (pres_votes['clinton']/pres_votes['total'])-(pres_votes['trump']/pres_votes['total'])
pres_votes_indexed = pres_votes.set_index('state')
votes_vs_contribs = actblue_contributor_states.merge(pres_votes_indexed, left_index=True, right_index=True, how='inner')
votes_vs_contribs['dollars_per'] = votes_vs_contribs['sum']/votes_vs_contribs['total']
votes_vs_contribs[votes_vs_contribs.index != "DC"].plot.scatter(x='margin', y='dollars_per')
votes_vs_contribs[votes_vs_contribs['red']]['sum'].sum()/votes_vs_contribs[votes_vs_contribs['red']]['total'].sum()
votes_vs_contribs[votes_vs_contribs['blue']]['sum'].sum()/votes_vs_contribs[votes_vs_contribs['blue']]['total'].sum()
votes_vs_contribs[(votes_vs_contribs['margin'] > 0) & (votes_vs_contribs['margin'] < 0.2)]['sum'].sum()/votes_vs_contribs[(votes_vs_contribs['margin'] > 0) & (votes_vs_contribs['margin'] < 0.2)]['total'].sum()
votes_vs_contribs[(votes_vs_contribs['margin'] >= 0.2)]['sum'].sum()/votes_vs_contribs[(votes_vs_contribs['margin'] >= 0.2)]['total'].sum()
votes_vs_contribs[(votes_vs_contribs['margin'] >= 0.2)]
# votes_vs_contribs[votes_vs_contribs['dollars_per'] > 7.5]
# votes_vs_contribs[votes_vs_contribs['dollars_per'] <= 1.3]
# votes_vs_contribs.groupby(votes_vs_contribs['blue'])['sum'].sum()
contrib_flow = pres_votes[['state','blue']].merge(pres_votes[['state','blue']].merge(actblue_states, right_on='contributor_state', left_on='state', how='inner'), right_on='recipient_state', left_on='state', how='inner', suffixes=['_recip','_contrib'])
contrib_flow
pd.pivot_table(contrib_flow, values='sum', index=['blue_recip'], columns=['blue_contrib'], aggfunc=np.sum)
contrib_flow[(~contrib_flow['blue_recip']) & (contrib_flow['blue_contrib'])]['sum'].sum()
contrib_flow[(contrib_flow['blue_recip']) & (~contrib_flow['blue_contrib'])]['sum'].sum()
contrib_flow[(~contrib_flow['blue_recip']) & (contrib_flow['blue_contrib'])]['sum'].sum()/contrib_flow[(contrib_flow['blue_recip']) & (~contrib_flow['blue_contrib'])]['sum'].sum()
pd.pivot_table(contrib_flow[~contrib_flow['in']], values='sum', index=['blue_recip'], columns=['blue_contrib'], aggfunc=np.sum)
actblue_recipient_out_states = actblue_states[~actblue_states['in']].groupby(actblue_states[~actblue_states['in']]['recipient_state']).agg({'sum': 'sum'})
actblue_recipient_out_states['percent'] = actblue_recipient_out_states['sum']/actblue_recipient_out_states['sum'].sum()*100
actblue_recipient_out_states.sort_values(by=['percent'], ascending=False)
actblue_states_14 = read_or_save(
'actblue_states_14',
lambda: pd.read_sql("""
SELECT contributor_state,
coalesce(cand_office_st,beneficiary_candidate_state) as recipient_state,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR FROM contribution_date) >= 2013
and extract(YEAR FROM contribution_date) <= 2014
and coalesce(beneficiary_candidate_office,cand_office) in ('H','S')
GROUP BY coalesce(cand_office_st,beneficiary_candidate_state),
contributor_state
""", con=conn)
)
actblue_states_14.sort_values(by=['sum'], ascending=False).head(10)
actblue_states_14['in'] = actblue_states_14['contributor_state'] == actblue_states_14['recipient_state']
contrib_flow_14 = pres_votes[['state','blue']].merge(pres_votes[['state','blue']].merge(actblue_states_14, right_on='contributor_state', left_on='state', how='inner'), right_on='recipient_state', left_on='state', how='inner', suffixes=['_recip','_contrib'])
pd.pivot_table(contrib_flow_14, values='sum', index=['blue_recip'], columns=['blue_contrib'], aggfunc=np.sum)
pd.pivot_table(contrib_flow_14[~contrib_flow_14['in']], values='sum', index=['blue_recip'], columns=['blue_contrib'], aggfunc=np.sum)
pres_votes[(pres_votes['state'] == 'CA') | (pres_votes['state'] == 'NY')]
pres_votes['clinton'].sum()
(pres_votes[pres_votes['state'] == 'CA']['clinton'].sum()+pres_votes[pres_votes['state'] == 'NY']['clinton'].sum())/pres_votes['clinton'].sum()*100
actblue_texas_cities = read_or_save(
'actblue_texas_cities',
lambda: pd.read_sql("""
SELECT lower(contributor_city),
count(*) AS COUNT,
sum(contribution_amount) AS SUM
FROM actblue_contribs
WHERE contributor_state = 'TX'
AND extract(YEAR
FROM contribution_date) > 2017
AND coalesce(beneficiary_candidate_office,cand_office) IN ('H','S')
GROUP BY lower(contributor_city)
ORDER BY COUNT DESC
""", con=conn)
)
actblue_texas_cities['percent'] = actblue_texas_cities['sum']/actblue_texas_cities['sum'].sum()*100
actblue_texas_cities.head(5)
actblue_texas_cities[actblue_texas_cities['lower'].isin(['austin','houston','dallas'])]['percent'].sum()
specials = ['C00631564',
'C00634543',
'C00662650',
'C00615823',
'C00656116',
'C00663161',
'C00655332',
'C00647339',
'C00680348',
'C00632067',
'C00631945',
'C00629659',
'C00633362',
'C00641340',
'C00640623',
'C00657411',
'C00663914',
'C00655977',
'C00639401',
'C00633610',
'C00667964',
'C00630426',
'C00634709',
'C00680942',
'C00632232',
'C00658708',
'C00678672',
'C00649897',
'C00660142',
'C00679779',
'C00647917',
'C00635128']
actblue_specials = actblue_proportion_amount[actblue_proportion_amount['cand_pcc'].isin(specials)].merge(actblue_transfers, left_on='cand_pcc', right_on='beneficiary_committee_fec_id', how='inner')[['cand_pcc','name','committee_total','actblue_total']]
actblue_specials[['committee_total','actblue_total']].sum()
Jan. 1, 2013 — Sept. 30, 2018
actblue_freq = read_or_save(
'actblue_freq',
lambda: pd.read_sql("""
SELECT COUNT,
count(*) AS count_of_count
FROM
(SELECT contributor_first_name,
contributor_last_name,
left(contributor_zip_code,5),
count(DISTINCT contribution_date::text || contribution_amount) AS COUNT
FROM actblue_contribs
WHERE extract(year from contribution_date) >= 2017
GROUP BY contributor_first_name,
contributor_last_name,
left(contributor_zip_code,5)) AS donors
GROUP BY COUNT
""", con=conn)
)
actblue_freq['percent'] = actblue_freq['count_of_count']/actblue_freq['count_of_count'].sum()*100
actblue_freq.sort_values(by=['count_of_count'], ascending=False).head(5)
actblue_freq.to_csv('data/actblue_freq.csv')
Spreadsheet: actblue_freq.csv
Jan. 1, 2017 — Sept. 30, 2018
actblue_addicts = read_or_save(
'actblue_addicts',
lambda: pd.read_sql("""
SELECT contributor_first_name,
contributor_last_name,
array_agg(DISTINCT contributor_occupation),
array_agg(DISTINCT contributor_employer),
array_agg(DISTINCT contributor_street_1),
array_agg(DISTINCT contributor_city),
contributor_state,
left(contributor_zip_code,5) AS contributor_zip_code,
count(DISTINCT contribution_date::text || contribution_amount) AS COUNT,
sum(contribution_amount) AS total,
sum(contribution_amount)/count(DISTINCT contribution_date::text || contribution_amount) AS avg_per
FROM actblue_contribs
WHERE extract(YEAR
FROM contribution_date) > 2016
GROUP BY contributor_first_name,
contributor_last_name,
contributor_state,
left(contributor_zip_code,5)
ORDER BY COUNT DESC
LIMIT 100
""", con=conn)
)
actblue_addicts
actblue_addicts.to_csv('data/actblue_addicts.csv')
Spreadsheet: actblue_addicts.csv
actblue_wolfe = read_or_save(
'actblue_wolfe',
lambda: pd.read_sql("""
SELECT contribution_date,
contribution_amount,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE contributor_last_name = 'WOLFE'
AND contributor_first_name = 'TERRY'
AND contributor_zip_code ILIKE '26508%'
AND coalesce(beneficiary_candidate_office,cand_office) IN ('S',
'H')
AND cand_pty_affiliation IN ('DEM','DFL')
AND extract(YEAR
FROM contribution_date) >= 2017
GROUP BY contribution_date,
contribution_amount
ORDER BY count(*) DESC
""", con=conn)
)
actblue_wolfe.head(5)
actblue_wolfe['sum'].sum()
actblue_wolfe['sum'].mean()
actblue_wolfe_indies = read_or_save(
'actblue_wolfe_indies',
lambda: pd.read_sql("""
SELECT *
FROM actblue_contribs
WHERE contributor_last_name = 'WOLFE'
AND contributor_first_name = 'TERRY'
AND contributor_zip_code ILIKE '26508%'
AND coalesce(beneficiary_candidate_office,cand_office) IN ('S',
'H')
AND cand_pty_affiliation NOT IN ('DEM','DFL')
AND extract(YEAR
FROM contribution_date) >= 2017
""", con=conn)
)
actblue_wolfe_indies
actblue_wolfe_ojeda = read_or_save(
'actblue_wolfe_ojeda',
lambda: pd.read_sql("""
SELECT *
FROM actblue_contribs
WHERE contributor_last_name = 'WOLFE'
AND contributor_first_name = 'TERRY'
AND contributor_zip_code ILIKE '26508%'
AND cand_pcc = 'C00639989'
AND extract(YEAR
FROM contribution_date) >= 2017
""", con=conn)
)
actblue_wolfe_ojeda['contribution_amount'].sum()
actblue_wolfe_ojeda['contribution_amount'].count()
actblue_barlow = read_or_save(
'actblue_barlow',
lambda: pd.read_sql("""
SELECT contribution_date,
contribution_amount,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE contributor_last_name = 'BARLOW'
AND contributor_first_name = 'SIBYLLE'
AND contributor_zip_code ILIKE '01742%'
--AND coalesce(beneficiary_candidate_office,cand_office) IN ('S',
-- 'H')
AND extract(YEAR
FROM contribution_date) >= 2017
GROUP BY contribution_date,
contribution_amount
ORDER BY count(*) DESC
""", con=conn)
)
actblue_barlow['contribution_amount'].describe()
actblue_barlow_out_of_state = read_or_save(
'actblue_barlow_out_of_state',
lambda: pd.read_sql("""
SELECT sum(CASE WHEN contributor_state != coalesce(cand_office_st,beneficiary_candidate_state) THEN 1 ELSE 0 END) as out_of_state,
sum(CASE WHEN contributor_state = coalesce(cand_office_st,beneficiary_candidate_state) THEN 1 ELSE 0 END) as in_state
FROM actblue_contribs
WHERE contributor_last_name = 'BARLOW'
AND contributor_first_name = 'SIBYLLE'
AND contributor_zip_code ILIKE '01742%'
--AND coalesce(beneficiary_candidate_office,cand_office) IN ('S',
-- 'H')
AND extract(YEAR
FROM contribution_date) >= 2017
""", con=conn)
)
actblue_barlow_out_of_state
actblue_nottingham = read_or_save(
'actblue_nottingham',
lambda: pd.read_sql("""
SELECT *
FROM actblue_contribs
WHERE contributor_first_name = 'WILLIAM'
AND contributor_last_name = 'NOTTINGHAM'
AND contributor_zip_code ILIKE '90720%'
AND extract(YEAR
FROM contribution_date) >= 2017
""", con=conn)
)
actblue_nottingham['contribution_amount'].sum()
actblue_first_names = read_or_save(
'actblue_first_names',
lambda: pd.read_sql("""
SELECT contributor_first_name,
count(*),
sum(contribution_amount)
FROM actblue_contribs
WHERE extract(YEAR FROM contribution_date) >= 2017
and coalesce(beneficiary_candidate_office,cand_office) in ('H','S')
GROUP BY contributor_first_name
""", con=conn)
)
d = gender.Detector(case_sensitive=False)
actblue_first_names['gender'] = actblue_first_names['contributor_first_name'].apply(d.get_gender)
actblue_first_names.sort_values(by=['count'], ascending=False).head(5)
actblue_gender = actblue_first_names.groupby(actblue_first_names['gender'])['count','sum'].sum()
actblue_gender['percent_by_count'] = actblue_gender['count']/actblue_gender['count'].sum()*100
actblue_gender['percent_by_sum'] = actblue_gender['sum']/actblue_gender['sum'].sum()*100
actblue_gender
%%bash
jupyter nbconvert analysis.ipynb