The PA House and Senate each publish a list of committee assignments, but there is no way to export those lists to an Excel-friendly format.
The below script does the following:
- Extract the assignments as listed
- Map each rep to their committees and subcommittees
- Link to district, party in a CSV
const fs = require('fs');
// Contents of the saved committee-assignments page, decoded to a UTF-8 string
// at read time.
const house = fs.readFileSync(
  './scraping/legislature/House Member Committee Assignments - PA House of Representatives.html',
  'utf8'
);
const Papa = require('papaparse');
let cheerio = require('cheerio');
/**
 * Maps a representative's display name (as written in the roster below) to
 * their district number, kept as a string.
 */
const nameToDistrict = {};

/**
 * Collects every committee / subcommittee column name seen while scraping.
 * Only the keys are meaningful; the values are always ''.
 */
const allCommittees = {};

/**
 * Roster of House members, one per line: name, party letter, district number.
 * The first line is a column header. The text was pasted from a table, so the
 * column separator may be tabs or plain spaces.
 */
const HOUSE_ROSTER = `Name Party District
Aerion Abney D 19
Joseph Adams R 139
Mike Armanini R 75
Jacob Banta R 4
Jamie Barton R 124
Anthony Bellmon D 203
Jessica Benham D 36
Kerry Benninghoff R 171
Aaron Bernstine R 8
Ryan Bizzarro D 3
Timothy Bonner R 17
Stephanie Borowicz R 76
Lisa Borowski D 168
Heather Boyd D 163
Kevin Boyle D 172
Matthew Bradford D 70
Tim Brennan D 29
Tim Briggs D 149
Amen Brown D 10
Marla Brown R 9
Donna Bullock D 195
Danilo Burgos D 197
Frank Burns D 72
Mike Cabell R 117
Martin Causer R 67
Johanny Cepeda-Freytiz D 129
Morgan Cephas D 192
Melissa Cerrato D 151
Joe Ciresi D 146
Scott Conklin D 77
Bud Cook R 50
Jill Cooper R 55
Gina Curry D 164
Bryan Cutler R 100
Joseph D'Orsie R 47
Mary Jo Daley D 148
Eric Davanzo R 58
Tina Davis D 141
Jason Dawkins D 179
Daniel Deasy D 27
David Delloso D 162
Sheryl Delozier R 88
Russ Diamond R 102
Kyle Donahue D 113
George Dunbar R 56
Torren Ecker R 193
Joe Emrick R 137
Mindy Fee R 37
Elizabeth Fiedler D 184
Wendy Fink R 94
Justin Fleming D 105
Jamie Flick R 83
Ann Flood R 138
Dan Frankel D 23
Robert Freeman D 136
Paul Friel D 26
Jonathan Fritz R 111
Pat Gallagher D 173
John Galloway D 140
Valerie Gaydos R 44
Matthew Gergely D 35
Mark Gillen R 128
Jose Giral D 180
Barbara Gleim R 199
G. Roni Green D 190
Jim Gregory R 80
Keith Greiner R 43
Seth Grove R 196
Nancy Guenst D 152
Manuel Guzman Jr. D 127
Jim Haddock D 118
Joe Hamm R 84
Liz Hanbidge D 61
Patrick Harkins D 1
Jordan Harris D 186
Doyle Heffley R 122
Carol Hill-Evans D 95
Joe Hogan R 142
Joseph C. Hohenstein D 177
Kristine Howard D 167
Rich Irvin R 81
MaryLouise Isaacson D 175
R. Lee James R 64
Mike Jones R 93
Tom Jones R 98
Barry Jozwiak R 5
Joshua Kail R 15
Aaron Kaufer R 120
Rob Kauffman R 89
Carol Kazeem D 159
Dawn Keefer R 92
Malcolm Kenyatta D 181
Dallas Kephart R 73
Joe Kerwin R 125
Tarik Khan D 194
Patty Kim D 103
Emily Kinkead D 20
Stephen Kinsey D 201
Kate Klunk R 169
Bridget Kosierowski D 114
Rick Krajewski D 188
Leanne Krueger D 161
Charity Grimm Krupa R 51
Anita Astorino Kulik D 45
Thomas Kutz R 87
Andrew Kuzma R 39
Shelby Labs R 143
John Lawrence R 13
Robert Leadbeter R 109
Milou Mackenzie R 131
Ryan Mackenzie R 187
Maureen Madden D 115
Dave Madsen D 104
Abby Major R 60
Zachary Mako R 183
Steven Malagari D 53
David M. Maloney Sr. R 130
Kristin Marcell R 178
Brandon Markosek D 25
Jim Marshall R 14
Robert Matzie D 16
La'Tasha Mayes D 24
Joe McAndrew D 32
Joanna McClinton D 191
Jeanne McNeill D 133
Thomas L. Mehaffie III R 106
Steven Mentzer R 97
Robert Mercuri R 28
Robert Merski D 2
Carl Walker Metzgar R 69
Natalie Mihalek R 40
Brett Miller R 41
Dan Miller D 42
Dan Moul R 91
Kyle Mullins D 112
Brian Munroe D 144
Marci Mustello R 11
Ed Neilson D 174
Eric Nelson R 57
Napoleon Nelson D 154
Jennifer O'Mara D 165
Timothy O'Neal R 48
Donna Oberlander R 63
Jason Ortitay R 46
Danielle Friel Otten D 155
Clint Owlett R 68
Darisha Parker D 198
Eddie Day Pashinski D 121
Tina Pickett R 110
Chris Pielli D 156
Nick Pisciottano D 38
Tarah Probst D 189
Christopher Rabb D 200
Jack Rader Jr. R 176
Kathy Rapp R 65
Jim Rigby R 71
Brad Roae R 6
Leslie Rossi R 59
David H. Rowe R 85
Mark Rozzi D 126
Alec Ryncavage R 119
Abigail Salisbury D 34
Steve Samuelson D 135
Benjamin Sanchez D 153
Christina Sappey D 158
Paul Schemel R 90
Donna Scheuren R 147
John Schlegel R 101
Michael Schlossberg D 132
Louis C. Schmitt Jr. R 79
Peter Schweyer D 134
Stephenie Scialabba R 12
Greg Scott D 54
Melissa Shusterman D 157
Joshua Siegel D 22
Brian Smith R 66
Ismail Smith-Wade-El D 49
Jared Solomon D 202
Craig Staats R 145
Perry Stambaugh R 86
Mandy Steele D 33
Joanne Stehr R 107
Michael Stender R 108
James B. Struzzi II R 62
P. Michael Sturla D 96
Paul Takac D 82
Kathleen Tomlinson R 18
Jesse Topper R 78
Tim Twardzik R 123
Arvind Venkat D 30
Greg Vitali D 166
Ryan Warner R 52
Perry Warren D 31
Dane Watro R 116
Ben Waxman D 182
Joe Webster D 150
Parke Wentling R 7
Martina White R 170
Craig Williams R 160
Dan Williams D 74
Regina Young D 185
David Zimmerman R 99
Lindsay Powell D 21
`;

// A data row ends with a single-letter party code followed by the district
// number; everything before that is the member's name. Anchoring on the tail
// lets the same pattern accept tab- or space-delimited rows and names that
// themselves contain spaces, initials or suffixes.
const ROSTER_ROW = /^(.+?)\s+([A-Z])\s+(\d+)$/;

/**
 * Parses roster text into a name -> district lookup.
 *
 * @param {string} text - Newline-separated rows of "name party district".
 * @returns {Object<string, string>} District number (as a string) keyed by
 *   member name. Lines that are not data rows (the header, blank lines) are
 *   skipped.
 */
function parseRoster(text) {
  const districts = {};
  for (const row of text.split('\n')) {
    const match = ROSTER_ROW.exec(row.trim());
    if (match === null) {
      continue;
    }
    const [, name, , district] = match;
    districts[name.trim()] = district;
  }
  return districts;
}

Object.assign(nameToDistrict, parseRoster(HOUSE_ROSTER));
/**
 * Splits the text of a member's bio element into a display name and party.
 *
 * The text is split on commas; the last whitespace-separated token of the
 * final part is taken as the party marker and the first two comma-separated
 * parts are swapped. Presumably the page renders members as
 * "Last, First (P)" or "Last, First, Suffix (P)" - confirm against the HTML.
 *
 * @param {string} bioText - Raw text of the bio element.
 * @returns {{name: string, party: string}} Reordered name and the single
 *   character found at index 1 of the party marker.
 */
function parseMemberBio(bioText) {
  const parts = bioText.trim().split(',');
  const lastIndex = parts.length - 1;
  const tailTokens = parts[lastIndex].trim().split(' ');
  // The party marker is the final token; the character at index 1 is kept,
  // i.e. the letter just after a leading "(".
  const party = tailTokens.pop().substring(1, 2);
  parts[lastIndex] = tailTokens.join(' ');
  [parts[0], parts[1]] = [parts[1], parts[0]];
  return { name: parts.join(' ').trim(), party };
}

/**
 * Looks up a member's district in `nameToDistrict`.
 *
 * @param {string} name - Display name produced by `parseMemberBio`.
 * @returns {string} The district recorded for that name.
 * @throws {Error} When neither the name nor its fallback spelling is known.
 */
function lookupDistrict(name) {
  if (nameToDistrict[name]) {
    return nameToDistrict[name];
  }
  // Retry with the first " X. " style middle initial removed, for members
  // whose roster entry omits it.
  const withoutInitial = name.replace(/ \w[.] /, ' ');
  if (nameToDistrict[withoutInitial]) {
    return nameToDistrict[withoutInitial];
  }
  throw new Error(`No district found for member "${name}"`);
}

/**
 * Files one fragment of a member's committee list into `record` and registers
 * the column in `allCommittees`.
 *
 * Three fragment shapes are handled:
 *  - ", Role": a role for the previously recorded entry;
 *  - "Title - Role": an entry with an explicit role;
 *  - "Title": an entry with the default role "Member".
 * A title containing "Subcommittee " is stored under
 * "<last committee> - <title>".
 *
 * @param {Object} record - Member record being filled in.
 * @param {{lastCommittee: string, lastEntry: string}} cursor - Tracks the most
 *   recent committee and the most recent column written for this member.
 * @param {string} text - Raw text of the fragment.
 */
function recordAssignment(record, cursor, text) {
  if (text.startsWith(', ')) {
    record[cursor.lastEntry] = text.substring(2).trim();
    allCommittees[cursor.lastEntry] = '';
    return;
  }

  let title = text;
  let role = 'Member';
  if (title.indexOf('-') > 0) {
    const pieces = title.split('-');
    title = pieces[0].trim();
    role = pieces[1].trim();
  }

  const isSubcommittee = title.includes('Subcommittee ');
  // Always key on the trimmed title so that a later ", Role" fragment (which
  // writes to cursor.lastEntry) lands in the same column as this entry.
  title = title.trim();
  if (title === '') {
    // Nothing to record; avoids creating an empty-named column.
    return;
  }

  if (!isSubcommittee) {
    cursor.lastCommittee = title;
  }
  const column = isSubcommittee
    ? `${cursor.lastCommittee} - ${title}`.trim()
    : title;
  record[column] = role;
  allCommittees[column] = '';
  cursor.lastEntry = column;
}

// One record per member: party, name, district, then one property per
// committee / subcommittee holding that member's role.
const members = [];
let $ = cheerio.load(house);
for (const element of $('.MemberInfoCteeList-Member').toArray()) {
  const bioElt = $(element).children('.MemberInfoCteeList-Bio');
  const { name, party } = parseMemberBio(bioElt.text());
  const record = { party, name, district: lookupDistrict(name) };

  const cursor = { lastCommittee: '', lastEntry: '' };
  // The committee fragments sit four levels below the bio's next sibling.
  const groups = bioElt.next().children().children().children().toArray();
  for (const group of groups) {
    for (const fragment of $(group).children().toArray()) {
      recordAssignment(record, cursor, $(fragment).text());
    }
  }
  members.push(record);
}
// Back-fill every member with an empty value for each committee column they
// do not already have, so all records expose the same set of keys.
members.forEach((member) => {
  for (const committee of Object.keys(allCommittees)) {
    const alreadySet = Object.prototype.hasOwnProperty.call(member, committee);
    if (alreadySet) {
      continue;
    }
    member[committee] = '';
  }
});
// Persist the scraped records: pretty-printed JSON plus a flat table.
fs.writeFileSync('house.json', JSON.stringify(members, null, 2));
// Papa.unparse emits comma-separated output unless told otherwise; request a
// tab delimiter so the contents match the .tsv extension.
fs.writeFileSync('house.tsv', Papa.unparse(members, { delimiter: '\t' }));
}