-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape_appraisers.js
More file actions
117 lines (99 loc) · 4.19 KB
/
scrape_appraisers.js
File metadata and controls
117 lines (99 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
const { chromium } = require('playwright-chromium');
const { parse } = require('csv-parse/sync');
const fs = require('fs');
const path = require('path');
/** Sidebar link that narrows the results to Commercial appraisers. */
const COMMERCIAL_FILTER_SELECTOR =
  'a[onclick="result_filter_sort_servicearea(\'\',\'C\',\'R\');"]';

/** CSV export link inside the results sidebar. */
const EXPORT_LINK_SELECTOR = 'div#download_servicearea > a#export_servicearea';

/** CSV columns that may hold the "City, ST ZIP" line, in lookup order. */
const ADDRESS_KEYS = ['Adrs_Line2', 'Adrs_Line3', 'Adrs_Line4', 'Adrs_Line5'];

/** Matches an address line shaped like "City, ST 12345". */
const CITY_STATE_RE = /^(.+),\s+([A-Z]{2})\s+\d{5}/;

/** Matches a 10-digit phone number with optional parentheses and -, ., or space separators. */
const PHONE_RE = /\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/;

/**
 * Finds city and state in the first address column that looks like "City, ST ZIP".
 * The position of that line varies per record, so every candidate column is tried.
 *
 * @param {Object<string, string>} row - One parsed CSV record keyed by header name.
 * @returns {{city: string, state: string}} Empty strings when no column matches.
 */
function extractCityState(row) {
  for (const key of ADDRESS_KEYS) {
    const match = (row[key] || '').match(CITY_STATE_RE);
    if (match) {
      return { city: match[1].trim(), state: match[2].trim() };
    }
  }
  return { city: '', state: '' };
}

/**
 * Maps one CSV record to the appraiser output schema.
 *
 * @param {Object<string, string>} row - One parsed CSV record keyed by header name.
 * @returns {Object} Appraiser entry; missing columns become empty strings.
 */
function toAppraiser(row) {
  const { city, state } = extractCityState(row);
  const rawPhone = row['Phone'] || '';
  const phoneMatch = rawPhone.match(PHONE_RE);

  return {
    name: row['Appraiser_Name'] || '',
    company: row['Company_Name'] || '',
    city,
    state,
    email: '', // not included in the CSV export
    // Fall back to the raw value when no recognizable phone number is found.
    phone: phoneMatch ? phoneMatch[0] : rawPhone,
    licenseNumber: 'N/A',
    licenseType: row['Valuation_Focus'] || '',
    profile: row['Profile'] || '',
    memberType: row['Member'] || '',
    memberSince: row['Year'] || '',
    available: row['Fee_Asgn'] || '',
  };
}

/**
 * Script entry point: opens the Appraisal Institute search page, selects
 * North Carolina, applies the Commercial filter, downloads the CSV export,
 * converts it to the appraiser schema and writes `appraisers.JSON`.
 *
 * On any failure the error is logged and the process exit code is set to 1.
 * The browser is always closed in `finally`, including when context or page
 * creation fails.
 */
(async () => {
  let browser;
  try {
    // Launch inside the try block so that a failure while creating the
    // context or page still reaches the cleanup in `finally`.
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({ acceptDownloads: true });
    const page = await context.newPage();

    // Forward selected browser console logs to Node.
    page.on('console', (msg) => {
      const text = msg.text();
      if (text.includes('Extracted') || text.includes('Page')) {
        console.log('BROWSER:', text);
      }
    });

    console.log('Navigating to Appraisal Institute...');
    await page.goto(
      'https://ai.appraisalinstitute.org/eweb/DynamicPage.aspx?webcode=aifaasearch',
      { waitUntil: 'networkidle' }
    );

    // Wait for the state dropdown to render (page is JavaScript-driven).
    console.log('Waiting for state selector to render...');
    await page.waitForSelector('select#state_select', { timeout: 30000 });

    // Selecting a state auto-populates the results sidebar — no Search button needed.
    console.log('Selecting North Carolina...');
    await page.selectOption('select#state_select', 'NC');

    // Filter to Commercial only (excludes Residential appraisers).
    console.log('Applying Commercial filter...');
    await page.waitForSelector(COMMERCIAL_FILTER_SELECTOR, { timeout: 30000 });
    await page.click(COMMERCIAL_FILTER_SELECTOR);

    // Wait for the download link to appear in the results sidebar.
    console.log('Waiting for export link...');
    await page.waitForSelector(EXPORT_LINK_SELECTOR, { timeout: 60000 });

    // Intercept the file download — the listener must be registered before the click.
    console.log('Downloading CSV export...');
    const [download] = await Promise.all([
      page.waitForEvent('download'),
      page.click(EXPORT_LINK_SELECTOR),
    ]);

    // Read the download stream directly into a Buffer (no temp file needed).
    const stream = await download.createReadStream();
    if (!stream) {
      // Guard against a missing stream so the failure is reported clearly
      // instead of as a TypeError from the `for await` loop below.
      throw new Error('CSV download failed: no readable stream available');
    }
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(chunk);
    }
    const csvBuffer = Buffer.concat(chunks);

    console.log('Parsing CSV...');
    const records = parse(csvBuffer, {
      columns: true, // first row is the header
      skip_empty_lines: true,
      trim: true,
    });

    // Map CSV columns to the appraiser schema and drop rows without a name.
    const appraisers = records.map(toAppraiser).filter((a) => a.name !== '');
    console.log(`Extraction complete. Total appraisers: ${appraisers.length}`);

    const output = {
      title: 'Appraisers List',
      appraiser: appraisers,
    };
    fs.writeFileSync('appraisers.JSON', JSON.stringify(output, null, 2));
    console.log('Data saved to appraisers.JSON');
  } catch (err) {
    console.error('Error during scraping:', err);
    // Set the exit code instead of calling process.exit() so the cleanup in
    // `finally` runs and pending stdout/stderr writes are flushed.
    process.exitCode = 1;
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        console.error('Failed to close browser:', closeErr);
        process.exitCode = 1;
      }
    }
  }
})();