-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathInfiniteScrollItems.js
66 lines (60 loc) · 2.12 KB
/
InfiniteScrollItems.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
const { chromium } = require('playwright');
const fs = require('fs');
/**
 * Repeatedly scrolls an infinite-scroll doctor listing and extracts the
 * rendered listing cards until at least `itemTargetCount` cards are available.
 *
 * The loop stops early, returning whatever was collected so far, when:
 *   - the page shows an `.error--content` element,
 *   - `maxStalledScrolls` consecutive scrolls produced no additional cards, or
 *   - an exception is thrown while evaluating or scrolling (it is logged).
 *
 * @param {object} page - Playwright page; only `evaluate` and `waitForTimeout` are used.
 * @param {number} itemTargetCount - Minimum number of cards to collect.
 * @param {number} [scrollDelay=1000] - Milliseconds to wait after each scroll.
 * @param {number} [maxStalledScrolls=10] - Consecutive scrolls without new
 *   cards after which the listing is considered exhausted.
 * @returns {Promise<Array<object>>} One object per card with the keys `name`,
 *   `specialisation`, `experience`, `city`, `clinic_name` and
 *   `consultation_fee`; a field is `null` when its element is absent.
 */
const scrapeInfiniteScrollItems = async (
  page,
  itemTargetCount,
  scrollDelay = 1000,
  maxStalledScrolls = 10,
) => {
  let items = [];
  let stalledScrolls = 0;
  try {
    while (items.length < itemTargetCount) {
      // The callback is serialized and executed inside the browser, so every
      // helper it needs has to be declared within it.
      const extracted = await page.evaluate(() => {
        if (document.querySelector('.error--content')) {
          return null;
        }
        // A card lacking one field must not abort the whole extraction.
        const textOf = (root, selector) => {
          const node = root.querySelector(selector);
          return node ? node.innerText : null;
        };
        const cards = document.querySelectorAll(
          '[data-qa-id="doctor_listing_cards"] div.listing-doctor-card',
        );
        return Array.from(cards).map((card) => ({
          name: textOf(card, '.doctor-name'),
          specialisation: textOf(card, '[data-qa-id="doctor_specialisation"]'),
          experience: textOf(card, '[data-qa-id="doctor_experience"]'),
          city: textOf(card, '[data-qa-id="practice_city"]'),
          clinic_name: textOf(card, '[data-qa-id="doctor_clinic_name"]'),
          consultation_fee: textOf(card, '[data-qa-id="consultation_fee"]'),
        }));
      });

      if (!Array.isArray(extracted)) {
        // Error page: keep what was already collected instead of discarding it.
        console.log('Error: while get data');
        break;
      }

      stalledScrolls = extracted.length > items.length ? 0 : stalledScrolls + 1;
      items = extracted;

      if (items.length >= itemTargetCount) {
        break;
      }
      if (stalledScrolls >= maxStalledScrolls) {
        console.log(
          `No new items after ${stalledScrolls} scrolls; stopping at ${items.length} items.`,
        );
        break;
      }

      // Scroll to a randomized fraction of the page height: the random value
      // itself when it lies strictly between 0.5 and 0.9, otherwise 0.75.
      const random = Math.random();
      const scrollRatio = random > 0.5 && random < 0.9 ? random : 0.75;
      await page.evaluate((ratio) => {
        window.scrollTo({
          top: document.body.scrollHeight * ratio,
          behavior: 'smooth',
        });
      }, scrollRatio);
      await page.waitForTimeout(scrollDelay);
    }
  } catch (e) {
    console.log('ERROR');
    console.error(e);
  }
  return items;
};
/**
 * Script entry point: launches a headed Chromium instance, opens the Practo
 * Bangalore doctor listing, scrapes up to 100 listing cards and writes them to
 * `./items.json`.
 *
 * The browser is closed even when navigation or scraping fails, and any
 * failure is reported with a non-zero exit code instead of surfacing as an
 * unhandled promise rejection.
 */
(async () => {
  const browser = await chromium.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    // setViewportSize returns a promise; await it so the size is applied
    // before navigation starts and a failure is not silently dropped.
    await page.setViewportSize({ width: 1280, height: 926 });
    await page.goto('https://www.practo.com/Bangalore/doctors');
    // Fall back to an empty list if the scraper yields nothing usable.
    const items = (await scrapeInfiniteScrollItems(page, 100)) || [];
    // Save extracted items to a file, then report once the file exists.
    fs.writeFileSync('./items.json', JSON.stringify(items, null, 4));
    console.log(`Check items.json, Scrapped ${items.length} items.`);
  } finally {
    // Closing the browser also closes every page it owns.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});