crawler

const puppeteer = require('puppeteer');
const fs = require('fs');
const cheerio = require('cheerio');

// Collects one record per ID for which the crawl produced any data.
const CNTT_K17 = [];

// The grade page is read in groups of this many `.row-diemTK .Label` cells,
// one group per "HK<n>" entry.
const LABELS_PER_SEMESTER = 14;
// Only the first four key/value pairs (eight cells) of each group are kept.
const PAIRS_PER_SEMESTER = 4;

/**
 * Reports whether a value has no own enumerable properties.
 *
 * @param {*} obj - Value to inspect; `null` and `undefined` count as empty.
 * @returns {boolean} `true` when there is nothing to enumerate.
 */
function isEmpty(obj) {
  return obj == null || Object.keys(obj).length === 0;
}

/**
 * Opens the timetable page for one ID and reads the three header labels
 * (name, class, ID) into a flat record.
 *
 * @param {object} browser - Puppeteer browser to open the page in.
 * @param {number|string} mssv - Value placed in the `id` query parameter.
 * @returns {Promise<object|null>} The parsed record, or `null` when a label is
 *   missing or its text does not have the expected ` - ` / `: ` layout.
 */
const readStudentInfo = async (browser, mssv) => {
  const page = await browser.newPage();
  await page.goto(
    `http://thongtindaotao.sgu.edu.vn/Default.aspx?page=thoikhoabieu&sta=1&id=${mssv}`,
  );

  // This callback runs inside the page, so it cannot use anything from Node scope.
  return page.evaluate(() => {
    try {
      const textOf = id => document.getElementById(id).innerText;
      const nameParts = textOf(
        'ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV',
      ).split(' - ');
      const classParts = textOf(
        'ctl00_ContentPlaceHolder1_ctl00_lblContentLopSV',
      ).split(' - ');

      return {
        mssv: textOf('ctl00_ContentPlaceHolder1_ctl00_lblContentMaSV'),
        ten: nameParts[0],
        ngaySinh: nameParts[1].split(': ')[1],
        lop: classParts[0],
        nganh: classParts[1].split(': ')[1],
        khoa: classParts[2].split(': ')[1],
      };
    } catch (err) {
      // Missing labels or an unexpected layout: report "no personal info".
      return null;
    }
  });
};

/**
 * Opens the grade page for one ID, switches it to the alternate view and
 * returns the resulting HTML.
 *
 * @param {object} browser - Puppeteer browser to open the page in.
 * @param {number|string} mssv - Value placed in the `id` query parameter.
 * @returns {Promise<string>} Full HTML of the page after the view change.
 */
const readGradePage = async (browser, mssv) => {
  const page = await browser.newPage();
  await page.goto(
    `http://thongtindaotao.sgu.edu.vn/Default.aspx?page=xemdiemthi&id=${mssv}`,
  );

  // Start waiting before clicking: if the click were awaited first, a fast
  // navigation could finish before the wait is registered and then time out.
  await Promise.all([
    page.waitForNavigation(),
    page.click('#ctl00_ContentPlaceHolder1_ctl00_lnkChangeview2'),
  ]);

  return page.content();
};

/**
 * Extracts the per-semester summary pairs from the grade page HTML.
 *
 * @param {string} html - HTML of the grade page.
 * @returns {object} Map of `HK1`, `HK2`, ... to an object built from the first
 *   four label/value pairs of each 14-cell group. Incomplete trailing groups
 *   are ignored.
 */
const parseSemesterSummaries = html => {
  const $ = cheerio.load(html);
  const labels = $('.row-diemTK .Label');
  const semesterCount = Math.floor(labels.length / LABELS_PER_SEMESTER);

  const semesters = {};
  for (let semester = 0; semester < semesterCount; semester++) {
    const base = semester * LABELS_PER_SEMESTER;
    const summary = {};
    for (let pair = 0; pair < PAIRS_PER_SEMESTER; pair++) {
      const key = labels.eq(base + 2 * pair).text();
      summary[key] = labels.eq(base + 2 * pair + 1).text();
    }
    semesters[`HK${semester + 1}`] = summary;
  }
  return semesters;
};

/**
 * Crawls the personal-info and grade pages for one ID and, when anything was
 * found, appends the merged record to `CNTT_K17` and logs it.
 *
 * Page-level failures (navigation errors, missing view-switch link, timeouts)
 * are logged and the ID is skipped, so one bad ID does not abort a whole run.
 * A failure to launch the browser is not caught and rejects the returned promise.
 *
 * @param {number|string} mssv - ID to crawl (presumably the student number).
 * @returns {Promise<void>}
 */
const getData = async mssv => {
  // NOTE(review): a fresh browser per ID keeps each crawl isolated from the
  // previous one's cookies/session; confirm before sharing one browser.
  const browser = await puppeteer.launch();
  try {
    const thongTinCaNhan = await readStudentInfo(browser, mssv);
    const html = await readGradePage(browser, mssv);
    const diem = { ...thongTinCaNhan, ...parseSemesterSummaries(html) };

    if (!isEmpty(diem)) {
      CNTT_K17.push(diem);
      console.log(diem);
    }
  } catch (err) {
    console.error(`Skipping ${mssv}: ${err && err.message ? err.message : err}`);
  } finally {
    // Always release the browser, including when navigation failed early.
    await browser.close();
  }
};

/**
 * Crawls every ID in `[start, end)` one after another, then writes all
 * collected records to `./db.json`.
 *
 * @param {number} [start=3117410000] - First ID to crawl (inclusive).
 * @param {number} [end=3117410400] - ID at which to stop (exclusive).
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const app = async (start = 3117410000, end = 3117410400) => {
  for (let i = start; i < end; i++) {
    await getData(i);
  }

  // Await the write so "done" is only reported after the file is on disk.
  await fs.promises.writeFile('./db.json', JSON.stringify(CNTT_K17));
  console.log('done');
  console.log(CNTT_K17);
};

app().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.