const puppeteer = require('puppeteer');
const fs = require('fs');
const cheerio = require('cheerio');
// Accumulates one record per student: getData() pushes each non-empty result
// here, and app() serializes the whole array to ./db.json once the crawl ends.
let CNTT_K17 = [];
/**
 * Reports whether a value has no own enumerable string-keyed properties.
 *
 * Unlike calling `obj.hasOwnProperty(...)` directly, this also works for
 * prototype-less objects (`Object.create(null)`) and for objects that carry
 * their own `hasOwnProperty` key.
 *
 * @param {*} obj - Value to inspect; `null` and `undefined` count as empty.
 * @returns {boolean} `true` when there is nothing to enumerate, else `false`.
 */
function isEmpty(obj) {
  // Object.keys throws on null/undefined, which the for...in version treated
  // as empty, so keep that behaviour with an explicit guard.
  if (obj == null) {
    return true;
  }
  return Object.keys(obj).length === 0;
}
/**
 * Scrapes one student's personal details and per-semester summary and, when
 * anything was found, appends the record to the module-level CNTT_K17 array
 * and prints it.
 *
 * Two pages are visited in the same browser: the "thoikhoabieu" page supplies
 * the personal details and the "xemdiemthi" page (after switching its view)
 * supplies the summary rows. Any failure while scraping is logged and
 * swallowed so that one bad student ID does not abort a longer crawl; the
 * browser is always closed afterwards.
 *
 * @param {number|string} mssv - Student ID interpolated into both page URLs.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
const getData = async mssv => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const page2 = await browser.newPage();

    await page2.goto(
      `http://thongtindaotao.sgu.edu.vn/Default.aspx?page=thoikhoabieu&sta=1&id=${mssv}`,
    );
    // This callback executes inside the page, so it must be self-contained.
    // It yields null when any label is missing or not in the expected
    // "value - caption: value" layout.
    const thongTinCaNhan = await page2.evaluate(() => {
      try {
        const textOf = id => document.getElementById(id).innerText;
        const ten = textOf(
          'ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV',
        ).split(' - ');
        const lop = textOf(
          'ctl00_ContentPlaceHolder1_ctl00_lblContentLopSV',
        ).split(' - ');
        const maSV = textOf('ctl00_ContentPlaceHolder1_ctl00_lblContentMaSV');
        return {
          mssv: maSV,
          ten: ten[0],
          ngaySinh: ten[1].split(': ')[1],
          lop: lop[0],
          nganh: lop[1].split(': ')[1],
          khoa: lop[2].split(': ')[1],
        };
      } catch (err) {
        return null;
      }
    });

    await page.goto(
      `http://thongtindaotao.sgu.edu.vn/Default.aspx?page=xemdiemthi&id=${mssv}`,
    );
    // Start waiting for the navigation BEFORE clicking: awaiting click() first
    // can miss a navigation that finishes quickly and then hang until timeout.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#ctl00_ContentPlaceHolder1_ctl00_lnkChangeview2'),
    ]);

    // Parse the rendered HTML with cheerio (load() is synchronous).
    const $ = cheerio.load(await page.content());
    const labels = $('.row-diemTK .Label');
    // Every semester contributes 14 label cells to the summary rows.
    const soHK = labels.length / 14;

    // Spreading null produces {}, so a missing profile is tolerated here.
    const diem = { ...thongTinCaNhan };
    for (let i = 1; i <= soHK; i++) {
      const offset = 14 * (i - 1);
      const hocKy = {};
      // The first 8 cells of each group are 4 (caption, value) pairs.
      for (let j = 0; j < 8; j += 2) {
        const key = labels.eq(offset + j).text();
        hocKy[key] = labels.eq(offset + j + 1).text();
      }
      diem[`HK${i}`] = hocKy;
    }

    if (!isEmpty(diem)) {
      CNTT_K17.push(diem);
      console.log(diem);
    }
  } catch (err) {
    // Best-effort crawl: report the failure and move on to the next student.
    console.error(`getData(${mssv}) failed: ${err && err.message}`);
  } finally {
    // Always release the browser, including when navigation itself failed.
    await browser.close();
  }
};
/**
 * Crawls a contiguous range of student IDs one at a time, then writes every
 * collected record to ./db.json and prints the result.
 *
 * @param {number} [start=3117410000] - First student ID to scrape (inclusive).
 * @param {number} [end=3117410400] - Student ID at which to stop (exclusive).
 * @returns {Promise<void>} Resolves after the file write has been attempted.
 */
const app = async (start = 3117410000, end = 3117410400) => {
  // Sequential on purpose: each getData call launches its own browser.
  for (let i = start; i < end; i++) {
    await getData(i);
  }

  // Await the write so that 'done' is only reported once the file is on disk.
  try {
    await fs.promises.writeFile('./db.json', JSON.stringify(CNTT_K17));
  } catch (err) {
    console.error(err);
  }

  console.log('done');
  console.log(CNTT_K17);
};
// Entry point: start the crawl and surface any unexpected rejection instead
// of leaving the returned promise unhandled.
app().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.