1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
| const fs = require('fs') const request = require('request') const cheerio = require('cheerio') const encoding = require('encoding')
// Options meant to send traffic through a proxy on 127.0.0.1:8888
// (presumably a local debugging proxy — confirm).
// NOTE(review): request.defaults() returns a new wrapper and does not modify
// `request` itself. The return value is discarded here, so these options are
// not applied to the request.get() calls in this file. The key is also
// spelled `rejectunauthorized`, while the option documented by the request
// library is `rejectUnauthorized` — confirm the intent before wiring this up.
const proxyOptions = {
  proxy: "http://127.0.0.1:8888",
  rejectunauthorized: false,
};
request.defaults(proxyOptions);

// Crawl state shared by download() and getAllPage().
let pageIndex = 1; // number of the listing page currently being processed
let pageLength = 0; // number of <h3> entries found on the current listing page
const maxPage = 148; // total number of listing pages (hard-coded)
let dataIndexArr = []; // not referenced anywhere in the visible code
/**
 * Download one thread page and save its content as a text file.
 *
 * The page at `data.link` is fetched, the title is read from `#subject_tpc`
 * and the body from `#read_tpc`. A double `<br>` becomes a newline, every
 * remaining `<br>` is dropped, and the result is written to
 * `<__dirname>/txt/<title>.txt`.
 *
 * @param {{link: string, index: number, name?: string}} data - Thread
 *   descriptor. `index` is the entry's position on the listing page; it is
 *   used for progress logging and to stagger requests (500 ms per position).
 * @returns {Promise<string>} Resolves with 'success' once the file is written.
 *   Rejects with an Error when the request fails, the status is not 200, the
 *   expected elements are missing, or the file cannot be written.
 */
const download = (data) =>
  new Promise((resolve, reject) => {
    // Wait *before* sending the request so the requests of one listing page
    // are spread out instead of all being fired at the same moment.
    setTimeout(() => {
      try {
        request.get(data.link, (err, res) => {
          // On a transport error `res` is undefined, so check `err` first.
          if (err) {
            reject(err);
            return;
          }
          if (res.statusCode !== 200) {
            // Settle the promise; otherwise Promise.all() would wait forever.
            reject(new Error(`Unexpected status ${res.statusCode} for ${data.link}`));
            return;
          }
          try {
            const $ = cheerio.load(res.body);
            const title = $('#subject_tpc').html();
            const content = $('#read_tpc').html();
            // .html() yields null when the selector matches nothing.
            if (title == null || content == null) {
              throw new Error(`Missing #subject_tpc or #read_tpc in ${data.link}`);
            }
            // The title comes from a remote page: replace characters that are
            // path separators or invalid in file names.
            const fileName = title.replace(/[\\/:*?"<>|]/g, '_');
            const text = content.replace(/<br><br>/g, '\n').replace(/<br>/g, '');
            fs.writeFile(`${__dirname}/txt/${fileName}.txt`, text, (writeErr) => {
              if (writeErr) {
                reject(writeErr);
                return;
              }
              console.log('当前完成索引:', data.index, '总个数:', pageLength, '当前页数:', pageIndex, '总页数:', maxPage);
              resolve('success');
            });
          } catch (parseErr) {
            reject(parseErr);
          }
        });
      } catch (requestErr) {
        // request.get() can throw synchronously, e.g. when the link is missing.
        reject(requestErr);
      }
    }, data.index * 500);
  });
/**
 * Crawl the thread listing.
 *
 * Fetches the listing page at `url`, starts a download() for every `<h3>`
 * entry inside `#ajaxtable`, waits for all of them, and then moves on to the
 * next page until `maxPage` pages have been processed. Progress is tracked in
 * the module-level `pageIndex` / `pageLength` variables.
 *
 * Request failures, non-200 responses and download failures are logged and
 * stop the crawl; they do not reject the returned promise.
 *
 * NOTE(review): the same `url` is requested for every page — nothing visible
 * here derives the address from `pageIndex`. Confirm how the page number is
 * supposed to be encoded in the URL.
 *
 * @param {string} url - Address of the listing page.
 * @returns {Promise<void>} Resolves once crawling has stopped.
 */
async function getAllPage(url) {
  let res;
  try {
    res = await new Promise((resolve, reject) => {
      request.get(url, (err, response) => (err ? reject(err) : resolve(response)));
    });
  } catch (err) {
    // On a transport error there is no response object to inspect.
    console.log(err);
    return;
  }
  if (res.statusCode !== 200) {
    console.log(`Unexpected status ${res.statusCode} for ${url}`);
    return;
  }

  const $ = cheerio.load(res.body);
  const list = $('#ajaxtable').find('h3');
  pageLength = list.length;

  // The first page skips its first 7 entries (presumably pinned or
  // announcement threads — confirm); later pages are processed from the top.
  const firstEntry = pageIndex === 1 ? 7 : 0;
  const dataList = [];
  for (let i = firstEntry; i < pageLength; i++) {
    const anchor = list.eq(i).find('a');
    dataList.push(
      download({
        name: anchor.html(),
        link: anchor.attr('href'),
        index: i,
      })
    );
  }

  try {
    await Promise.all(dataList);
  } catch (err) {
    console.log(err);
    return;
  }

  console.log('PromiseAll success pageIndex', pageIndex);
  // Strictly less than: advancing while pageIndex === maxPage would request
  // one page past the last one.
  if (pageIndex < maxPage) {
    pageIndex++;
    await getAllPage(url);
  }
}
// Kick off the crawl.
// NOTE(review): `url` is not defined anywhere in the visible source — it must
// be declared before this call or the script throws a ReferenceError here.
// getAllPage() is async, so attach a handler instead of leaving its promise
// floating.
getAllPage(url).catch((err) => {
  console.log(err);
});
|