1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
   | const fs = require('fs') const request = require('request') const cheerio = require('cheerio') const encoding = require('encoding')
 
  // NOTE(review): the value returned by request.defaults() is discarded here.
  // According to the `request` README, defaults() returns a new wrapper and does
  // not modify the global request API, so the later request.get() calls in this
  // file appear to run WITHOUT this proxy setting - confirm whether that is intended.
  // NOTE(review): option keys are case-sensitive; the documented TLS option is
  // spelled `rejectUnauthorized`, so the lowercase key below is presumably ignored.
  request.defaults({     proxy: "http://127.0.0.1:8888",     rejectunauthorized: false, })
  // Shared crawl state read and written by getAllPage() and download().

  // Number of the list page currently being processed; starts at 1 and is
  // incremented by getAllPage() after every batch of downloads finishes.
  let pageIndex = 1;

  // Number of <h3> entries found on the list page currently being processed.
  let pageLength = 0;

  // Total number of list pages; getAllPage() compares pageIndex against it.
  const maxPage = 148;

  // Not referenced anywhere in the visible part of this file.
  let dataIndexArr = [];
 
  /**
   * Replace characters that cannot appear in a file name on common file
   * systems (path separators and \ / : * ? " < > |) with an underscore.
   *
   * @param {string} title - Raw title text taken from the page.
   * @returns {string} The title with unsafe characters replaced.
   */
  function toSafeFileName(title) {
    return String(title).replace(/[\\/:*?"<>|]/g, '_');
  }

  /**
   * Fetch one article page and save its text under `<__dirname>/txt/<title>.txt`.
   *
   * The title is read from `#subject_tpc` and the body from `#read_tpc`;
   * `<br><br>` becomes a newline and any remaining `<br>` is removed.
   * After the response arrives, processing is delayed by `data.index * 500` ms,
   * so the entries of one list page are parsed and written at staggered times.
   *
   * @param {{link: string, index: number}} data - Article URL and its position
   *   in the list page (also used for the delay and the progress log).
   * @returns {Promise<string>} Resolves with 'success' once the file is written.
   *   Rejects on a request error, a non-200 status, a page that lacks the
   *   expected elements, or a file-system error, so that a caller waiting on
   *   the promise is never left hanging.
   */
  let download = data => new Promise((resolve, reject) => {
    request.get(data.link, (err, res) => {
      // On a transport error `res` is undefined, so check `err` first.
      if (err) {
        reject(err);
        return;
      }
      if (res.statusCode !== 200) {
        reject(new Error(`Unexpected status ${res.statusCode} for ${data.link}`));
        return;
      }

      setTimeout(() => {
        // Exceptions thrown inside a timer callback would escape the promise,
        // so convert them into a rejection.
        try {
          const $ = cheerio.load(res.body);
          const title = $('#subject_tpc').html();
          const content = $('#read_tpc').html();
          if (title == null || content == null) {
            reject(new Error(`Expected #subject_tpc and #read_tpc in ${data.link}`));
            return;
          }

          const text = content.replace(/<br><br>/g, '\n').replace(/<br>/g, '');
          const target = __dirname + `/txt/${toSafeFileName(title)}.txt`;

          fs.writeFile(target, text, writeErr => {
            if (writeErr) {
              reject(writeErr);
              return;
            }
            console.log('当前完成索引:', data.index, '总个数:', pageLength, '当前页数:', pageIndex, '总页数:', maxPage);
            resolve('success');
          });
        } catch (parseErr) {
          reject(parseErr);
        }
      }, data.index * 500);
    });
  });
 
  /**
   * Fetch a list page, start a download for every entry on it and, once all
   * of them have finished, move on to the next page until `maxPage` is reached.
   *
   * Entries are the `<h3>` elements inside `#ajaxtable`; each one's first `<a>`
   * supplies the title and link. On the first page the first 7 entries are
   * skipped (presumably pinned/announcement rows - confirm against the site).
   *
   * Errors are logged, not thrown: a failed list request or a failed download
   * stops the crawl at the current page.
   *
   * NOTE(review): the same `url` is requested for every page; `pageIndex` is
   * only a counter here and is never applied to the URL. Confirm whether the
   * page number should be part of the request.
   *
   * @param {string} url - URL of the list page.
   * @returns {Promise<undefined>} Resolves immediately; the crawl continues in
   *   the background through the request callback.
   */
  async function getAllPage(url) {
    request.get(url, (err, res) => {
      // On a transport error `res` is undefined, so check `err` first.
      if (err) {
        console.log(err);
        return;
      }
      if (res.statusCode !== 200) {
        console.log(`Unexpected status ${res.statusCode} for ${url}`);
        return;
      }

      const $ = cheerio.load(res.body);
      const list = $('#ajaxtable').find('h3');
      pageLength = list.length;

      // Only the first page skips its leading entries.
      const firstIndex = pageIndex === 1 ? 7 : 0;
      const dataList = [];
      for (let i = firstIndex; i < pageLength; i++) {
        const anchor = list.eq(i).find('a');
        dataList.push(download({
          name: anchor.html(),
          link: anchor.attr('href'),
          index: i,
        }));
      }

      Promise.all(dataList).then(() => {
        console.log('PromiseAll success pageIndex', pageIndex);
        // Strictly less than: pageIndex is the page that has just finished,
        // so `<=` would request one page beyond maxPage.
        if (pageIndex < maxPage) {
          pageIndex++;
          getAllPage(url);
        }
      }).catch(error => {
        console.log(error);
      });
    });
  }
  // Start the crawl with the first list page.
  // NOTE(review): `url` is not defined anywhere in the visible part of this file;
  // unless it is declared elsewhere, this call throws a ReferenceError.
  getAllPage(url)
 
  |