# Scrape SSQ lottery draw results from kaijiang.zhcw.com list pages
# and store them in the MySQL table `lottery_ssq`.
import re
import time
from contextlib import closing

import pymysql
import urllib3
from bs4 import BeautifulSoup
count = 0  # NOTE(review): not read anywhere in the visible code; kept in case other code uses it.


def saveDatasMySQL(datas_box, i):
    """Insert a batch of parsed draws into the ``lottery_ssq`` table.

    Args:
        datas_box: Sequence of 9-item rows in column order
            ``(createtime, time, red1, red2, red3, red4, red5, red6, blue1)``;
            handed to ``cursor.executemany`` as-is.
        i: Page number; used only in the progress message.

    Any exception raised by pymysql propagates to the caller. The cursor and
    connection are closed first, and nothing is committed in that case.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded here; consider moving them to configuration.
    sql = (
        "insert into lottery_ssq(`createtime`,`time`,red1,red2,red3,red4,red5,red6,blue1)"
        " values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    # Keyword arguments only: PyMySQL 1.0+ rejects a positional host, and
    # `passwd` / `db` are deprecated aliases of `password` / `database`.
    connection = pymysql.connect(
        host='localhost',
        user="root",
        password="123456",
        database="test",
    )
    # closing() guarantees the connection is released even if the insert or
    # the commit fails.
    with closing(connection) as conn:
        with conn.cursor() as cursor:
            cursor.executemany(sql, datas_box)
        conn.commit()
    print('sql执行成功---' + str(i))
    # Short pause after each batch; also spaces out the caller's page requests.
    time.sleep(.5)
# Patterns applied to the serialized markup of each child of the results table.
_CENTER_CELL_RE = re.compile(r'<td align="center">(.*?)</td>')
_RED_BALL_RE = re.compile(r'<em class="rr">(.*?)</em>')
_BLUE_BALL_RE = re.compile(r'<em>(.*?)</em>')


def get_html(url, i):
    """Download one list page, extract its draw rows and store them.

    Each child of the page's first ``<table>`` is serialized and scanned with
    regular expressions. A row is kept when it has at least two centered
    ``<td>`` cells, six ``<em class="rr">`` values and one plain ``<em>``
    value. Rows that do not have this shape (e.g. header or footer rows) are
    skipped instead of raising ``IndexError``.

    Args:
        url: Address of the list page to fetch.
        i: Page number, forwarded to ``saveDatasMySQL``.
    """
    http = urllib3.PoolManager()
    res = http.request('GET', url)
    page_html = res.data.decode('utf-8')

    soup = BeautifulSoup(page_html, 'lxml')
    # soup.table is None when the document contains no <table> element;
    # treat that as "no rows" rather than failing on `.children`.
    table = soup.table
    children = table.children if table is not None else ()

    datas_box = []
    for child in children:
        markup = str(child)
        cells = _CENTER_CELL_RE.findall(markup)
        if not cells:
            continue
        reds = _RED_BALL_RE.findall(markup)
        blues = _BLUE_BALL_RE.findall(markup)
        if len(cells) < 2 or len(reds) < 6 or not blues:
            # Not a complete draw row.
            continue
        datas_box.append(
            (cells[0], cells[1]) + tuple(reds[:6]) + (blues[0],)
        )

    saveDatasMySQL(datas_box, i)
def html(arg):
    """Print *arg* to standard output."""
    print(arg)


# NOTE(review): the original layout was flattened; this assumes the loop runs
# at module level rather than inside html() -- confirm.
# Process list pages 1 through 128 in order, one request per page.
for i in range(1, 129):
    get_html('http://kaijiang.zhcw.com/zhcw/html/ssq/list_{}.html'.format(i), i)

print('--执行完毕--')