python爬取双色球数据更新及数据库使用

tsd

发布日期: 2021-06-09 13:49:20 浏览量: 52
评分:
star star star star star star star star star star_border
*转载请注明来自write-bug.com
  1. 解析网站并获取数据

    # URL of the page that hosts the lottery draw data
    url = 'http://datachart.500.com/ssq/'

    # Download the page, sending a Chrome User-Agent string from fake_useragent
    request_headers = {"User-Agent": UserAgent().chrome}
    response = requests.get(url, headers=request_headers)

    # Parse the HTML so the draw table can be queried with XPath
    e = etree.HTML(response.text)

    # Text of the first cell of every row inside <tbody id="tdata">
    date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
    # Row elements of the same <tbody> that carry no class attribute
    trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')
  2. 连接数据库

    # Open a connection to the MySQL server and create a cursor for the queries
    client = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        password='123456',
        charset='utf8',
        db='bangumi',
    )
    cursor = client.cursor()
  3. 普通获取数据

    # Parameterized INSERT: a literal 0 followed by date_time, red balls, blue ball
    # (presumably the 0 feeds an auto-increment id column -- confirm against the schema)
    sql = 'insert into doubleballs values(0,%s,%s,%s)'

    # Pair each date_time with its table row and store one record per pair
    for data_time, tr in zip(date_times, trs):
        # Red ball numbers are joined into a single "-"-separated string
        red_numbers = tr.xpath('./td[@class="chartBall01"]/text()')
        red_ball = '-'.join(red_numbers)
        # Only the first blue ball cell is used
        blue_numbers = tr.xpath('./td[@class="chartBall02"]/text()')
        blue_ball = blue_numbers[0]
        print(f"第{data_time}红球是:{red_ball} 蓝球:{blue_ball}")
        cursor.execute(sql, [data_time, red_ball, blue_ball])
        # NOTE(review): the listing lost its indentation; committing after
        # every row is assumed here -- confirm against the attached script
        client.commit()
  4. 更新数据:这部分先从数据库中查询数据。将期号列表反转顺序后循环遍历,用index作为计数器;假如该期是需要更新的新数据,result返回值为0,index+1;一旦查到已存在的数据(result为1),就停止遍历。

    # Query used to check whether a draw is already stored
    select_new_sql = "select * from doubleballs where date_time = %s"

    # Walk the scraped date_time values in reverse page order
    date_times.reverse()

    # Number of leading draws (in the reversed order) that are not stored yet
    index = 0
    for data_time in date_times:
        # execute() returns the number of rows the SELECT matched
        result = cursor.execute(select_new_sql, [data_time])
        # Any non-zero count means the draw exists. The original `== 1` test
        # treated a draw stored more than once as missing and re-inserted it.
        if result:
            break
        index += 1
  5. 将数据顺序反转,使按xpath从网站上取到的第1条数据就是最新的一期,然后把前index条(即新数据)依次放到数据库中。

    # Reverse the row list in place (newest first, per the original author's note)
    trs.reverse()

    # Store only the first `index` rows, i.e. the ones counted as new
    for i in range(index):
        tr = trs[i]
        # Red ball numbers joined into one "-"-separated string
        red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
        # First blue ball cell of the row
        blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
        date_time = date_times[i]
        print(f"第{date_time}红球是:{red_ball} 蓝球:{blue_ball}")
        cursor.execute(sql, [date_time, red_ball, blue_ball])
        # NOTE(review): the listing lost its indentation; a per-row commit
        # is assumed -- confirm against the attached script
        client.commit()
  6. 完整代码

    import requests
    from fake_useragent import UserAgent
    from lxml import html
    import pymysql

    etree = html.etree

    # URL of the page that hosts the lottery draw data
    url = 'http://datachart.500.com/ssq/'
    # Download the page with a Chrome User-Agent string
    response = requests.get(url, headers={"User-Agent": UserAgent().chrome})
    # Parse the HTML so it can be queried with XPath
    e = etree.HTML(response.text)
    # First-cell text of every row inside <tbody id="tdata">
    date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
    # Row elements of the same <tbody> that carry no class attribute
    trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')

    # Connect to the database
    client = pymysql.connect(host='localhost', port=3306, user='root', password='123456', charset='utf8', db='bangumi')
    # try/finally guarantees the connection and cursor are released even if a
    # query raises; the original only closed them on the success path
    try:
        cursor = client.cursor()
        try:
            # Parameterized INSERT statement
            sql = 'insert into doubleballs values(0,%s,%s,%s)'
            # Query used to check whether a draw is already stored
            select_new_sql = "select * from doubleballs where date_time = %s"

            date_times.reverse()
            # Number of leading draws (in reversed order) that are not stored yet
            index = 0
            for data_time in date_times:
                # execute() returns the number of rows the SELECT matched
                result = cursor.execute(select_new_sql, [data_time])
                # Any non-zero count means the draw exists; the original
                # `== 1` test re-inserted draws that were stored more than once
                if result:
                    break
                index += 1

            # Reverse the rows as well so they line up with the reversed date_times
            trs.reverse()
            for i in range(index):
                # Red ball numbers joined into one "-"-separated string
                red_ball = '-'.join(trs[i].xpath('./td[@class="chartBall01"]/text()'))
                # First blue ball cell of the row
                blue_ball = trs[i].xpath('./td[@class="chartBall02"]/text()')[0]
                print("第" + date_times[i] + "红球是:" + red_ball + " 蓝球:" + blue_ball)
                cursor.execute(sql, [date_times[i], red_ball, blue_ball])
                # NOTE(review): per-row commit assumed; the listing lost its indentation
                client.commit()

            # First-run alternative that inserts every scraped draw; left
            # disabled exactly as in the original listing.
            # for data_time, tr in zip(date_times, trs):
            #     red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
            #     blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
            #     print("第" + data_time + "红球是:" + red_ball + " 蓝球:" + blue_ball)
            #     cursor.execute(sql, [data_time, red_ball, blue_ball])
            #     client.commit()
        finally:
            cursor.close()
    finally:
        client.close()
  7. 这样排序会让更新之后最新的数据排在最后,但一开始(首次写入)排序时不会出现这个问题,是从新到旧的排序。因为有点强迫症,最后还是改成从旧到新排序。只需要改动几行代码即可:把反转数据的操作放在提取红球、蓝球数据之前,保证数据最新;判断时每遇到一条已存在的数据就让index+1,循环结束后加入index = count - index(其中count = date_times.__len__()),得到新数据的条数;接着将循环中的i变成index-i-1。最终代码如下:

    import requests
    from fake_useragent import UserAgent
    from lxml import html
    import pymysql

    etree = html.etree

    # URL of the page that hosts the lottery draw data
    url = 'http://datachart.500.com/ssq/'
    # Download the page with a Chrome User-Agent string
    response = requests.get(url, headers={"User-Agent": UserAgent().chrome})
    # Parse the HTML so it can be queried with XPath
    e = etree.HTML(response.text)
    # First-cell text of every row inside <tbody id="tdata">
    date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
    # Row elements of the same <tbody> that carry no class attribute
    trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')
    # Total number of scraped date_time values
    count = len(date_times)

    # Connect to the database
    client = pymysql.connect(host='localhost', port=3306, user='root', password='123456', charset='utf8', db='bangumi')
    # try/finally guarantees the connection and cursor are released even if a
    # query raises; the original only closed them on the success path
    try:
        cursor = client.cursor()
        try:
            # Parameterized INSERT statement
            sql = 'insert into doubleballs values(0,%s,%s,%s)'
            # Query used to check whether a draw is already stored
            select_new_sql = "select * from doubleballs where date_time = %s"

            # Count the scraped draws that are already stored ...
            index = 0
            for data_time in date_times:
                # execute() returns the number of rows the SELECT matched
                result = cursor.execute(select_new_sql, [data_time])
                # Any non-zero count means the draw exists; the original
                # `== 1` test miscounted draws stored more than once
                if result:
                    index += 1
            # ... then turn that into the number of draws still to insert
            index = count - index

            # Reverse both lists only now, after the counting pass
            date_times.reverse()
            trs.reverse()
            # Walk the first `index` entries of the reversed lists back to
            # front (position index-i-1), which restores the page order
            for i in range(index):
                pos = index - i - 1
                # Red ball numbers joined into one "-"-separated string
                red_ball = '-'.join(trs[pos].xpath('./td[@class="chartBall01"]/text()'))
                # First blue ball cell of the row
                blue_ball = trs[pos].xpath('./td[@class="chartBall02"]/text()')[0]
                print("第" + date_times[pos] + "红球是:" + red_ball + " 蓝球:" + blue_ball)
                cursor.execute(sql, [date_times[pos], red_ball, blue_ball])
                # NOTE(review): per-row commit assumed; the listing lost its indentation
                client.commit()

            # First-run alternative that inserts every scraped draw; left
            # disabled exactly as in the original listing.
            # for data_time, tr in zip(date_times, trs):
            #     red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
            #     blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
            #     print("第" + data_time + "红球是:" + red_ball + " 蓝球:" + blue_ball)
            #     cursor.execute(sql, [data_time, red_ball, blue_ball])
            #     client.commit()
        finally:
            cursor.close()
    finally:
        client.close()
上传的附件 cloud_download 双色球练习.py ( 1.94kb, 0次下载 )

tsd

发送私信

5
文章数
1
评论数
最近文章
eject