python爬取LOL所有英雄技能信息

工欲善其事 必先利其器

要想玩好LOL,了解所有英雄的技能必然是最基本的要求,所以此爬虫就应运而生。

运行环境

python 3.7

此爬虫所用的库有

  1. requests (获取网页信息)
  2. openpyxl (Excel相关操作)
  3. pymysql (MySQL数据库相关操作)
  4. re (正则)

代码

下面有已打包为EXE的程序,可直接使用

主要代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import requests
import re
import openpyxl
import pymysql


def get_html(hero, timeout=10):
    """Download the raw data file for one hero and return it as text.

    Args:
        hero: Hero identifier inserted into the data URL and the Referer
            header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The body of the response as a string.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    headers = {
        'Referer': 'http://lol.qq.com/web201310/info-defail.shtml?id=' + hero,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    url = 'http://lol.qq.com/biz/hero/' + hero + '.js'
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here instead of handing an error page to the regex-based parser.
    response.raise_for_status()
    return response.text


def hero_info(response):
    """Extract a hero's name, title, passive and four spells from raw text.

    All fields are pulled out of ``response`` with regular expressions; the
    text is not parsed as JSON.

    Args:
        response: Text of a hero data file.

    Returns:
        A four-item list ``[name, title, passive, spells]``. ``passive`` is
        ``'<name>:<description>|<group>'``. ``spells`` contains one
        ``'<name>:<description>|<resource>|<group>'`` line for each of the
        first four spells, joined with newlines.

    Raises:
        IndexError: If any expected field is missing, or fewer than four
            spells are found.
    """
    name_pattern = r'"name":"(.*?)","description"'
    description_pattern = r'"description":"(.*?)","image"'
    group_pattern = r'"group":"(.*?)","x"'

    hero_name = re.findall(r'"name":"(.*?)","title"', response, re.S)[0]
    hero_title = re.findall(r'"title":"(.*?)","tags"', response, re.S)[0]

    # Active spells: restrict the per-field searches to the "spells" section
    # so they cannot pick up the passive's name/description.
    spells_section = re.findall(
        r'"spells":(.*?),"passive"', response, re.S)[0]
    names = re.findall(name_pattern, spells_section, re.S)
    descriptions = re.findall(description_pattern, spells_section, re.S)
    resources = re.findall(r'"resource":"(.*?)"}', spells_section, re.S)
    groups = re.findall(group_pattern, spells_section, re.S)
    # Exactly four entries are used; indexing (rather than zip) keeps a
    # missing field loud instead of silently dropping a spell.
    spell_lines = [
        names[i] + ':' + descriptions[i] + '|' + resources[i] + '|' + groups[i]
        for i in range(4)
    ]
    spells = '\n'.join(spell_lines)

    # Passive skill: same field patterns, applied to the "passive" section.
    passive_section = re.findall(
        r'"passive":(.*?),"lore"', response, re.S)[0]
    passive_name = re.findall(name_pattern, passive_section, re.S)[0]
    passive_description = re.findall(
        description_pattern, passive_section, re.S)[0]
    passive_group = re.findall(group_pattern, passive_section, re.S)[0]
    passive = passive_name + ':' + passive_description + '|' + passive_group

    return [hero_name, hero_title, passive, spells]


def get_hero():
    """Read the hero list from the ``hero`` file in the current directory.

    Returns:
        The non-blank lines of the file, each still carrying its trailing
        newline. Blank lines are dropped because they hold no hero entry.

    Raises:
        OSError: If the ``hero`` file cannot be opened.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open('hero', 'r', encoding='utf-8') as f:
        return [line for line in f if line.strip()]


def save_to_excel(her, filename='herotest.xlsx'):
    """Write all hero rows to a new Excel workbook.

    Args:
        her: Iterable of rows; each row is a sequence of cell values written
            under the four header columns.
        filename: Path of the workbook to create. Defaults to
            ``'herotest.xlsx'`` in the current directory.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    # Header row; on a fresh sheet the first append lands in row 1.
    ws.append(['英雄称号', '英雄名称', '被动技能', '主动技能'])
    for hero in her:
        ws.append(hero)
    wb.save(filename)


def save_to_mysql(her):
    """Insert every hero row into the ``lolheroinfo`` MySQL table.

    Each row is committed on its own, so one failing row does not discard
    the rows already stored. Failed rows are rolled back and reported on
    stdout.

    Args:
        her: Sequence of rows; the first four items of each row are inserted
            as the four column values.

    Returns:
        Always ``True``.

    Raises:
        pymysql.MySQLError: If the connection itself cannot be established.
    """
    if not her:
        return True

    # NOTE(review): connection settings and credentials are hard-coded.
    db = pymysql.connect(host='localhost', user='root',
                         password='123456', database='python_mysql',
                         charset='utf8')
    # Placeholders let the driver escape the values; building the statement
    # by string formatting broke on any quote character in the skill text.
    sql = 'insert into lolheroinfo values (%s, %s, %s, %s)'
    try:
        with db.cursor() as cursor:
            for row in her:
                label = '"' + row[0] + '"'
                try:
                    cursor.execute(sql, tuple(row[:4]))
                    db.commit()
                except pymysql.MySQLError as exc:
                    db.rollback()
                    print(label, ' insert failed:', exc)
                else:
                    print(label, ' insert into success!')
    finally:
        # One shared connection, always released.
        db.close()
    return True


def main():
    """Fetch and parse every hero listed in the hero file, then export them.

    For each line returned by ``get_hero`` the hero id is extracted, its
    data file is downloaded and parsed, and the decoded fields are collected.
    The collected rows are written to Excel at the end.
    """
    her = []
    for line in get_hero():
        # The hero id is the fourth piece when the line is split on double
        # quotes (presumably lines look like "266":"Aatrox" - verify against
        # the hero file). Skip lines that do not have that shape.
        pieces = line.split('"')
        if len(pieces) < 4:
            continue
        hero = pieces[3]
        fields = hero_info(get_html(hero))
        # Turn backslash escapes left in the raw text (e.g. \uXXXX) into the
        # characters they stand for.
        her.append(
            [field.encode("latin-1").decode("unicode_escape")
             for field in fields]
        )

    save_to_excel(her)
    # Uncomment to also store the rows in MySQL (the table must exist first).
    # save_to_mysql(her)


if __name__ == '__main__':
    main()
获取英雄数据文件

运行此文件后会在当前目录下生成名为 hero 的数据文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import requests
import re

# Fetch the champion index and write one hero entry per line to the
# "hero" file in the current directory.
headers = {
    'Referer': 'https://lol.qq.com/data/info-defail.shtml?id=Aatrox',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get('https://lol.qq.com/biz/hero/champion.js',
                        headers=headers, timeout=10)
response.raise_for_status()

# The entries sit inside the "keys":{...} object, separated by commas.
keys = re.findall(r'"keys":{(.*?)},"data"', response.text, re.S)
if not keys:
    raise SystemExit('could not find the "keys" section in champion.js')
keys = keys[0]
keys = keys.split(',')

with open('hero', 'w', encoding='utf-8') as f:
    for key in keys:
        f.write(key)
        f.write('\n')
        print(key)

_如果想要保存到MySQL,请先创建MySQL数据库,然后用下面的代码创建表,最后取消主代码save_to_mysql(her)的注释即可_

创建数据表代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/python3

import pymysql

# Open the database connection. Keyword arguments are used because current
# PyMySQL releases no longer accept positional connect() arguments; the
# charset is set explicitly because the column names below are Chinese.
# NOTE(review): connection settings and credentials are hard-coded.
db = pymysql.connect(host='localhost', user='root', password='123456',
                     database='python_mysql', charset='utf8')

# Table definition.
# NOTE(review): the column widths may be too small for long skill
# descriptions - confirm against real data.
sql = """CREATE TABLE lolheroinfo (
英雄称号 CHAR(255),
英雄名称 CHAR(255),
被动技能 CHAR(255),
主动技能 varchar(999)
)
"""

try:
    # Create a cursor object for executing statements.
    cursor = db.cursor()

    # Drop the table if it already exists so the script can be re-run. This
    # must name the table created below; it previously dropped EMPLOYEE.
    cursor.execute("DROP TABLE IF EXISTS lolheroinfo")

    cursor.execute(sql)
finally:
    # Close the database connection even if a statement failed.
    db.close()

Excel版:
ivSn76.jpg
MySQL版:
ivSm0x.jpg
下面是上文代码打包为EXE版(pyinstaller)
主文件(Excel版):https://www.lanzous.com/i2dnmvg
主文件(MySQL版):https://www.lanzous.com/i2dnmej
数据文件:https://www.lanzous.com/i2dnn9a
获取数据文件:https://www.lanzous.com/i2dnm7c

最后温馨提示请合理使用爬虫