安装命令为:pip install requests 和 pip install lxml
reslutData打印结果:
?
import csv
# 因为要将文件存为csv格式
import requests
from lxml import etree
# Download the page at `url`, keep a local HTML snapshot, extract the text of
# every <span class="Title"> element and write those strings to a
# single-column CSV file.
url = "https://www.zj.gov.cn/"

# Decode the raw response bytes explicitly: `requests.get(url).text` was found
# to produce garbled characters for this page, so it is not used.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as if it
# were the real content.
response = requests.get(url, timeout=10)
response.raise_for_status()
content = response.content.decode("utf-8")

# Save a snapshot of the downloaded page next to the script.
with open("01.html", "w", encoding="utf-8") as f:
    f.write(content)

# Read the snapshot back inside a context manager so the file handle is
# closed deterministically instead of being left open.
with open("01.html", "r", encoding="utf-8") as f:
    readData = f.read()

# Parse the HTML text into an lxml element tree that supports XPath queries.
texts = etree.HTML(readData)

# Collect the text nodes of every <span> whose class attribute is "Title".
reslutData = texts.xpath('//span[@class="Title"]/text()')
print(reslutData)

# newline="" hands line-ending control to the csv module, as its
# documentation recommends for files passed to csv.writer.
with open("title.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    # writerow()/writerows() expect each row to be a sequence, so every value
    # is wrapped in a one-element list to form a single-column row.
    writer.writerow(['热点事件:'])
    writer.writerows([title] for title in reslutData)