爬虫系列更新-第二篇文章——《Python爬虫系列-有道批量翻译英文单词-注音标版》
之前发布计算机英文单词时,研究了一下怎么把一个含有大量英文单词的txt文件翻译成如下格式:
如上图,左边图片是需要翻译的txt文本,右边图片是翻译后的txt文本。
运行的实际界面效果。
Python代码参考了CSDN上的这个作者的帖子。他的分析博文很牛,但是没有批量翻译功能,所以我在他的代码的基础上添加了翻译中文、写入国际音标的功能,全部代码如下:
import hashlib
import base64
import requests
import json
import time
from urllib.parse import urlencode
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad, pad
class AESCipher(object):
    """AES-CBC helper with a fixed key and IV.

    ``key`` and ``iv`` are the MD5 digests of two hard-coded seed strings.
    Both methods use a base64 alphabet in which ``-`` and ``_`` replace
    ``+`` and ``/``.
    """

    # Derive the key and IV directly from their seed strings.
    key = hashlib.md5(
        b'ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl'
    ).digest()
    iv = hashlib.md5(
        b'ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4'
    ).digest()

    @staticmethod
    def decrypt(data):
        """Base64-decode *data*, AES-decrypt it and return the unpadded text.

        :param data: base64 text using ``-`` and ``_`` as the alternate characters
        :return: the decrypted message decoded to ``str``
        """
        engine = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        raw_bytes = base64.b64decode(data, b'-_')
        plain_bytes = unpad(engine.decrypt(raw_bytes), AES.block_size)
        return plain_bytes.decode()

    @staticmethod
    def encrypt(plaintext: str):
        """Pad and AES-encrypt *plaintext*, returning base64-encoded bytes.

        :param plaintext: text to encrypt
        :return: ``bytes`` holding the base64 form of the ciphertext
        """
        engine = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        padded_bytes = pad(plaintext.encode(), AES.block_size)
        return base64.b64encode(engine.encrypt(padded_bytes), b'-_')
def get_form_data(sentence, from_lang, to_lang):
    """Build the signed form fields for one translation request.

    :param sentence: text to translate
    :param from_lang: source language
    :param to_lang: target language
    :return: dict of form fields, including the timestamp and its MD5 sign
    """
    client = 'fanyideskweb'
    product = 'webfanyi'
    timestamp = time.time()

    # The sign is the MD5 hex digest of the url-encoded
    # client / mysticTime / product / key fields, in exactly this order.
    sign_source = urlencode({
        'client': client,
        'mysticTime': timestamp,
        'product': product,
        'key': 'fsdsogkndfokasodnaso',
    })
    sign = hashlib.md5(sign_source.encode('utf-8')).hexdigest()

    return {
        'i': sentence,
        'from': from_lang,
        'to': to_lang,
        'domain': 0,
        'dictResult': 'true',
        'keyid': product,
        'sign': sign,
        'client': client,
        'product': product,
        'appVersion': '1.0.0',
        'vendor': 'web',
        'pointParam': 'client,mysticTime,product',
        # Same timestamp value that went into the sign above.
        'mysticTime': timestamp,
        'keyfrom': 'fanyi.web',
    }
def translate(sentence, from_lang='auto', to_lang='', timeout=10):
    """Translate *sentence* via the Youdao web endpoint and format the result.

    :param sentence: text to translate
    :param from_lang: source language
    :param to_lang: target language
    :param timeout: seconds to wait for the HTTP response before giving up
    :return: a string of the form ``英:[uk] 美:[us] 译:[translation]`` on
        success, or ``0`` if the request, decryption or result lookup fails
    """
    # Request parameters for the Youdao translation web page.
    url = 'https://dict.youdao.com/webtranslate'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'referer': 'https://fanyi.youdao.com/',
        'cookie': 'OUTFOX_SEARCH_USER_ID=-805044645@10.112.57.88; OUTFOX_SEARCH_USER_ID_NCOO=818822109.5585971;'
    }
    params = get_form_data(sentence, from_lang, to_lang)
    try:
        # The network call and decoding are inside the try so that a single
        # failed word reports an error instead of aborting a whole batch.
        res = requests.post(url, headers=headers, data=params, timeout=timeout)
        res.raise_for_status()
        # The response body is AES-encrypted; decrypt it before parsing.
        ret = json.loads(AESCipher.decrypt(res.text))
        word = ret["dictResult"]["ec"]["word"]
        tgt = ret['translateResult'][0][0]['tgt']
        return "英:[" + word["ukphone"] + "] 美:[" + word["usphone"] + "] 译:[" + tgt + "]"
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError covers bad base64, bad padding and invalid JSON;
        # KeyError/IndexError/TypeError cover a result lacking the fields above.
        print('翻译失败:', e)
        return 0
if __name__ == '__main__':
    # Batch driver: read one word per line from a text file, translate each
    # one, and write "<word> <result>" lines to "<name>_已翻译.txt" beside it.
    import os

    # The backslash is doubled so the prompt shows the example path literally;
    # a bare "\1" inside a normal string is an octal escape, not "\" + "1".
    path = input("请输入你要翻译的txt文档路径(E:\\1.txt): ").strip()

    translated = []
    # The input file is read with the platform default encoding.
    with open(path, 'r') as f:
        for line in f:
            word = line.rstrip("\r\n")
            if not word.strip():
                # Blank line: nothing to translate, skip the request.
                continue
            print(word)
            # Send the word without its trailing line break.
            result = translate(word)
            if result:
                translated.append(word + " " + result + "\n")
    out = "".join(translated)

    # splitext handles any extension length (or none).
    target = os.path.splitext(path)[0] + "_已翻译.txt"
    with open(target, 'w', encoding='utf-8') as fout:
        fout.write(out)
    print(out)
    print("已完成!")
调用方法如下:
把代码保存到.py文件中,运行.py文件,输入需要翻译的txt文本路径地址,如下图所示:
然后翻译后的txt,也会出现在之前的文本文件目录里,如下图:
上图中"1.txt"就是输入的英文文档,"1_已翻译.txt"就是翻译后的文档。