爬虫逆向破解翻译接口参数

发布时间:2024年01月11日

Python 请求baidu翻译接口:https://fanyi.baidu.com/v2transapi?from=zh&to=en

步骤一:?查找构建请求参数

JS 断点发现如下参数:

w = {
      from: _.fromLang,
      to: _.toLang,
      query: e,
      transtype: i,
      simple_means_flag: 3,
      sign: b(e),
      token: window.common.token,
      domain: k.getCurDomain(),
      ts: +new Date
},

?如图所示参数解析出来对应得值。?

from:翻译文字对应语言代码

to:需要翻译语言代码

query:翻译文字

transtype:固定值【realtime】

sign:根据翻译文字设定得动态加密值

simple_means_flag:固定值【3】

token:固定值【85ecac1256c1bc754ad63419393fecbc】

domain:固定值【common】

ts: 时间戳

步骤二:?断点验证请求种加密参数 并找到加密函数

主要逆向解析值为:sign

js文件查找定位到:b函数

            var r = null;
            t.exports = function(t) {
            var o, i = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
            if (null === i) {
                var a = t.length;
                a > 30 && (t = "".concat(t.substr(0, 10)).concat(t.substr(Math.floor(a / 2) - 5, 10)).concat(t.substr(-10, 10)))
            } else {
                for (var s = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), c = 0, u = s.length, l = []; c < u; c++)
                    "" !== s[c] && l.push.apply(l, function(t) {
                        if (Array.isArray(t))
                            return e(t)
                    }(o = s[c].split("")) || function(t) {
                        if ("undefined" != typeof Symbol && null != t[Symbol.iterator] || null != t["@@iterator"])
                            return Array.from(t)
                    }(o) || function(t, n) {
                        if (t) {
                            if ("string" == typeof t)
                                return e(t, n);
                            var r = Object.prototype.toString.call(t).slice(8, -1);
                            return "Object" === r && t.constructor && (r = t.constructor.name),
                            "Map" === r || "Set" === r ? Array.from(t) : "Arguments" === r || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r) ? e(t, n) : void 0
                        }
                    }(o) || function() {
                        throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")
                    }()),
                    c !== u - 1 && l.push(i[c]);
                var p = l.length;
                p > 30 && (t = l.slice(0, 10).join("") + l.slice(Math.floor(p / 2) - 5, Math.floor(p / 2) + 5).join("") + l.slice(-10).join(""))
            }
            for (var d = "".concat(String.fromCharCode(103)).concat(String.fromCharCode(116)).concat(String.fromCharCode(107)), h = (null !== r ? r : (r = window[d] || "") || "").split("."), f = Number(h[0]) || 0, m = Number(h[1]) || 0, g = [], y = 0, v = 0; v < t.length; v++) {
                var _ = t.charCodeAt(v);
                _ < 128 ? g[y++] = _ : (_ < 2048 ? g[y++] = _ >> 6 | 192 : (55296 == (64512 & _) && v + 1 < t.length && 56320 == (64512 & t.charCodeAt(v + 1)) ? (_ = 65536 + ((1023 & _) << 10) + (1023 & t.charCodeAt(++v)),
                g[y++] = _ >> 18 | 240,
                g[y++] = _ >> 12 & 63 | 128) : g[y++] = _ >> 12 | 224,
                g[y++] = _ >> 6 & 63 | 128),
                g[y++] = 63 & _ | 128)
            }
            for (var b = f, w = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(97)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(54)), k = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(51)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(98)) + "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(102)), x = 0; x < g.length; x++)
                b = n(b += g[x], w);
            return b = n(b, k),
            (b ^= m) < 0 && (b = 2147483648 + (2147483647 & b)),
            "".concat((b %= 1e6).toString(), ".").concat(b ^ f)
        }
    }

步骤三:?补充执行js环境参数

进行JS修补,补充环境之后得到如下代码:


var r = null;
var window = {}
window.gtk = '320305.131321201';

function e(t, e) {
	(null == e || e > t.length) && (e = t.length);
	for (var n = 0, r = new Array(e); n < e; n++)
		r[n] = t[n];
	return r
}

function n(t, e) {
	for (var n = 0; n < e.length - 2; n += 3) {
		var r = e.charAt(n + 2);
		r = "a" <= r ? r.charCodeAt(0) - 87 : Number(r),
		r = "+" === e.charAt(n + 1) ? t >>> r : t << r,
		t = "+" === e.charAt(n) ? t + r & 4294967295 : t ^ r
	}
	return t
}

function a( t ){
	
	var o, i = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
	if (null === i) {
		var a = t.length;
		a > 30 && (t = "".concat(t.substr(0, 10)).concat(t.substr(Math.floor(a / 2) - 5, 10)).concat(t.substr(-10, 10)))
	} else {
		for (var s = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), c = 0, u = s.length, l = []; c < u; c++)
			"" !== s[c] && l.push.apply(l, function(t) {
				if (Array.isArray(t))
					return e(t)
			}(o = s[c].split("")) || function(t) {
				if ("undefined" != typeof Symbol && null != t[Symbol.iterator] || null != t["@@iterator"])
					return Array.from(t)
			}(o) || function(t, n) {
				if (t) {
					if ("string" == typeof t)
						return e(t, n);
					var r = Object.prototype.toString.call(t).slice(8, -1);
					return "Object" === r && t.constructor && (r = t.constructor.name),
					"Map" === r || "Set" === r ? Array.from(t) : "Arguments" === r || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r) ? e(t, n) : void 0
				}
			}(o) || function() {
				throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")
			}()),
			c !== u - 1 && l.push(i[c]);
		var p = l.length;
		p > 30 && (t = l.slice(0, 10).join("") + l.slice(Math.floor(p / 2) - 5, Math.floor(p / 2) + 5).join("") + l.slice(-10).join(""))
	}
	
	for (var d = "".concat(String.fromCharCode(103)).concat(String.fromCharCode(116)).concat(String.fromCharCode(107)), h = (null !== r ? r : (r = window[d] || "") || "").split("."), f = Number(h[0]) || 0, m = Number(h[1]) || 0, g = [], y = 0, v = 0; v < t.length; v++) {
		var _ = t.charCodeAt(v);
		_ < 128 ? g[y++] = _ : (_ < 2048 ? g[y++] = _ >> 6 | 192 : (55296 == (64512 & _) && v + 1 < t.length && 56320 == (64512 & t.charCodeAt(v + 1)) ? (_ = 65536 + ((1023 & _) << 10) + (1023 & t.charCodeAt(++v)),
		g[y++] = _ >> 18 | 240,
		g[y++] = _ >> 12 & 63 | 128) : g[y++] = _ >> 12 | 224,
		g[y++] = _ >> 6 & 63 | 128),
		g[y++] = 63 & _ | 128)
	}
	
	for (var b = f, w = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(97)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(54)), k = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(51)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(98)) + "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(102)), x = 0; x < g.length; x++)
	{
		b = n(b += g[x], w); 
	}
	
	return b = n(b, k),
	(b ^= m) < 0 && (b = 2147483648 + (2147483647 & b)),
	"".concat((b %= 1e6).toString(), ".").concat(b ^ f)
}

将其另存为 getSignEncrypt.js 在node环境下运行可得到加密后参数值【sign】

步骤四:?Python 执行js动态得到加密参数

python种执行js需导入模块【execjs】

js_file = "D:/Projects/abc.js"
with open(js_file, "r")as f:
    js_tamp = f.read()

jsDrive = execjs.compile( js_tamp )
sign = jsDrive.call( 'a', trans )

步骤五:发送请求得到翻译后结果集合

?完整请求代码:

import time
import execjs
import requests


trans = input( '请输入要翻译得文案:' )

js_file = "./getSignEncrypt.js"
with open(js_file, "r")as f:
    js_tamp = f.read()

jsDrive = execjs.compile( js_tamp )
sign = jsDrive.call( 'a', trans )

data = {
    "from": "zh",
    "to": "zh",
    "query": trans,
    "simple_means_flag": 3,
    "sign": sign,
    "token": "85ecac1256c1bc754ad63419393fecbc",
    "domain": "common",
    "ts": int( time.time() * 1000 )
}

header = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en,zh-CN;q=0.9,zh;q=0.8,ja;q=0.7",
    "Acs-Token": "1704798513422_1704883339706_WWWhQaSrjFtjvNWfXZYiy0mm0b94KOtCcvgpLipfwBwvOR3ETXvhJu4VUqdUbHqNk+wCNxdB7rnwbmJ7lc0uadQCtTsFWetHBUm7VUo71qkSo2G21OFmpjbZ9rhGL4hYgy3PTGUj/ErSbsyYOpl6OQE8PaAC3Sv2HipfGQ6B5z82IO5s9i3sejx6YKrG6ErNb+Uv019WU3giQFegmYk7IUm3CvnhN8nFvPaZwOyoXJzQ8W5JkZyMtFQVX7uoMHchTUvEMmsxDcb/ShDNWVWa+++L2xp+vCAW2+OR8Y2p3xpWDdhp3KEhC/kpGX75VkJARgZ3a4x2IvFWJrDhGtB/KwaPVqxSXr5DgLR9VpTDxKnS2zY/eOfOztMp/HEn1jf4d1LYkYwe01/MGac/mPINHW+N2kTHRpzsnA8u32kNcRsm1+ZGrh2m9tWaDebPF/Njf7yZHm5nwb/eKe0hiv6oBclv3+GXmznmG8Cc+U000Aoq8Zk+R9Ht0Ow7iaUM8RFD",
    "Host": "fanyi.baidu.com",
    "Origin": "https://fanyi.baidu.com",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Referer": "https://fanyi.baidu.com",
    "X-Requested-With": "XMLHttpRequest",
    "Cookie": "REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BIDUPSID=8CD7BC00DC822E5612A2EF9AB4566896; PSTM=1693744950; BAIDUID=8CD7BC00DC822E56741E842C9C5EB981:SL=0:NR=10:FG=1; MCITY=-233%3A; H_WISE_SIDS_BFESS=39712_39841_39902_39819_39909_39934_39936_39933_39943_39941_39938_39930_39732_39997_40009; H_PS_PSSID=39997_40076; H_WISE_SIDS=39997_40076; BAIDUID_BFESS=8CD7BC00DC822E56741E842C9C5EB981:SL=0:NR=10:FG=1; ZFY=haZlWdnMqSuVP8UY3jzqH9yRVGZ9jtY4HRxDOliN:Azw:C; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BA_HECTOR=0k2l85852la120alal20ah8hg1uvbt1ipslhu1s; delPer=0; PSINO=7; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1704247282,1704711563,1704787335,1704883262; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1704883271; ab_sr=1.0.1_ZmVmZmQ1YzkwMTViMzBmNDFlMzgxNzNjZGIxZmUxYjRlNjE5MzZkNjllOWYzMzE5NjRlNGFlMDg3YWRmYjAyYzk0MGNjN2RjOTViOTU3ZTA3OTlkNWRjYTIyMDNhNzEwYTczYWQ3ZTA4MmE2ZTcxNjEyNDNmODgwZmM2ZDA0YzVhNDU5MDMyODg2YWJhYmE5OGEwMzU3MDQyN2QxNzVjYQ==",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
url = 'https://fanyi.baidu.com/v2transapi?from=zh&to=en'

print( data )
print( header )

resp = requests.post( url, data = data, headers = header )
retData =  resp.json()

print( retData[ 'trans_result'][ 'data' ][0] )

执行代码输出如下:

文章来源:https://blog.csdn.net/Q718330882/article/details/135510466
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。