web端的加密咱们之前稍微了解了一点,通过断点调试的方式可以还原出需要的加密方法。那么对于小程序的一些加密参数,在没有办法直接调试js的情况下,咱们怎么解决呢?今天咱们就来看看如何解决小程序的加密参数。
小程序:得物App
对于一些服装鞋饰商家来说,得物app应该是他们经常拜访的地方了,对比商品价格等信息对于合理定价和出售是有很大帮助的,今天咱们就来看看如何爬取得物里的信息。
抓包工具,这里使用charles
js调试工具,这里使用vscode
咱们可以使用模拟器打开小程序或者pc端打开小程序或者真机打开小程序都是可以的,这里咱们以pc端为例,打开得物小程序,以搜索安踏为例。
我们可以看到这是个GET请求,有8个请求参数:除去sign别的参数都是一些请求条件,例如搜索词,排序方式,筛选条件,页码等等,这些参数都可以通过变换查询条件获取,就不一一细说了,这里主要解决的就是签名验证问题,也就是sign参数。
通过wxid搜索本地的wxapkg
这个_APP_.wxapkg就是pc端小程序加密后的包了,我们先对它进行解密
然后对解密得到的包进行反编译
这样就拿到了大部分的小程序源码
上面咱们拿到了反编译后的代码,当然这不是直接可以在小程序开发工具里运行调试的那种,不过解决加密参数的问题还是绰绰有余了。加密js主程一般在app-service.js里,通过抓包工具拿到的数据包和参数,咱们可以大致猜测sign参数是通过md5加密得到的32位小写十六进制字符串,我们这里可以尝试直接搜索md5
经过一番查找和确认,咱们找到了sign参数的生成位置,大致是通过对请求参数的排序和拼接,然后加上一个常量字符串,最后进行md5加密就可以得到sign参数了,进行整理可以得到
通过这个函数对请求参数进行处理,再进行md5加密就ok了,咱们来测试下
可以看到生成的sign参数和抓包的一模一样,数据也拿到了,那么说明咱们拿到的加密方法是没问题的,下面是具体加密逻辑和代码(js版和python版)
js版本:
/**
 * Compute the MD5 digest of a value and return it as 32 lowercase hex characters.
 *
 * The input is converted with `toString()`, every CRLF pair is collapsed to LF,
 * and the result is UTF-8 encoded before hashing. Because of the CRLF step, the
 * digest of text containing "\r\n" differs from an MD5 over the raw bytes.
 *
 * Characters outside the Basic Multilingual Plane (stored by JavaScript as a
 * surrogate pair) are encoded as a single 4-byte UTF-8 sequence, so the digest
 * agrees with standard UTF-8 based MD5 implementations.
 *
 * @param {*} md5str value to hash; must provide `toString()`
 * @returns {string} lowercase hexadecimal MD5 digest
 */
function md5(md5str) {
  // Main routine: encode, pad, then run the four MD5 rounds over each 16-word block.
  var createMD5String = function(string) {
    var x = Array()
    var k, AA, BB, CC, DD, a, b, c, d
    // Per-round left-rotation amounts.
    var S11 = 7,
      S12 = 12,
      S13 = 17,
      S14 = 22
    var S21 = 5,
      S22 = 9,
      S23 = 14,
      S24 = 20
    var S31 = 4,
      S32 = 11,
      S33 = 16,
      S34 = 23
    var S41 = 6,
      S42 = 10,
      S43 = 15,
      S44 = 21
    string = uTF8Encode(string)
    x = convertToWordArray(string)
    // Initial chaining values.
    a = 0x67452301
    b = 0xEFCDAB89
    c = 0x98BADCFE
    d = 0x10325476
    for (k = 0; k < x.length; k += 16) {
      // Remember the state at block start; it is added back after the rounds.
      AA = a
      BB = b
      CC = c
      DD = d
      // Round 1
      a = FF(a, b, c, d, x[k + 0], S11, 0xD76AA478)
      d = FF(d, a, b, c, x[k + 1], S12, 0xE8C7B756)
      c = FF(c, d, a, b, x[k + 2], S13, 0x242070DB)
      b = FF(b, c, d, a, x[k + 3], S14, 0xC1BDCEEE)
      a = FF(a, b, c, d, x[k + 4], S11, 0xF57C0FAF)
      d = FF(d, a, b, c, x[k + 5], S12, 0x4787C62A)
      c = FF(c, d, a, b, x[k + 6], S13, 0xA8304613)
      b = FF(b, c, d, a, x[k + 7], S14, 0xFD469501)
      a = FF(a, b, c, d, x[k + 8], S11, 0x698098D8)
      d = FF(d, a, b, c, x[k + 9], S12, 0x8B44F7AF)
      c = FF(c, d, a, b, x[k + 10], S13, 0xFFFF5BB1)
      b = FF(b, c, d, a, x[k + 11], S14, 0x895CD7BE)
      a = FF(a, b, c, d, x[k + 12], S11, 0x6B901122)
      d = FF(d, a, b, c, x[k + 13], S12, 0xFD987193)
      c = FF(c, d, a, b, x[k + 14], S13, 0xA679438E)
      b = FF(b, c, d, a, x[k + 15], S14, 0x49B40821)
      // Round 2
      a = GG(a, b, c, d, x[k + 1], S21, 0xF61E2562)
      d = GG(d, a, b, c, x[k + 6], S22, 0xC040B340)
      c = GG(c, d, a, b, x[k + 11], S23, 0x265E5A51)
      b = GG(b, c, d, a, x[k + 0], S24, 0xE9B6C7AA)
      a = GG(a, b, c, d, x[k + 5], S21, 0xD62F105D)
      d = GG(d, a, b, c, x[k + 10], S22, 0x2441453)
      c = GG(c, d, a, b, x[k + 15], S23, 0xD8A1E681)
      b = GG(b, c, d, a, x[k + 4], S24, 0xE7D3FBC8)
      a = GG(a, b, c, d, x[k + 9], S21, 0x21E1CDE6)
      d = GG(d, a, b, c, x[k + 14], S22, 0xC33707D6)
      c = GG(c, d, a, b, x[k + 3], S23, 0xF4D50D87)
      b = GG(b, c, d, a, x[k + 8], S24, 0x455A14ED)
      a = GG(a, b, c, d, x[k + 13], S21, 0xA9E3E905)
      d = GG(d, a, b, c, x[k + 2], S22, 0xFCEFA3F8)
      c = GG(c, d, a, b, x[k + 7], S23, 0x676F02D9)
      b = GG(b, c, d, a, x[k + 12], S24, 0x8D2A4C8A)
      // Round 3
      a = HH(a, b, c, d, x[k + 5], S31, 0xFFFA3942)
      d = HH(d, a, b, c, x[k + 8], S32, 0x8771F681)
      c = HH(c, d, a, b, x[k + 11], S33, 0x6D9D6122)
      b = HH(b, c, d, a, x[k + 14], S34, 0xFDE5380C)
      a = HH(a, b, c, d, x[k + 1], S31, 0xA4BEEA44)
      d = HH(d, a, b, c, x[k + 4], S32, 0x4BDECFA9)
      c = HH(c, d, a, b, x[k + 7], S33, 0xF6BB4B60)
      b = HH(b, c, d, a, x[k + 10], S34, 0xBEBFBC70)
      a = HH(a, b, c, d, x[k + 13], S31, 0x289B7EC6)
      d = HH(d, a, b, c, x[k + 0], S32, 0xEAA127FA)
      c = HH(c, d, a, b, x[k + 3], S33, 0xD4EF3085)
      b = HH(b, c, d, a, x[k + 6], S34, 0x4881D05)
      a = HH(a, b, c, d, x[k + 9], S31, 0xD9D4D039)
      d = HH(d, a, b, c, x[k + 12], S32, 0xE6DB99E5)
      c = HH(c, d, a, b, x[k + 15], S33, 0x1FA27CF8)
      b = HH(b, c, d, a, x[k + 2], S34, 0xC4AC5665)
      // Round 4
      a = II(a, b, c, d, x[k + 0], S41, 0xF4292244)
      d = II(d, a, b, c, x[k + 7], S42, 0x432AFF97)
      c = II(c, d, a, b, x[k + 14], S43, 0xAB9423A7)
      b = II(b, c, d, a, x[k + 5], S44, 0xFC93A039)
      a = II(a, b, c, d, x[k + 12], S41, 0x655B59C3)
      d = II(d, a, b, c, x[k + 3], S42, 0x8F0CCC92)
      c = II(c, d, a, b, x[k + 10], S43, 0xFFEFF47D)
      b = II(b, c, d, a, x[k + 1], S44, 0x85845DD1)
      a = II(a, b, c, d, x[k + 8], S41, 0x6FA87E4F)
      d = II(d, a, b, c, x[k + 15], S42, 0xFE2CE6E0)
      c = II(c, d, a, b, x[k + 6], S43, 0xA3014314)
      b = II(b, c, d, a, x[k + 13], S44, 0x4E0811A1)
      a = II(a, b, c, d, x[k + 4], S41, 0xF7537E82)
      d = II(d, a, b, c, x[k + 11], S42, 0xBD3AF235)
      c = II(c, d, a, b, x[k + 2], S43, 0x2AD7D2BB)
      b = II(b, c, d, a, x[k + 9], S44, 0xEB86D391)
      a = addUnsigned(a, AA)
      b = addUnsigned(b, BB)
      c = addUnsigned(c, CC)
      d = addUnsigned(d, DD)
    }
    var tempValue = wordToHex(a) + wordToHex(b) + wordToHex(c) + wordToHex(d)
    return tempValue.toLowerCase()
  }
  // 32-bit rotate left.
  var rotateLeft = function(lValue, iShiftBits) {
    return (lValue << iShiftBits) | (lValue >>> (32 - iShiftBits))
  }
  // Add two 32-bit words modulo 2^32. The low 30 bits are summed normally and
  // the top two bits are patched in by hand so the intermediate sum never
  // leaves the range that JavaScript's bitwise operators handle.
  var addUnsigned = function(lX, lY) {
    var lX4, lY4, lX8, lY8, lResult
    lX8 = (lX & 0x80000000)
    lY8 = (lY & 0x80000000)
    lX4 = (lX & 0x40000000)
    lY4 = (lY & 0x40000000)
    lResult = (lX & 0x3FFFFFFF) + (lY & 0x3FFFFFFF)
    if (lX4 & lY4) return (lResult ^ 0x80000000 ^ lX8 ^ lY8)
    if (lX4 | lY4) {
      if (lResult & 0x40000000) return (lResult ^ 0xC0000000 ^ lX8 ^ lY8)
      else return (lResult ^ 0x40000000 ^ lX8 ^ lY8)
    } else {
      return (lResult ^ lX8 ^ lY8)
    }
  }
  // The four bitwise mixing functions, one per round.
  var F = function(x, y, z) {
    return (x & y) | ((~x) & z)
  }
  var G = function(x, y, z) {
    return (x & z) | (y & (~z))
  }
  var H = function(x, y, z) {
    return (x ^ y ^ z)
  }
  var I = function(x, y, z) {
    return (y ^ (x | (~z)))
  }
  // One step of each round: mix, add message word and constant, rotate, add b.
  var FF = function(a, b, c, d, x, s, ac) {
    a = addUnsigned(a, addUnsigned(addUnsigned(F(b, c, d), x), ac))
    return addUnsigned(rotateLeft(a, s), b)
  }
  var GG = function(a, b, c, d, x, s, ac) {
    a = addUnsigned(a, addUnsigned(addUnsigned(G(b, c, d), x), ac))
    return addUnsigned(rotateLeft(a, s), b)
  }
  var HH = function(a, b, c, d, x, s, ac) {
    a = addUnsigned(a, addUnsigned(addUnsigned(H(b, c, d), x), ac))
    return addUnsigned(rotateLeft(a, s), b)
  }
  var II = function(a, b, c, d, x, s, ac) {
    a = addUnsigned(a, addUnsigned(addUnsigned(I(b, c, d), x), ac))
    return addUnsigned(rotateLeft(a, s), b)
  }
  // Pack a byte string (one char per byte) into 32-bit words, lowest byte
  // first, then append the 0x80 marker and the message length in bits.
  var convertToWordArray = function(string) {
    var lWordCount
    var lMessageLength = string.length
    var lNumberOfWordsTempOne = lMessageLength + 8
    var lNumberOfWordsTempTwo = (lNumberOfWordsTempOne - (lNumberOfWordsTempOne % 64)) / 64
    var lNumberOfWords = (lNumberOfWordsTempTwo + 1) * 16
    // Unset slots stay undefined; the bitwise operators later treat them as 0.
    var lWordArray = Array(lNumberOfWords - 1)
    var lBytePosition = 0
    var lByteCount = 0
    while (lByteCount < lMessageLength) {
      lWordCount = (lByteCount - (lByteCount % 4)) / 4
      lBytePosition = (lByteCount % 4) * 8
      lWordArray[lWordCount] = (lWordArray[lWordCount] | (string.charCodeAt(lByteCount) << lBytePosition))
      lByteCount++
    }
    lWordCount = (lByteCount - (lByteCount % 4)) / 4
    lBytePosition = (lByteCount % 4) * 8
    lWordArray[lWordCount] = lWordArray[lWordCount] | (0x80 << lBytePosition)
    lWordArray[lNumberOfWords - 2] = lMessageLength << 3
    lWordArray[lNumberOfWords - 1] = lMessageLength >>> 29
    return lWordArray
  }
  // Render a 32-bit word as 8 hex digits, lowest byte first.
  var wordToHex = function(lValue) {
    var WordToHexValue = '',
      WordToHexValueTemp = '',
      lByte, lCount
    for (lCount = 0; lCount <= 3; lCount++) {
      lByte = (lValue >>> (lCount * 8)) & 255
      WordToHexValueTemp = '0' + lByte.toString(16)
      WordToHexValue = WordToHexValue + WordToHexValueTemp.substr(WordToHexValueTemp.length - 2, 2)
    }
    return WordToHexValue
  }
  // Convert the input to a string whose char codes are its UTF-8 bytes.
  var uTF8Encode = function(string) {
    string = string.toString().replace(/\x0d\x0a/g, '\x0a')
    var output = ''
    for (var n = 0; n < string.length; n++) {
      var c = string.charCodeAt(n)
      // A high surrogate followed by a low surrogate is ONE code point above
      // U+FFFF; merge the pair so it is emitted as a 4-byte sequence instead
      // of two separate 3-byte sequences.
      if (c >= 0xD800 && c <= 0xDBFF && n + 1 < string.length) {
        var low = string.charCodeAt(n + 1)
        if (low >= 0xDC00 && low <= 0xDFFF) {
          c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00)
          n++
        }
      }
      if (c < 128) {
        output += String.fromCharCode(c)
      } else if (c < 2048) {
        output += String.fromCharCode((c >> 6) | 192)
        output += String.fromCharCode((c & 63) | 128)
      } else if (c < 65536) {
        // Unpaired surrogates also land here and keep their previous 3-byte form.
        output += String.fromCharCode((c >> 12) | 224)
        output += String.fromCharCode(((c >> 6) & 63) | 128)
        output += String.fromCharCode((c & 63) | 128)
      } else {
        output += String.fromCharCode((c >> 18) | 240)
        output += String.fromCharCode(((c >> 12) & 63) | 128)
        output += String.fromCharCode(((c >> 6) & 63) | 128)
        output += String.fromCharCode((c & 63) | 128)
      }
    }
    return output
  }
  return createMD5String(md5str)
}
/**
 * Build the plain-text string that is hashed to produce the `sign` parameter.
 *
 * The keys of the parameter object are sorted, then each key is concatenated
 * with its serialised value (no separators), and a fixed constant string is
 * appended at the end.
 *
 * Value serialisation:
 *   - `undefined`  -> the key is skipped entirely
 *   - `null`       -> empty string
 *   - Array        -> each element passed through JSON.stringify, joined by ","
 *   - other Object -> JSON.stringify
 *   - anything else -> `toString()`
 *
 * @param {Object} [params] request parameters; a missing, `undefined` or
 *   `null` argument is treated as an empty object
 * @returns {string} text to be fed to `md5`
 */
function a() {
  // `!= null` covers both undefined and null, so a(null) no longer throws
  // inside Object.keys.
  var params = arguments.length > 0 && arguments[0] != null ? arguments[0] : {}

  var serialize = function (value) {
    if (null === value) return ""
    if (value instanceof Array) {
      var joined = ""
      // Accumulate by hand (not map/join) so elements that JSON.stringify
      // turns into undefined still contribute exactly as before.
      value.forEach(function (item) {
        if (joined.length > 0) joined += ","
        joined += JSON.stringify(item)
      })
      return joined
    }
    return value instanceof Object ? JSON.stringify(value) : value.toString()
  }

  var text = Object.keys(params).sort().reduce(function (acc, key) {
    return void 0 === params[key] ? acc : "".concat(acc).concat(key).concat(serialize(params[key]))
  }, "")

  return text + "这里是拼接常量"
}
/**
 * Produce the `sign` value for a set of request parameters.
 *
 * @param {Object} e request parameters, passed unchanged to `a`
 * @returns {string} result of `md5` applied to the string built by `a`
 */
function getsign(e) {
  var signingText = a(e)
  return md5(signingText)
}
// Demo: sign a sample search request and print the result.
// `sign` is declared explicitly; assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
var sign = getsign({
  'title': '安踏',
  'page': 0,
  'sortType': '0',
  'sortMode': '1',
  'limit': 20,
  'showHot': '1',
  'isAggr': '1'
});
console.log(sign)
python版本:
import hashlib
def hash_md5(s):
    """Return the MD5 hex digest of ``s`` with the signing constant appended.

    Args:
        s: Text to sign (a ``str``); the constant is concatenated after it.

    Returns:
        The hexadecimal digest string of the UTF-8 encoded result.
    """
    payload = (s + "这里是拼接常量").encode("utf-8")
    return hashlib.md5(payload).hexdigest()
def _sign_text(value):
    """Render one parameter value the way the JavaScript signer ``a()`` does."""
    import json  # local import: keeps this block usable without touching file imports

    if value is None:
        # JS maps null to an empty string.
        return ""
    if isinstance(value, bool):
        # JS prints booleans in lowercase; must be checked before numbers
        # because bool is a subclass of int.
        return "true" if value else "false"
    if isinstance(value, float) and value.is_integer() and abs(value) < 1e21:
        # JS prints 20.0 as "20".
        return str(int(value))
    if isinstance(value, (list, tuple)):
        # JS joins the JSON form of each element with "," (no brackets).
        return ",".join(
            json.dumps(item, separators=(",", ":"), ensure_ascii=False)
            for item in value
        )
    if isinstance(value, dict):
        # Compact separators and raw non-ASCII text match JSON.stringify output.
        return json.dumps(value, separators=(",", ":"), ensure_ascii=False)
    return str(value)


def getsign(dic):
    """Compute the ``sign`` value for a dict of request parameters.

    Keys are visited in sorted order and each key is concatenated with its
    serialised value (no separators); the result is passed to ``hash_md5``.
    Values are serialised like the JavaScript version: ``None`` becomes an
    empty string, booleans are lowercase, lists are comma-joined JSON
    elements and dicts are compact JSON. Strings and integers are rendered
    with ``str`` exactly as before.

    Args:
        dic: Mapping of parameter name (``str``) to value.

    Returns:
        Whatever ``hash_md5`` returns for the assembled text.
    """
    s = "".join(key + _sign_text(dic[key]) for key in sorted(dic))
    return hash_md5(s)
if __name__ == '__main__':
    # Demo: sign a sample search request and print the resulting value.
    p = dict(
        title='安踏',
        page=0,
        sortType='0',
        sortMode='1',
        limit=20,
        showHot='1',
        isAggr='1',
    )
    sign = getsign(p)
    print('sign:', sign)
本文提供了爬取得物小程序的完整思路以及99%代码,常量值放在了公众号(h5版和小程序版):
需要自取,看官们能自己解决自然是极好了。
得物app端的newSign和sign算法后边也会不定时更新,需要的看官们可以点波关注。