背景
一些类似的项目会使用重复的代码,导致苹果机审期间被判断为马甲包,还没到人审就被苹果审核拒掉了。
为了让正常迭代出来的、功能相似的项目也能顺利过审,需要对项目的代码进行深度混淆。
方案
1、准备四六级单词库(如果使用随机字符会被机审查出来);
2、使用clang过滤出类名和方法名;
3、从四六级单词库中随机抽取单词组合成新名称,并与原有的类名和方法名一一建立映射;
4、通过映射进行混淆操作。
四六级单词库
单词库按首字母小写和首字母大写分别保存为不同的 txt 文件。
安装clang
pip install clang --user
类名混淆
提取类名
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=()):
    """Obtain a translation unit from in-memory source text and a language.

    The text is handed to libclang as an unsaved file named ``t.<ext>``,
    where ``<ext>`` is the default file extension for the chosen language:
    ``t.c`` for C (the default), ``t.cpp`` for C++ (parsed with
    ``-std=c++11``) and ``t.m`` for Objective-C.

    Args:
        source: Text of the code to parse.
        lang: One of ``'c'``, ``'cpp'`` or ``'objc'``.
        all_warnings: Convenience switch that appends ``-Wall`` and
            ``-Wextra`` to the compiler arguments.
        flags: Extra compiler arguments. The iterable is copied, so the
            caller's object is never modified.

    Returns:
        The result of ``TranslationUnit.from_source`` for the unsaved file.

    Raises:
        Exception: If ``lang`` is not one of the supported languages.
    """
    # Work on a private copy so neither the default nor the caller's
    # sequence can be mutated by the appends below.
    args = list(flags)
    if lang == 'c':
        name = 't.c'
    elif lang == 'cpp':
        name = 't.cpp'
        args.append('-std=c++11')
    elif lang == 'objc':
        name = 't.m'
    else:
        raise Exception('Unknown language: %s' % lang)
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])
def generate_m_file(file_text, result_lines, ret_functions):
//略...
if __name__ == '__main__':
    # Tell the clang bindings which libclang shared library to load.
    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)

    # Find all .h files below the directory given as the first argument.
    source_dir = sys.argv[1]
    h_files = []
    ret_functions = []
    for path, dir_list, file_list in os.walk(source_dir):
        for file_name in file_list:
            # Keep headers only; without this test every file in the tree
            # (sources, images, ...) would be read and parsed as well.
            if file_name.endswith('.h'):
                h_files.append(os.path.join(path, file_name))

    # Lines containing one of these markers are commented out before parsing.
    regex = r'#import|#include|#ifdef|#ifndef|#define|#endif|@property'
    for f in h_files:
        text_parts = []
        result_text_lines = []
        def_block_count = 0
        with open(f, 'r') as file:
            # NOTE(review): the nesting below was reconstructed from a copy
            # of the script whose indentation was lost - confirm against the
            # original before relying on it.
            for line in file:
                if re.findall(regex, line):
                    if '#ifdef' in line:
                        # Opening of a conditional block: keep it verbatim
                        # in the per-line result and remember it is open.
                        def_block_count += 1
                        result_text_lines.append(line)
                    elif '#endif' in line and def_block_count > 0:
                        # Matching close of a block opened by '#ifdef'.
                        def_block_count -= 1
                        result_text_lines.append(line)
                    else:
                        result_text_lines.append('\n')
                    # The text handed to the parser never sees the marker.
                    line = '// ' + line
                else:
                    result_text_lines.append('\n')
                text_parts.append(line)
        # generate_m_file presumably appends the names it finds to
        # ret_functions; its body is not shown here.
        generate_m_file(''.join(text_parts), result_text_lines, ret_functions)

    # Print each distinct name once, skipping the ones that must keep their
    # original spelling.
    unique_array = list(set(ret_functions))
    filter_array = ['xxxx', 'aaaa', 'dddd', 'AppDelegate',
                    'PrefixHeader', 'dddddf', 'aaaadxxx']
    for func_item in unique_array:
        if func_item in filter_array:
            continue
        print(func_item)
filter_array
为要筛选掉不做混淆的类名
对提取的类名做映射类名
#!/usr/bin/env bash
# Inputs and outputs of the class-name mapping step.
TABLENAME="symbols"
SYMBOL_DB_FILE="symbols"
# Extracted class names, read line by line further down.
STRING_SYMBOL_FILE="./process_class/t.txt"
# Generated mapping; every line is "<original name> <new name>".
HEAD_FILE="./rename-class/rename_classes.txt"

# Use the C locale for character classification in the tools run below.
export LC_CTYPE="C"

# Remove the results of a previous run before generating new ones.
rm -f "$SYMBOL_DB_FILE" "$HEAD_FILE"
# Print a pseudo-random integer in the inclusive range [$1, $2].
#
# $RANDOM only yields 0..32767, and adding a constant to it does not widen
# that spread. Two draws are therefore combined into one 30-bit value before
# the modulo, so every value stays reachable for ranges wider than 32768.
function rand(){
    local min=$1
    local span=$(($2-min+1))
    local num=$(( (RANDOM << 15) | RANDOM ))
    echo $((num%span+min))
}
# Print the content of a randomly chosen line (number 10..4200) taken from
# one of the word lists JAAA.txt, JBBB.txt or JCCC.txt; the list is selected
# by $RANDOM modulo 3.
function pRnd2(){
    rnd=$(rand 10 4200)
    randint=$((RANDOM % 3))
    case $randint in
        0) wordfile="JAAA.txt" ;;
        1) wordfile="JBBB.txt" ;;
        *) wordfile="JCCC.txt" ;;
    esac
    # The substitution is left unquoted on purpose so that surrounding
    # whitespace is dropped from the printed word.
    echo $(sed -n "${rnd}p" "$wordfile")
}
# iOS-style suffixes. When an original class name contains one of them, the
# same suffix is appended to the generated name. The first match wins, so
# the order matters ("Handler" is tried before "Handle").
my_arr=("Manager" "DataSource" "Helper" "Adapter" "Router" "Handler" "Handle" \
"Model" "Service" "Item" "Info" "Controller" "Cell" "Button" "View" "Window")

touch "$HEAD_FILE"
# echo "//confuse string at `date`" >> $HEAD_FILE

# Read one class name per line; the "||" part keeps a final line that has no
# trailing newline.
while read -ra line || [[ -n "$line" ]]; do
    # Optional hit probability: enable to rename only about a third of the
    # classes.
    #randint=`expr $RANDOM % 3`
    #if [ $randint != 0 ];then
    #continue
    #fi

    class_name=${line[0]}
    # Skip blank lines.
    [[ -n "$class_name" ]] || continue

    suffix=""
    for loop in "${my_arr[@]}"; do
        if [[ $class_name =~ $loop ]]; then
            suffix=$loop
            break
        fi
    done

    # Draw a new name from two random words. Retry when the word lists
    # returned nothing (line number past the end of a list) or when the name
    # has already been given to another class, because two classes renamed
    # to the same identifier would no longer compile.
    ramdom=""
    attempt=0
    while (( attempt < 20 )); do
        attempt=$((attempt + 1))
        words="$(pRnd2)$(pRnd2)"
        candidate="CS${words}${suffix}"
        if [[ -n "$words" ]] && \
           ! awk -v name="$candidate" '$2 == name { found = 1 } END { exit !found }' "$HEAD_FILE"; then
            ramdom=$candidate
            break
        fi
    done
    if [[ -z "$ramdom" ]]; then
        echo "no unused name found for $class_name, skipping" >&2
        continue
    fi

    echo "$class_name $ramdom"
    #insertValue $line $ramdom
    echo "$class_name $ramdom" >> "$HEAD_FILE"
done < "$STRING_SYMBOL_FILE"
ramdom
可以添加一些项目前缀,比如CS等。
my_arr
里面可以定义一些iOS特有的后缀。
对映射的类做混淆
#!/bin/bash
# Apply the "<old name> <new name>" pairs stored in rename_classes.txt to the
# project whose path is stored in ../path.txt: first rewrite the text of the
# project files, then rename the class files themselves.
#
# The script relies on BSD tools as shipped with macOS: the [[:<:]] and
# [[:>:]] word-boundary markers and the 'sed -i ""' in-place syntax.
PROJECT_DIR=$(cat ../path.txt)
echo "$PROJECT_DIR"
RENAME_CLASSES=rename_classes.txt

# Refuse to run without a valid project directory; an empty or wrong path
# would make find fail or scan an unintended location.
if [[ ! -d "$PROJECT_DIR" ]]; then
    echo "project directory not found: $PROJECT_DIR" >&2
    exit 1
fi

#First, we substitute the text in all of the files.
# Every mapping line "Old New" is turned into the sed command
# "s/[[:<:]]Old[[:>:]]/New/g;" so that only whole words are replaced.
sed_cmd=$(sed -e 's@^@s/[[:<:]]@; s@[[:space:]]\{1,\}@[[:>:]]/@; s@$@/g;@' "${RENAME_CLASSES}")
if [[ -z "$sed_cmd" ]]; then
    echo "no class mappings found in ${RENAME_CLASSES}" >&2
    exit 1
fi
find "${PROJECT_DIR}" -type f \
\( -name "*.pbxproj" -or -name "*.pch" -or -name "*.h" -or -name "*.m" -or -name "*.xib" -or -name "*.storyboard" \) \
-exec sed -i "" "${sed_cmd}" {} +

# Rename the files below PROJECT_DIR whose name contains class $1 as a whole
# word and whose extension matches the regex fragment $3; $1 becomes $2.
rename_class_files() {
    local class_from=$1
    local class_to=$2
    local ext_pattern=$3
    local file_from file_to
    find "${PROJECT_DIR}" -type f -regex ".*[[:<:]]${class_from}[[:>:]][^\/]*\.${ext_pattern}" -print | grep -Ev '\.bak$' | \
    while IFS= read -r file_from; do
        file_to=$(printf '%s\n' "$file_from" | sed "s/\(.*\)[[:<:]]${class_from}[[:>:]]\([^\/]*\)/\1${class_to}\2/")
        echo mv "${file_from}" "${file_to}"
        mv "${file_from}" "${file_to}"
    done
}

# Now, we rename the .h/.m files
while read -r line || [[ -n "$line" ]]; do
    # First field is the original class name, last field the new one.
    class_from=${line%%[[:space:]]*}
    class_to=${line##*[[:space:]]}
    # Skip blank or malformed lines (no second field).
    if [[ -z "$class_from" || -z "$class_to" || "$class_from" == "$class_to" ]]; then
        continue
    fi
    # Rename .h and .m files
    rename_class_files "$class_from" "$class_to" '[hm]'
    # Rename .xib files
    rename_class_files "$class_from" "$class_to" 'xib'
done < "${RENAME_CLASSES}"
rename_classes.txt
是保存的映射类,shell脚本对工程进行批量替换。
方法混淆
提取方法名
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=()):
    """Obtain a translation unit from in-memory source text and a language.

    The text is handed to libclang as an unsaved file named ``t.<ext>``,
    where ``<ext>`` is the default file extension for the chosen language:
    ``t.c`` for C (the default), ``t.cpp`` for C++ (parsed with
    ``-std=c++11``) and ``t.m`` for Objective-C.

    Args:
        source: Text of the code to parse.
        lang: One of ``'c'``, ``'cpp'`` or ``'objc'``.
        all_warnings: Convenience switch that appends ``-Wall`` and
            ``-Wextra`` to the compiler arguments.
        flags: Extra compiler arguments. The iterable is copied, so the
            caller's object is never modified.

    Returns:
        The result of ``TranslationUnit.from_source`` for the unsaved file.

    Raises:
        Exception: If ``lang`` is not one of the supported languages.
    """
    # Work on a private copy so neither the default nor the caller's
    # sequence can be mutated by the appends below.
    args = list(flags)
    if lang == 'c':
        name = 't.c'
    elif lang == 'cpp':
        name = 't.cpp'
        args.append('-std=c++11')
    elif lang == 'objc':
        name = 't.m'
    else:
        raise Exception('Unknown language: %s' % lang)
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])
def parse_method(node):
    """Return the name found in a method declaration if it may be renamed.

    The name is the token that directly follows the first ``)`` in the
    node's token stream (presumably the parenthesis closing the return type
    of an Objective-C declaration such as ``- (void)name``).

    Args:
        node: Object whose ``get_tokens()`` yields tokens exposing a
            ``spelling`` attribute.

    Returns:
        The name when it is longer than 10 characters and does not start
        with one of the filtered prefixes; otherwise the empty string. The
        empty string is also returned when no token follows a ``)``.
    """
    # Prefixes of names that must never be renamed. TODO: extend the list.
    filter_start_words = ('init', 'set', 'get', 'image', 'view', 'reload',
                          '_', 'will', 'did')
    tokens = list(node.get_tokens())
    function = ''
    # Pair every token with its successor; a ')' at the very end has no
    # successor and is simply never matched instead of raising IndexError.
    for token, following in zip(tokens, tokens[1:]):
        if token.spelling == ')':
            function = following.spelling
            break
    if len(function) > 10 and not function.startswith(filter_start_words):
        return function
    return ''
# extract_type = 0x00001: 普通方法
# extract_type = 0x00011: 普通方法 + 属性
def parse_symbols(cursor, ret_symbols, extract_type):
//略...
# extract_type bits tested here: 0x00100 selects classes (@interface),
# 0x01000 selects categories and 0x10000 selects protocols.
def extract_symbols(file_text, ret_symbols, extract_type):
    """Parse Objective-C text and hand the selected declarations on.

    The text is parsed with ``get_tu`` as Objective-C. Every top-level
    interface, category or protocol declaration whose bit is set in
    ``extract_type`` is passed to ``parse_symbols`` together with
    ``ret_symbols`` and ``extract_type``.

    Args:
        file_text: Source text to parse.
        ret_symbols: Collection forwarded to ``parse_symbols`` (presumably
            filled with the extracted names there).
        extract_type: Bit mask choosing which declaration kinds to visit.
    """
    parser = OptionParser("usage: %prog [options] {filename} [clang-args*]")
    parser.disable_interspersed_args()
    (opts, args) = parser.parse_args()
    index = Index.create()
    tu = get_tu(file_text, lang='objc')
    if not tu:
        parser.error("unable to load input")

    # Declaration kind -> bit of extract_type that enables it.
    kind_masks = (
        (CursorKind.OBJC_INTERFACE_DECL, 0x00100),
        (CursorKind.OBJC_CATEGORY_DECL, 0x01000),
        (CursorKind.OBJC_PROTOCOL_DECL, 0x10000),
    )
    for cursor in list(tu.cursor.get_children()):
        for kind, mask in kind_masks:
            if cursor.kind != kind:
                continue
            if extract_type & mask:
                parse_symbols(cursor, ret_symbols, extract_type)
            # A cursor has one kind only; no need to test the others.
            break
# Extract the symbols of all .h and .m files below a directory.
def traverse_header_files(top_directory, extract_type):
    """Collect the symbols found in every ``.h``/``.m`` file of a tree.

    Each file is preprocessed line by line before parsing: lines containing
    a preprocessor directive or ``@class`` are commented out, and on other
    lines any text preceding ``@interface`` is dropped. The resulting text
    is passed to ``extract_symbols``.

    Args:
        top_directory: Root of the directory tree to walk.
        extract_type: Bit mask forwarded to ``extract_symbols``.

    Returns:
        A set with the distinct entries that ``extract_symbols`` added to
        the shared result list; an empty set when no file matched.
    """
    # Compiled once instead of being looked up again for every line.
    directive_re = re.compile(
        r'#import|#include|#ifdef|#ifndef|#define|#endif|#if|#else|@class')

    source_files = []
    for path, dir_list, file_list in os.walk(top_directory):
        for file_name in file_list:
            if file_name.endswith(('.h', '.m')):
                source_files.append(os.path.join(path, file_name))

    ret_symbols = []
    for f in source_files:
        kept_lines = []
        with open(f, 'r') as file:
            for line in file:
                if directive_re.search(line):
                    # Hide the directive from the parser but keep the line
                    # so that line numbers stay unchanged.
                    line = '// ' + line
                else:
                    interface_idx = line.find('@interface')
                    if interface_idx > 0:
                        # Drop whatever precedes '@interface' on the line.
                        line = line[interface_idx:]
                kept_lines.append(line)
        # Joining once avoids rebuilding the whole text for every line.
        extract_symbols(''.join(kept_lines), ret_symbols, extract_type)
    return set(ret_symbols)
if __name__ == '__main__':
    # Tell the clang bindings which libclang shared library to load.
    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)

    # argv[1]: project sources, argv[2]: Pods (third-party) sources.
    source_dir, pods_dir = sys.argv[1], sys.argv[2]

    # Candidates: method names declared in the project.
    source_dir_methods_set = traverse_header_files(source_dir, 0x00101)

    # Names that must not be renamed, in the order they are collected:
    excluded_sets = [
        # properties found in the project sources
        traverse_header_files(source_dir, 0x10110),
        # methods and properties of categories in the project sources
        traverse_header_files(source_dir, 0x11011),
        # methods and properties of classes and categories in Pods
        traverse_header_files(pods_dir, 0x11111),
    ]

    # Set difference: candidates minus everything excluded above.
    result_set = source_dir_methods_set.difference(*excluded_sets)
    for func_item in list(result_set):
        print(func_item)
    # print 'Source len = ' + str(len(source_dir_methods_set))
    # print 'Result len = ' + str(len(result_set))
source_dir 为项目的代码目录,pods_dir 为 Pods 的代码目录。由于 Pods 里面是第三方代码,其中出现的方法名不能改动,所以要先把这些方法名从项目代码的提取结果中排除掉,再对剩下的方法进行映射。
对方法映射并宏定义写入文件
#!/usr/bin/env bash
# Extracted method names, read line by line further down.
STRING_SYMBOL_FILE="./process_method/method_list.txt"
# Generated header holding one "#define <method> <replacement>" per method.
HEAD_FILE="./methodDefine.h"

# Use the C locale for character classification in the tools run below.
export LC_CTYPE="C"

# Remove the header of a previous run before generating a new one.
rm -f "$HEAD_FILE"
# Print a pseudo-random integer in the inclusive range [$1, $2].
#
# $RANDOM only yields 0..32767, and adding a constant to it does not widen
# that spread: with "rand 10 140000" only a window of 32768 consecutive line
# numbers could ever be produced. Two draws are therefore combined into one
# 30-bit value before the modulo so that the whole range is reachable.
function rand(){
    local min=$1
    local span=$(($2-min+1))
    local num=$(( (RANDOM << 15) | RANDOM ))
    echo $((num%span+min))
}
# Print the content of a randomly chosen line (number 10..140000) taken from
# one of the word lists a.txt, b.txt or c.txt; the list is selected by
# $RANDOM modulo 3.
function pRnd1(){
    rnd=$(rand 10 140000)
    randt=$((RANDOM % 3))
    case $randt in
        0) wordfile="a.txt" ;;
        1) wordfile="b.txt" ;;
        *) wordfile="c.txt" ;;
    esac
    # The substitution is left unquoted on purpose so that surrounding
    # whitespace is dropped from the printed word.
    echo $(sed -n "${rnd}p" "$wordfile")
}
# Print the content of a randomly chosen line (number 10..140000) taken from
# one of the word lists AAA.txt, BBB.txt or CCC.txt; the list is selected by
# $RANDOM modulo 3.
function pRnd2(){
    rnd=$(rand 10 140000)
    randt=$((RANDOM % 3))
    case $randt in
        0) wordfile="AAA.txt" ;;
        1) wordfile="BBB.txt" ;;
        *) wordfile="CCC.txt" ;;
    esac
    # The substitution is left unquoted on purpose so that surrounding
    # whitespace is dropped from the printed word.
    echo $(sed -n "${rnd}p" "$wordfile")
}
# Write the include guard and a timestamp comment at the top of the header.
touch "$HEAD_FILE"
{
    echo '#ifndef methodDefine_h'
    echo '#define methodDefine_h'
    echo "//confuse string at $(date)"
} >> "$HEAD_FILE"

# Read one method name per line; the "||" part keeps a final line that has
# no trailing newline.
while read -ra line || [[ -n "$line" ]]; do
    # Optional hit probability: enable to rename only about a third of the
    # methods.
    #randint=`expr $RANDOM % 3`
    #if [ $randint != 0 ];then
    #continue
    #fi

    method_name=${line[0]}
    # Skip blank lines.
    [[ -n "$method_name" ]] || continue

    # Draw a replacement made of two random words. Retry when the word lists
    # returned nothing (an empty replacement would define the method name
    # away) or when the replacement is already used by another macro.
    ramdom=""
    attempt=0
    while (( attempt < 20 )); do
        attempt=$((attempt + 1))
        candidate="$(pRnd1)$(pRnd2)"
        if [[ -n "$candidate" ]] && \
           ! awk -v name="$candidate" '$1 == "#define" && $3 == name { found = 1 } END { exit !found }' "$HEAD_FILE"; then
            ramdom=$candidate
            break
        fi
    done
    if [[ -z "$ramdom" ]]; then
        echo "no unused name found for $method_name, skipping" >&2
        continue
    fi

    echo "$method_name $ramdom"
    # Each macro is guarded so that an existing definition is not redefined.
    {
        echo "#ifndef $method_name"
        echo "#define $method_name $ramdom"
        echo "#endif"
    } >> "$HEAD_FILE"
done < "$STRING_SYMBOL_FILE"

# Close the include guard opened at the top of the header.
echo "#endif" >> "$HEAD_FILE"
导入文件
prefixHeader导入methodDefine.h文件,方法混淆完成