原标题:LMKD浅析(三)——Android Q新特性
修改标题原因:由于QCOM基线接触得晚了不少,所以一直以来以为如下这篇浅析是Android Q全平台适用的……
结果QCOM基线来了以后发现差异巨大,因此将此篇的标题改为LMKD浅析(三)——Android Q新特性(MTK篇)
Android Q引入新模式——PSI (Pressure Stall Information):lmkd通过注册对PSI信息的监听,并结合watermark,memfree,swapfree,thrashing等信息,更全面地判断当前系统压力,并进行针对性杀进程。
此模式需要依赖:
1、内核配置CONFIG_PSI=y;
2、属性ro.lmk.use_psi须不为false;
3、属性ro.config.low_ram为true,或ro.lmk.use_minfree_levels为false;
此模式可以配置参数:
1、ro.lmk.swap_free_low_percentage
此属性在Android P上即引入,用于作为kill条件判断,数值为整数,代表百分比,指当swap的可用空间低于一定比例时,lmkd开始进行kill操作;默认为10(当ro.config.low_ram=true时)/ 20;
2、ro.lmk.psi_partial_stall_ms
此属性为PSI模式特有属性,仅在PSI模式成功启用的条件下生效。代表上报数据的条件,数值为整数,代表毫秒值,指当psi监听到some级别失速在某一秒内超过多少毫秒时,上报压力值1(VMPRESS_LEVEL_MEDIUM)。
默认为200(当ro.config.low_ram=true时) / 70
3、ro.lmk.psi_complete_stall_ms
此属性为PSI模式特有属性,仅在PSI模式成功启用的条件下生效。代表上报数据的条件,数值为整数,代表毫秒值,指当psi监听到full级别失速在某一秒内超过多少毫秒时,上报压力值2(VMPRESS_LEVEL_CRITICAL)。
默认为700
关于PSI的some与full场景,暂时不展开介绍,详见linux kernel官方文档:https://www.kernel.org/doc/html/latest/accounting/psi.html
lmkd只使用了PSI中memory模块的信息,后续有空会另起一篇介绍。
LMKD工作流程:
1、开机启动lmkd后进行状态检查,加载各种属性并进行判断;
2、注册PSI监听;
3、当PSI上报压力值到lmkd时,lmkd会作如下判断来决定是否需要kill:
a. 当上一次kill未完成时,当获取时间失败时,或当解析vmstat/meminfo失败时,不进行kill;
/* Skip while still killing a process */
if (is_kill_pending()) {
/* TODO: replace this quick polling with pidfd polling if kernel supports */
goto no_kill;
}
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
return;
}
if (vmstat_parse(&vs) < 0) {
ALOGE("Failed to parse vmstat!");
return;
}
if (meminfo_parse(&mi) < 0) {
ALOGE("Failed to parse meminfo!");
return;
}
b. 当swap可用空间低于ro.lmk.swap_free_low_percentage属性定义的百分比时,设置swap_is_low = true;
/* Check free swap levels */
if (swap_free_low_percentage) {
if (!swap_low_threshold) {
swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
}
if (mi.field.free_swap < swap_low_threshold) {
swap_is_low = true;
}
}
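c. 通过判断pgscan_direct/pgscan_kswapd字段较上一次的变化,确定内存回收的状态是直接回收(DIRECT_RECLAIM)还是由kswapd后台回收(KSWAPD_RECLAIM),如果都不是(NO_RECLAIM),说明内存压力不大,不进行kill;否则,若是刚进入回收周期,则先记录file-backed pagecache大小(nr_inactive_file + nr_active_file)与workingset_refault的基准值,若已处于回收周期,则计算thrashing值(回收周期内新增refault页占基准file-backed pagecache的百分比);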
/* Identify reclaim state */
if (vs.field.pgscan_direct > init_pgscan_direct) {
init_pgscan_direct = vs.field.pgscan_direct;
init_pgscan_kswapd = vs.field.pgscan_kswapd;
reclaim = DIRECT_RECLAIM;
} else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) {
init_pgscan_kswapd = vs.field.pgscan_kswapd;
reclaim = KSWAPD_RECLAIM;
}
/* Skip if system is not reclaiming */
if (reclaim == NO_RECLAIM) {
in_reclaim = false;
goto no_kill;
}
if (!in_reclaim) {
/* Record file-backed pagecache size when entering reclaim cycle */
base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file;
init_ws_refault = vs.field.workingset_refault;
thrashing_limit = thrashing_limit_pct;
} else {
/* Calculate what % of the file-backed pagecache refaulted so far */
thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru;
}
in_reclaim = true;
d. 解析zoneinfo并计算min/low/high水位线;
/* Refresh thresholds once per min in case user updated one of the margins */
if (thresholds.high_wmark == 0 || get_time_diff_ms(&threshold_update_tm, &curr_tm) > 60000) {
struct zoneinfo zi;
/*
* In unlikely case of failing we skip the update until the next opportunity
* but still rate limiting the updates even as we skip one.
*/
if (zoneinfo_parse(&zi) < 0) {
ALOGE("Failed to parse zoneinfo!");
} else {
calc_zone_thresholds(&zi, &thresholds);
}
threshold_update_tm = curr_tm;
}
e. 使用当前meminfo的数据来判断当前所处水位;
wmark = get_lowest_watermark(&mi, &thresholds);
f. 根据水位线、thrashing值、压力值、swap_low值、内存回收模式等进行多种场景判断,并添加不同的kill原因:
if (cycle_after_kill && wmark > WMARK_LOW) {
/* Prevent kills not freeing enough memory */
kill_reason = PRESSURE_AFTER_KILL;
strncpy(kill_desc, "min watermark is breached even after kill", sizeof(kill_desc));
} else if (level == VMPRESS_LEVEL_CRITICAL && events != 0) {
/* Device is too busy during lowmem event (kill to prevent ANR) */
kill_reason = NOT_RESPONDING;
strncpy(kill_desc, "device is not responding", sizeof(kill_desc));
} else if (swap_is_low && thrashing > thrashing_limit_pct) {
/* Page cache is thrashing */
kill_reason = LOW_SWAP_AND_THRASHING;
snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
"kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
} else if (swap_is_low && wmark > WMARK_HIGH) {
/* Both free memory and swap are low */
kill_reason = LOW_MEM_AND_SWAP;
snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
PRId64 "kB < %" PRId64 "kB)", wmark > WMARK_LOW ? "min" : "low",
mi.field.free_swap * page_k, swap_low_threshold * page_k);
} else if (wmark > WMARK_HIGH && thrashing > thrashing_limit) {
/*
* Record last time system was thrashing and cut thrasing limit by
* thrashing_limit_decay_pct percentage of the current thrashing amount
* until the system stops thrashing
*/
thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
kill_reason = LOW_MEM_AND_THRASHING;
min_score_adj = 200;
sprintf(kill_desc, "%s watermark is breached and thrashing (%" PRId64 "%%)",
wmark > WMARK_LOW ? "min" : "low", thrashing);
} else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
/* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
kill_reason = DIRECT_RECL_AND_THRASHING;
min_score_adj = 200;
snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
PRId64 "%%)", thrashing);
}
g. 如果任意条件满足,则进行kill操作:
/* Kill a process if necessary */
if (kill_reason != NONE) {
if (find_and_kill_process(min_score_adj, kill_desc) > 0) {
killing = true;
meminfo_log(&mi);
} else {
/* No eligible processes found, reset thrashing limit */
thrashing_limit = thrashing_limit_pct;
}
}
时间有限,先在此作罢,后续会更新:
1、PSI的内核实现、变量含义、节点查看;
2、针对部分手机配置的调优方式;