我们知道android lowmemorykiller机制有两套执行方案,在N之前的版本都是采用的kernel的lowmemorykiller.c里面的方式。最近查看了Android Go的代码结构发现,Android Go采用的是native的lmkd service的方式来起到lowmemorykiller的作用。具体实现流程如下:
我们知道kernel lowmemorykiller启动杀进程的条件是file_page-shmem-swapcache(-unevictable)的值低于AMS所预设的minfree的各档的阈值。这里如果是采用的lmkd的方式,则采用的是另外一套机制启动lmkd来杀进程。
首先讲一下lmkd的执行流程,其主要代码位于/system/core/lmkd/lmkd.c,其执行流程如下:
/*
 * lmkd entry point: run one-time initialization, then block in the event
 * loop dispatching epoll callbacks.
 * (Excerpt from /system/core/lmkd/lmkd.c; "………………" marks elided code.)
 */
int main(int argc __unused, char **argv __unused) {
struct sched_param param = {
.sched_priority = 1,
};
………………
if (!init())//init(): set up the control socket, epoll set and pressure listeners
mainloop();//epoll loop: waits on the watched fds and invokes their registered callbacks
ALOGI("exiting");
return 0;
/*
 * One-time setup: obtain the "lmkd" control socket (ActivityManager pushes
 * the minfree/adj tables over it), register it with epoll, and decide
 * whether to rely on the in-kernel lowmemorykiller or on lmkd's own
 * memory-pressure event listeners.
 * (Excerpt; "………………" marks elided code.)
 */
static int init(void) {
struct epoll_event epev;
int i;
int ret;
………………
ctrl_lfd = android_get_control_socket("lmkd");//lmkd control socket: AMS sends the minfree and adj values over it
if (ctrl_lfd < 0) {
ALOGE("get lmkd control socket failed");
return -1;
}
ret = listen(ctrl_lfd, 1);
if (ret < 0) {
ALOGE("lmkd control socket listen failed (errno=%d)", errno);
return -1;
}
epev.events = EPOLLIN;
epev.data.ptr = (void *)ctrl_connect_handler;//epoll callback, invoked when the socket has data to read
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_lfd, &epev) == -1) {//watch ctrl_lfd; on activity the callback above is run
ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
return -1;
}
maxevents++;
has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
use_inkernel_interface = has_inkernel_module && !is_go_device;//used below to pick the in-kernel lowmemorykiller vs. lmkd's own mechanism
if (use_inkernel_interface) {
ALOGI("Using in-kernel low memory killer interface");
} else {
ret = init_mp_medium();//not using kernel LMK: register userspace memory-pressure listeners instead
ret |= init_mp_critical();
if (ret)
ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
}
………………
return 0;
}
/*
 * Handles the "targets" command from ActivityManager: stores the minfree/adj
 * table locally and, when the kernel module exists, mirrors it into the
 * in-kernel lowmemorykiller sysfs nodes.
 * (Excerpt; "………………" marks elided code.)
 */
static void cmd_target(int ntargets, int *params) {
………………
for (i = 0; i < ntargets; i++) {//copy the parameters (network byte order) into the local tables
lowmem_minfree[i] = ntohl(*params++);
lowmem_adj[i] = ntohl(*params++);
}
…………
if (has_inkernel_module) {
char minfreestr[128];
char killpriostr[128];
minfreestr[0] = '\0';
killpriostr[0] = '\0';
for (i = 0; i < lowmem_targets_size; i++) {
char val[40];
if (i) {
strlcat(minfreestr, ",", sizeof(minfreestr));
strlcat(killpriostr, ",", sizeof(killpriostr));
}
snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0);//use_inkernel_interface selects kernel LMK (real values) vs. lmkd (zeros, keeping kernel LMK inert)
strlcat(minfreestr, val, sizeof(minfreestr));
snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0);
strlcat(killpriostr, val, sizeof(killpriostr));
}
writefilestring(INKERNEL_MINFREE_PATH, minfreestr);//push the assembled table into the kernel sysfs node
writefilestring(INKERNEL_ADJ_PATH, killpriostr);
}
而本文讲的是在Android Go上面采用另外的lmkd的方式来实现lowmemorykiller机制,下面是它的初始化流程:
/*
 * Registers a memory-pressure listener with the kernel memcg interface:
 * opens memory.pressure_level, creates an eventfd, announces the pair via
 * cgroup.event_control, and adds the eventfd to lmkd's epoll set so
 * event_handler runs when the kernel signals the requested pressure level.
 * Returns 0 on success, -1 on any failure (all fds opened so far closed).
 */
static int init_mp_common(char *levelstr, void *event_handler, bool is_critical)
{
    struct epoll_event epev;
    char buf[256];
    int len;
    int slot = is_critical ? CRITICAL_INDEX : MEDIUM_INDEX;

    /* fd of memory.pressure_level; its number goes into the registration line */
    int mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
    if (mpfd < 0) {
        ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
        return -1;
    }

    /* fd of cgroup.event_control, where the registration line is written */
    int evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
    if (evctlfd < 0) {
        ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
        close(mpfd);
        return -1;
    }

    /* eventfd the kernel will signal to wake lmkd */
    int evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (evfd < 0) {
        ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno);
        close(evctlfd);
        close(mpfd);
        return -1;
    }

    /* registration line format: "<event_fd> <control_fd> <level>" */
    len = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr);
    if (len >= (ssize_t)sizeof(buf)) {
        ALOGE("cgroup.event_control line overflow for level %s", levelstr);
        close(evfd);
        close(evctlfd);
        close(mpfd);
        return -1;
    }

    /* triggers memcg_write_event_control() in the kernel */
    if (write(evctlfd, buf, strlen(buf) + 1) == -1) {
        ALOGE("cgroup.event_control write failed for level %s; errno=%d",
              levelstr, errno);
        close(evfd);
        close(evctlfd);
        close(mpfd);
        return -1;
    }

    epev.events = EPOLLIN;
    epev.data.ptr = event_handler;  /* callback dispatched by mainloop() */
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev) == -1) {
        ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno);
        close(evfd);
        close(evctlfd);
        close(mpfd);
        return -1;
    }

    maxevents++;
    mpevfd[slot] = evfd;
    return 0;
}
当往cgroup.event_control节点里面写入event_fd、memory.pressure_level的fd以及内存压力等级时,内核会执行以下函数:
/*
 * Kernel side of the registration: parses the "<event_fd> <control_fd> <args>"
 * line written to cgroup.event_control, binds the eventfd to the control
 * file's register_event handler, and queues the event on the memcg's list.
 * (Excerpt from mm/memcontrol.c; "………………" marks elided code.)
 */
static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
………………
efd = simple_strtoul(buf, &endp, 10);//the evfd written by lmkd's init_mp_common()
if (*endp != ' ')
return -EINVAL;
buf = endp + 1;
cfd = simple_strtoul(buf, &endp, 10);//fd of the control file to monitor (memory.pressure_level here)
if ((*endp != ' ') && (*endp != '\0'))
return -EINVAL;
buf = endp + 1;
………………
efile = fdget(efd);//resolve efd to its struct file
if (!efile.file) {
ret = -EBADF;
goto out_kfree;
}
event->eventfd = eventfd_ctx_fileget(efile.file);//grab the eventfd context for later signalling
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
goto out_put_efile;
}
cfile = fdget(cfd);//resolve cfd, i.e. memory.pressure_level
if (!cfile.file) {
ret = -EBADF;
goto out_put_eventfd;
}
………………
} else if (!strcmp(name, "memory.pressure_level")) {
event->register_event = vmpressure_register_event;//hook up vmpressure registration
event->unregister_event = vmpressure_unregister_event;
………………
ret = event->register_event(memcg, event->eventfd, buf);//invoke the registration hook selected above
if (ret)
goto out_put_css;
………………
spin_lock(&memcg->event_list_lock);
list_add(&event->list, &memcg->event_list);//track this event on the memcg's event list
spin_unlock(&memcg->event_list_lock);
………………
}
/*
 * Records an eventfd listener for a given vmpressure level;
 * vmpressure_event() signals it whenever the computed level reaches
 * ev->level. (Excerpt from mm/vmpressure.c; "………………" marks elided code.)
 */
int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
………………
ev->efd = eventfd;//keep the eventfd ctx handed over by memcg_write_event_control()
ev->level = level;
mutex_lock(&vmpr->events_lock);
list_add(&ev->node, &vmpr->events);//append this listener to the vmpr->events list
mutex_unlock(&vmpr->events_lock);
………………
}
以上就是lmkd的完整初始化流程。从以上我们可以看出,其主要是监听memory.pressure_level节点的变化,那么何时会触发所监听的回调函数的执行呢?往下看:
在shrink_zone中有如下操作:
/*
 * Memory-reclaim path: after each scan pass, report how many pages were
 * scanned vs. actually reclaimed so vmpressure can derive a pressure level.
 * (Excerpt from mm/vmscan.c; "………………" marks elided code.)
 */
static bool shrink_zone(struct zone *zone, struct scan_control *sc,
bool is_classzone)
{
………………
vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);//feed the scan/reclaim deltas into vmpressure(), see below
……………………
}
/*
 * Accumulates scanned/reclaimed counters and defers the actual level
 * computation and listener notification to a workqueue.
 * (Excerpt from mm/vmpressure.c; "………………" marks elided code.)
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
unsigned long scanned, unsigned long reclaimed)
{
………………
schedule_work(&vmpr->work);//queue the work item whose handler is vmpressure_work_fn()
}
/*
 * Deferred worker scheduled by vmpressure(): computes the pressure level
 * from the accumulated scanned/reclaimed counters and notifies registered
 * listeners via vmpressure_event().
 * (Excerpt from mm/vmpressure.c; "………………" marks elided code.)
 */
static void vmpressure_work_fn(struct work_struct *work)
{
………………
vmpressure_event(vmpr, scanned, reclaimed);//fixed: excerpt was missing the statement-terminating semicolon
………………
}
/*
 * Computes the pressure level and signals every registered eventfd whose
 * threshold has been reached -- this is what wakes lmkd's epoll loop.
 * (Excerpt from mm/vmpressure.c; "………………" marks elided code.)
 */
static bool vmpressure_event(struct vmpressure *vmpr,
unsigned long scanned, unsigned long reclaimed)
{
………………
level = vmpressure_calc_level(scanned, reclaimed);//derive the pressure level from scan efficiency
list_for_each_entry(ev, &vmpr->events, node) {
if (level >= ev->level) {
eventfd_signal(ev->efd, 1);//signal the eventfd: triggers the lmkd callback registered on it
signalled = true;
}
}
………………
}