环境:virtio-gpu 双 head + 4.9.0 内核 + xserver 1.9.3 + modesetting 0.5.0 驱动
终端中打开大量文字内容,不停上下滑动,或cat大量内容的文件,操作过程中用户界面卡死,如下:
稳定复现方法:
$ cat /var/log/syslog
然后在终端窗口中不停上下滑动鼠标滚轮
卡死后:
kill pid of X
能解决这个问题
卡死期间屏幕仍响应鼠标移动事件,但不响应点击事件和键盘输入事件;kill 掉 KDE 程序后仍然卡死,只有 kill 掉 X server 进程才能弹回登录界面并恢复响应。
只在日志中发现kdm的报错:
localhost kdm_greet[2336]: Can’t open default user face
重新执行后发现内核中出现drm ioctl报错,并且重复测试后每次均会出现该问题:
[drm:drm_ioctl [drm]] pid=2285, dev=0xe200, auth=1, DRM_IOCTL_MODE_DIRTYFB
[drm:drm_mode_object_unreference [drm]] OBJ ID: 105 (6)
[drm:drm_ioctl [drm]] pid=2285, dev=0xe200, auth=1, DRM_IOCTL_MODE_DIRTYFB
[drm:drm_mode_object_unreference [drm]] OBJ ID: 105 (6)
[drm:drm_ioctl [drm]] pid=2285, dev=0xe200, auth=1, DRM_IOCTL_MODE_DIRTYFB
[drm:drm_mode_object_unreference [drm]] OBJ ID: 105 (6)
2019-10-16T01:34:21.108908+08:00 localhost kernel: [ 90.915464] [drm:drm_ioctl [drm]] ret = -22
返回值 -22 即 -EINVAL(参数无效),通过 strace 跟踪 xorg 也能看到同样的错误:
ioctl(10, 0xc01864b1, 0x7ffeaaed3e80) = -1 EINVAL (Invalid argument)
其中:
0xc01864b1 即 DRM_IOCTL_MODE_DIRTYFB 的 ioctl 请求码
0x7ffeaaed3e80是参数地址
DRM_IOCTL_MODE_DIRTYFB调用执行如下:
int drm_mode_dirtyfb_ioctl(struct drm_device *dev,
void *data, struct drm_file *file_priv)
{
struct drm_clip_rect __user *clips_ptr;
struct drm_clip_rect *clips = NULL;
struct drm_mode_fb_dirty_cmd *r = data;
struct drm_framebuffer *fb;
unsigned flags;
int num_clips;
int ret;
if (!drm_core_check_feature(dev, DRIVER_MODESET))
return -EINVAL;
fb = drm_framebuffer_lookup(dev, r->fb_id);
if (!fb)
return -ENOENT;
num_clips = r->num_clips;
clips_ptr = (struct drm_clip_rect __user *)(unsigned long)r->clips_ptr;
if (!num_clips != !clips_ptr) {
ret = -EINVAL;
goto out_err1;
}
flags = DRM_MODE_FB_DIRTY_FLAGS & r->flags;
/* If userspace annotates copy, clips must come in pairs */
if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY && (num_clips % 2)) {
ret = -EINVAL;
goto out_err1;
}
if (num_clips && clips_ptr) {
if (num_clips < 0 || num_clips > DRM_MODE_FB_DIRTY_MAX_CLIPS) {
ret = -EINVAL;
goto out_err1;
}
clips = kcalloc(num_clips, sizeof(*clips), GFP_KERNEL);
if (!clips) {
ret = -ENOMEM;
goto out_err1;
}
ret = copy_from_user(clips, clips_ptr,
num_clips * sizeof(*clips));
if (ret) {
ret = -EFAULT;
goto out_err2;
}
}
if (fb->funcs->dirty) {
ret = fb->funcs->dirty(fb, file_priv, flags, r->color,
clips, num_clips);
} else {
ret = -ENOSYS;
}
out_err2:
kfree(clips);
out_err1:
drm_framebuffer_unreference(fb);
return ret;
}
这里怀疑是 num_clips > DRM_MODE_FB_DIRTY_MAX_CLIPS 导致的。在 xorg 驱动中添加打印后,卡死时打印出:
[ 141.634] (II) modeset(0): fd_id:105 clip num:554
而 DRM 头文件(drm_mode.h)中该上限的定义如下:
#define DRM_MODE_FB_DIRTY_MAX_CLIPS 256
554 > 256,因此内核走到如下分支,直接返回 -EINVAL:
if (num_clips < 0 || num_clips > DRM_MODE_FB_DIRTY_MAX_CLIPS) {
ret = -EINVAL;
goto out_err1;
}
在 xorg 驱动中把传给 ioctl 的 clip 数组分批提交(每次最多 DRM_MODE_FB_DIRTY_MAX_CLIPS 个),解决了这个问题,改动如下:
- ret = drmModeDirtyFB(ms->fd, fb_id, clip, num_cliprects);
- DamageEmpty(damage);
- if (ret) {
- if (ret == -EINVAL)
- return ret;
- }
+ drmModeClip *sig_clip = clip;
+ int sig_cliprects;
+ while(num_cliprects > 0) {
+ if(num_cliprects > DRM_MODE_FB_DIRTY_MAX_CLIPS) {
+ sig_cliprects = DRM_MODE_FB_DIRTY_MAX_CLIPS;
+ } else {
+ sig_cliprects = num_cliprects;
+ }
+ ret = drmModeDirtyFB(ms->fd, fb_id, sig_clip, sig_cliprects);
+ sig_clip += sig_cliprects;
+ num_cliprects -= sig_cliprects;
+ DamageEmpty(damage);
+ if (ret) {
+ if (ret == -EINVAL)
+ return ret;
+ }
+ }