使用lttng trace ceph filestore的性能

作者: Yang Honggang

本文基于 ceph-10.2.2 / CentOS 7.2,介绍如何使用 lttng 来 trace ceph osd 的性能。

安装lttng工具

安装

  # yum install lttng-tools lttng-ust
  // 查看 trace 结果的工具
  # yum install babeltrace

启动

  // 我们只是trace user space 程序,关闭对kernel的trace
  # lttng-sessiond -d --no-kernel

编译、安装ceph

  // 这是xtao ceph的rpm编译方式,不是官网编译方式
  # ./configure --with-lttng --with-rpmcleanbuild
  # cd extras/LinuxRPM
  # make cephrpms
也可以强制打开 WITH_LTTNG 宏

#define WITH_LTTNG 

trace

启动ceph-osd

修改集群配置文件

  [osd.59]
  ...
  osd tracing = true
  osd objectstore tracing = true
  rados tracing = true
启动ceph-osd

# export LD_PRELOAD=/usr/lib64/liblttng-ust-fork.so && ceph-osd -i 59 -c /etc/ceph/xtao.conf --cluster xtao

list 可用的tracepoint

  [root@xt5 ~]# lttng list -u
  UST events:
  -------------
  ...
  PID: 13400 - Name: ceph-osd
        pg:queue_op (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_post (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_unknown (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_copy_from (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_copy_get (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_copy_get_classic (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omaprmkeys (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapclear (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapsetheader (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapsetvals (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omap_cmp (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapgetvalsbykeys (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapgetheader (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapgetvals (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_omapgetkeys (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_tmap2omap (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_tmapup (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_tmapput (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_tmapget (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_startsync (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_append (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
        osd:do_osd_op_pre_rmxattr (loglevel: TRACE_DEBUG_LINE (13)) (type: tracepoint)
   ....

创建trace session

  [root@xt5 ~]# lttng create osd.59.s1
  Session osd.59.s1 created.
  Traces will be written in /root/lttng-traces/osd.59.s1-20171109-103424

使能感兴趣的event

  // 使能 osd/pg/objectstore的 tracepoints
  [root@xt5 ~]# lttng enable-event -u objectstore:*
  UST event objectstore:* created in channel channel0
  [root@xt5 ~]# lttng enable-event -u osd:*
  UST event osd:* created in channel channel0
  [root@xt5 ~]# lttng enable-event -u pg:*
  UST event pg:* created in channel channel0

开始 trace

  [root@xt5 ~]# lttng start
  Tracing started for session osd.59.s1

开启负载

  // rgw 写操作
  [root@xt8 cos]# sh cli.sh submit conf/write/64m.conf

停止trace

  [root@xt5 ~]# lttng stop
  Waiting for data availability.
  Tracing stopped for session osd.59.s1

查看 trace 结果

查看生成的文件

  [root@xt5 ~]# find lttng-traces/
  lttng-traces/
  lttng-traces/osd.59.s1-20171109-103424
  lttng-traces/osd.59.s1-20171109-103424/ust
  lttng-traces/osd.59.s1-20171109-103424/ust/uid
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/channel0_0
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_0.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_1.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_2.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_3.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_4.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_5.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_6.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_7.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_8.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_9.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_10.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_11.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_12.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_13.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_14.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/index/channel0_15.idx
  lttng-traces/osd.59.s1-20171109-103424/ust/uid/0/64-bit/channel0_1
  ... 

关闭tracing会话

  // 不会删除生成的trace数据
  # lttng destroy
  Session osd.59.s1 destroyed

查看trace结果

  // 或者 # lttng view > result.all
  [root@xt5 ~]# babeltrace lttng-traces > result.all
  [root@xt5 ~]# du -sh result.all 
  35M     result.all

使用python脚本分析trace结果


安装 python3

  $ sudo rm /usr/bin/python -rf
  $ sudo ln -s /usr/bin/python3 /usr/bin/python
  $ python -V
如果 yum 不能使用了(yum 依赖 python2),需要把 yum 脚本的解释器改回 python2.7

  // #!/usr/bin/python 改为 #!/usr/bin/python2.7
  $ sudo vim /usr/bin/yum

源码编译 babeltrace

Note:: 对于 Ubuntu 和 Debian 可以直接安装 python3-babeltrace

  $ wget http://www.efficios.com/files/babeltrace/babeltrace-1.5.3.tar.bz2
  $ tar -jxf babeltrace-1.5.3.tar.bz2
  $ cd babeltrace-1.5.3/
  $ ./configure --enable-python-bindings
  $ make -j4
  $ sudo make install

配置python路径

python 包安装到了 /usr/local/lib64/python3.4/site-packages/babeltrace

export PYTHONPATH=/usr/local/lib64/python3.4/site-packages/babeltrace:$PYTHONPATH

配置lib的路径

库文件安装到了 /usr/local/lib 目录

  $ sudo vim /etc/ld.so.conf.d/sb.conf
  $ sudo ldconfig 
  $ cat /etc/ld.so.conf.d/sb.conf 
  /usr/local/lib

增加trace内容

无需修改代码就可以增加的 trace context(上下文字段)有

  # lttng add-context -u --list 
  pid
  procname
  prio
  nice
  vpid
  tid
  pthread_id
  vtid
  ppid
  vppid
  hostname
  ip
  interruptible
  preemptible
  need_reschedule
  migratable
  perf:cpu:cpu-cycles
  perf:cpu:cycles
  ...
这里以 FileStore::_do_transaction 中的 enter/exit tracepoint 为例子,增加 pthread_id context,
用于解决同名操作的 enter/exit 事件无法配对的问题。

  // 无法处理下面的配对
  [10:42:03.143188578] (+0.000009326) xt5 objectstore:write_enter: { cpu_id = 0 }, { osr_name = "97.15s3", offset = 1398016, length = 1376 }
  [10:42:03.143478705] (+0.000009134) xt5 objectstore:write_enter: { cpu_id = 3 }, { osr_name = "97.2bcs2", offset = 0, length = 1398016 }
  [10:42:03.143607910] (+0.000129205) xt5 objectstore:write_exit: { cpu_id = 10 }, { retval = 1376 }
  [10:42:03.144973679] (+0.001220396) xt5 objectstore:write_exit: { cpu_id = 3 }, { retval = 1398016 }

增加 pthread_id trace context

增加该context后,再次trace

  # lttng add-context -u -t pthread_id
  UST context pthread_id added to all channels

trace结果

  [09:33:50.192905016] (+0.000008206) xt5 objectstore:write_enter: { cpu_id = 14 }, { pthread_id = 139909850027776 }, { osr_name = "97.319s2", offset = 0, length = 1398016 }
  [09:33:50.193288708] (+0.000004744) xt5 objectstore:write_enter: { cpu_id = 11 }, { pthread_id = 139909858420480 }, { osr_name = "97.35es2", offset = 1398016, length = 1376 }
  [09:33:50.193309999] (+0.000021291) xt5 objectstore:write_exit: { cpu_id = 11 }, { pthread_id = 139909858420480 }, { retval = 1376 }
  [09:33:50.193558676] (+0.000006137) xt5 objectstore:write_enter: { cpu_id = 11 }, { pthread_id = 139909858420480 }, { osr_name = "97.68s2", offset = 1398016, length = 1376 }
  [09:33:50.193576254] (+0.000017578) xt5 objectstore:write_exit: { cpu_id = 11 }, { pthread_id = 139909858420480 }, { retval = 1376 }
  [09:33:50.194999952] (+0.000134932) xt5 objectstore:write_exit: { cpu_id = 14 }, { pthread_id = 139909850027776 }, { retval = 1398016 }

lttng 结果处理脚本示例

需要自己写脚本来处理trace的结果。下面给出一个示例:

objectstore.py

#!/usr/bin/env python
# -*- coding: utf-8 -*
# vi:set tw=0 ts=4 sw=4 nowrap fdm=indent
# python objectstore.py ../data/ltt.752M/osd.59.s1-20171109-103424/ust/uid/0/64-bit/ [all|<event filter>]
# Yang Honggang 

import json
import sys
from babeltrace import *

# all events
g_events = set()
g_summary = {}

def update_summary(evt_name, cost):
  """Fold one measured duration into the running statistics of an event.

  Args:
    evt_name: key under which the sample is accumulated.
    cost: duration of one operation (the caller passes the difference of
      two event timestamps).

  Side effects:
    Adds evt_name to the module-level g_events set and creates or updates
    g_summary[evt_name], a dict with the keys 'max', 'min', 'sum', 'count'.
  """
  g_events.add(evt_name)

  stats = g_summary.get(evt_name)
  if stats is None:
    # First sample for this event: it is at once the max, the min and the sum.
    g_summary[evt_name] = {'max': cost, 'min': cost, 'sum': cost, 'count': 1}
    return

  stats['max'] = max(stats['max'], cost)
  stats['min'] = min(stats['min'], cost)
  stats['sum'] += cost
  stats['count'] += 1

def update():
  """Finalize g_summary: add an 'avg' field and rescale every statistic.

  For each entry of the module-level g_summary dict, 'avg' is computed as
  sum // count (floor division, so the mean is truncated to a whole number
  of the original unit), then 'avg', 'min', 'max' and 'sum' are divided by
  1e9. Presumably this converts nanosecond timestamps to seconds; confirm
  against the trace clock. 'count' is left untouched.

  The entries are modified in place, so this should be called only once
  after all samples have been collected.
  """
  scale = float(1000000000)
  for stats in g_summary.values():
    stats['avg'] = float(stats['sum']) // stats['count']
    for field in ('avg', 'min', 'max', 'sum'):
      stats[field] /= scale

if __name__ == '__main__':
  # Usage: python objectstore.py <ctf-trace-dir> [event-filter]
  #
  # Pairs every objectstore:*_enter event with the objectstore:*_exit event
  # carrying the same pthread_id context value, prints the elapsed time of
  # each pair, and finally dumps the per-operation statistics as JSON.
  # The trace must have been recorded with the pthread_id context enabled,
  # because event['pthread_id'] is read for every matched event.

  if len(sys.argv) < 2:
    sys.stderr.write('usage: %s <ctf-trace-dir> [event-filter]\n' % sys.argv[0])
    sys.exit(1)

  traces = TraceCollection()
  ret = traces.add_trace(sys.argv[1], "ctf")
  # add_trace() signals failure by returning None; without this check a bad
  # path would silently produce an empty summary.
  if ret is None:
    sys.stderr.write('failed to open CTF trace: %s\n' % sys.argv[1])
    sys.exit(1)

  # Optional second argument: only events whose name contains this substring
  # are processed; 'all' disables the filter.
  event_type = 'all'
  if len(sys.argv) == 3:
    event_type = str(sys.argv[2])

  # Pending *_enter events, keyed by "<enter event name><pthread_id>".
  events_enter = {}
  events_enter_set = set()
  for event in traces.events:
    # filter
    if event_type != 'all' and event_type not in event.name:
      continue

    if event.name.startswith('objectstore:') and event.name.endswith('_enter'):
      op_tag = event.name + str(event['pthread_id'])
      # print ('enqueue %s' % op_tag)
      # A second enter of the same operation on the same thread before its
      # exit would make the pairing ambiguous, so treat it as fatal.
      assert(op_tag not in events_enter_set), ("dump events_enter_set: %s" % str(events_enter_set))
      events_enter_set.add(op_tag)
      events_enter[op_tag] = {
              'ts': event.timestamp,
            }

    if event.name.startswith('objectstore:') and event.name.endswith('_exit'):
      # Rebuild the tag of the matching enter event: "..._exit" -> "..._enter".
      op_tag = event.name[:-4] # get rid of tail 'exit'
      op_tag += 'enter' + str(event['pthread_id'])
      # print ('dequeue %s' % op_tag)
      try:
        e = events_enter[op_tag]
        cost = event.timestamp - e['ts']

        # event.name[:-5] strips the trailing '_exit', leaving the bare
        # operation name used as the statistics key.
        update_summary(event.name[:-5], cost)
        del events_enter[op_tag]
        events_enter_set.remove(op_tag)

        print ('%s %d' % (event.name[:-5], cost))
      except KeyError:
        # Exit without a recorded enter (e.g. tracing started mid-operation).
        print ('drop %s' % str(event))

  # update avg
  update()
  print (json.dumps(g_summary, indent=4))


  // all 表示展示所有objectstore事件
  // 也可以指定 rmkeys 等其他具体事件
  $ python objectstore.py ../data/pid/osd.59.s2-20171110-092920/ust/uid/0/64-bit/ all
  ...
  objectstore:getattr 147437
  objectstore:omap_setkeys 102982
  objectstore:coll_move_rename 10100543
  {
      "objectstore:omap_rmkeys": {
          "max": 0.001172804,
          "min": 0.000141518,
          "count": 15,
          "avg": 0.000486413,
          "sum": 0.007296197
      },
  ...

dump所有事件

#!/usr/bin/env python
# -*- coding: utf-8 -*
# vi:set tw=0 ts=4 sw=4 nowrap fdm=indent
# python dump.py ../data/ltt.752M/osd.59.s1-20171109-103424/ust/uid/0/64-bit/
# Yang Honggang 

import json
import sys
from babeltrace import *

# Usage: python dump.py <ctf-trace-dir>
#
# Prints every event of the given CTF trace as an indented JSON object with
# the keys 'ts' (event timestamp), 'name' (event name) and 'body' (all fields
# of the event).
if len(sys.argv) < 2:
  sys.stderr.write('usage: %s <ctf-trace-dir>\n' % sys.argv[0])
  sys.exit(1)

traces = TraceCollection()
ret = traces.add_trace(sys.argv[1], "ctf")
# add_trace() signals failure by returning None; without this check a bad
# path would silently print nothing.
if ret is None:
  sys.stderr.write('failed to open CTF trace: %s\n' % sys.argv[1])
  sys.exit(1)

for event in traces.events:
  # import pdb;pdb.set_trace()
  item = {}
  item['ts'] = event.timestamp
  item['name'] = event.name
  item['body'] = dict(event)
  # The uuid field is stringified before dumping; presumably its raw value
  # is not JSON-serializable (verify against the bindings).
  item['body']['uuid'] = str(item['body']['uuid'])
  print (json.dumps(item, indent=4))

运行 dump.py 脚本的输出示例:


  $ python dump.py ../data/pid/osd.59.s2-20171110-092920/ust/uid/0/64-bit/ | more
  {
      "name": "objectstore:remove_enter",
      "ts": 1510277629825390663,
      "body": {
          "v": {
              "timestamp": 165143897895942,
              "id": 89
          },
          "osr_name": "97.1b6s1",
          "cpu_id": 6,
          "content_size": 1061576,
          "timestamp_end": 166030605917153,
          "packet_size": 1081344,
          "timestamp_begin": 164884122908956,
          "pthread_id": 139909858420480,
          "id": "extended",
          "uuid": "[115, 96, 191, 41, 224, 55, 68, 67, 152, 213, 166, 29, 106, 229, 239, 70]",
          "events_discarded": 0,
          "magic": 3254525889,
          "stream_id": 0,
          "packet_seq_num": 0,
          "stream_instance_id": 6
      }
  }

参考

[1] tracing ceph with lttng https://nwat.io/blog/2014/06/01/tracing-ceph-with-lttng/

[2] tracing your own user application http://lttng.org/docs/v2.10/#doc-tracing-your-own-user-application

[3] LTTng installation on CentOS 7.2 http://frederic-wou.net/lttng/

[4] 端到端trace ceph http://victoraraujo.me/babeltrace-zipkin/

[5] Tracing Ceph With BlkKin http://docs.ceph.com/docs/master/dev/blkin/

[6] Unable to see LTTng tracepoints in Ceph  http://ceph-users.ceph.narkive.com/8gg0Rt9H/unable-to-see-lttng-tracepoints-in-ceph

[7] babeltrace python bindings 的 api http://diamon.org/babeltrace/docs/python/reader



















你可能感兴趣的:(ceph,lttng,ceph,filestore)