ceph——三副本读写——3

PrimaryLogPG::do_op(OpRequestRef& op)
|---if (m->finish_decode())    //解码什么?
|---hobject_t head = m->get_hobj()
// info.pgid.pgid.get_split_bits = unsigned pg_t::get_split_bits(unsigned pg_num) const  PG处于分裂状态?
|---if (m->has_flag(CEPH_OSD_FLAG_PARALLELEXEC))    //PARALLELEXEC(并行执行)尚未实现,带该标志的请求直接返回失败
    |---osd->reply_op_error(op, -EINVAL); return 
|---if (op->rmw_flags == 0)
    |---int r = osd->osd->init_op_flags(op)
        |---
        |---
    |---if ( r )
        |---osd->reply_op_error(op, r);  return
|---if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS)) && 
        op->may_read() && !(op->may_write() || op->may_cache()))   //均衡读/本地读:此时主OSD或任一副本都可以执行该读操作,否则走handle_misdirected_op
    |---handle_misdirected_op
        |---
|---if (!op_has_sufficient_caps(op))     //没有足够cap,则直接返回失败
    |---osd->reply_op_error(op, -EPERM); return
|---if (op->includes_pg_op())    //op is OpRequest  对于请求中包含对PG的操作 CEPH_OSD_RMW_FLAG_PGOP
    |---return do_pg_op(op);    //void PrimaryLogPG::do_pg_op(OpRequestRef op) 
        |---
|---对象名称长度,如果大于osd_max_object_name_len,则osd->reply_op_error(op, -ENAMETOOLONG); return
|---对象locator key长度,如果大于osd_max_object_name_len,则osd->reply_op_error(op, -ENAMETOOLONG); return
|---对象名称空间(namespace)长度,如果大于osd_max_object_namespace_len,则osd->reply_op_error(op, -ENAMETOOLONG); return
|---if (int r = osd->store->validate_hobject_key(head))  object的head是否有效
    |---无效 osd->reply_op_error(op, r); return
|---if (get_osdmap()->is_blacklisted(m->get_source_addr())) 
    |---检查op请求的地址是否在OSDMAP的blacklist中,如果是osd->reply_op_error(op, -EBLACKLISTED); return
|---bool write_ordered = op->rwordered();   //是否是写请求
|---int64_t poolid = get_pgid().pool();
|---if (op->may_write())    //如果是写请求
    |---const pg_pool_t *pi = get_osdmap()->get_pg_pool(poolid);    //获取对应的pool,pool获取失败,直接return?难道没有返回消息?
    |---if (m->get_snapid() != CEPH_NOSNAP)   //write to clone not valid
        |---reply_op_error
    |---if (cct->_conf->osd_max_write_size && m->get_data_len() > cct->_conf->osd_max_write_size << 20)
        |---写请求的数据大于osd_max_write_size << 20 则osd->reply_op_error(op, -OSD_WRITETOOBIG); return
    |---if (is_unreadable_object(head))   // head 当前不可读
        |---如果不是主OSD则reply_op_error
        |---if (can_backoff && (g_conf->osd_backoff_on_degraded || (g_conf->osd_backoff_on_unfound && missing_loc.is_unfound(head))))
            |---条件成立:add_backoff(session, head, head); maybe_kick_recovery(head);尝试启动recovery
            |---条件不成立:wait_for_unreadable_object(head, op);其内部会maybe_kick_recovery(head)尝试启动recovery,并将op放入waiting_for_unreadable_object
            |---return
    |---if (write_ordered && scrubber.is_chunky_scrub_active() && write_blocked_by_scrub(head))
        |---waiting_for_scrub.push_back(op)   写请求并且chunky scrub正在进行、head被scrub阻塞写入时,将op放入waiting_for_scrub
    |---if (write_ordered && blocked_iter != objects_blocked_on_degraded_snap.end())
        |---wait_for_degraded_object(to_wait_on, op);   head在objects_blocked_on_degraded_snap 则将op放入waiting_for_degraded_object
    |---if (write_ordered && blocked_snap_promote_iter != objects_blocked_on_snap_promotion.end())
        |---wait_for_blocked_object    检查head是否在objects_blocked_on_snap_promotion,如果是则将op放入waiting_for_blocked_object
    |---if (write_ordered && objects_blocked_on_cache_full.count(head))
        |---block_write_on_full_cache   检查head是否在objects_blocked_on_cache_full中,如果是则将op放入waiting_for_cache_not_full
    |---检查head的snapdir是否可读,如果不可读则将head的snapdir放入waiting_for_unreadable_object
    |---检查head的snapdir是否处于recovery或者backfill状态,如果是则将head的snapdir放入waiting_for_degraded_object
    |---略过流程
    |---if (obc->is_blocked() && !m->has_flag(CEPH_OSD_FLAG_FLUSH))
        |---object context是否处于IO block状态,如果是则调用wait_for_blocked_object将请求放入waiting_for_blocked_object
    |---略过流程
    |---execute_ctx
|---

你可能感兴趣的:(ceph)