在harfbuzz-ng中,实际上存在两个层面的shaper:一种是比较宏观的字库shaper,这类shaper一般针对特定的字库文件类型来写,比如graphite2 shaper、ot shaper等;另一种是语言shaper,它是harfbuzz-ng所实现的ot shaper的一个子部分,一般针对特定的语言或script来写,比如印度语系的shaper、泰语/老挝语的shaper、阿拉伯语系的shaper等。《harfbuzz-ng如何选择一个shaper》一文已经对字库shaper的选择做过一个简单的说明。那harfbuzz-ng又是在何处决定要采用哪一个语言shaper的呢?作出决定的依据是什么?语言shaper的结构中又有些什么内容呢?接下来,我们就来尝试解答这些问题。
在《harfbuzz-ng如何选择一个shaper》一文中我们提到过,harfbuzz-ng在创建shape_plan时,会通过一个函数创建一个shape_plan data。对于ot shaper而言,这个函数就是_hb_ot_shaper_shape_plan_data_create(),而选择语言shaper的动作也正是在这个函数中完成的。我们来看这个函数的定义:
/*
 * _hb_ot_shaper_shape_plan_data_create()
 *
 * Builds the OT shaper's data for a shape_plan.
 *
 * Steps, in order:
 *   1. zero-allocate an hb_ot_shape_plan_t;
 *   2. pick the language shaper for the plan's script via
 *      hb_ot_shape_complex_categorize();
 *   3. collect features (including the caller's user_features);
 *   4. compile the planner into the plan;
 *   5. if the chosen shaper has a data_create() callback, call it and store
 *      the result in plan->data.
 *
 * shape_plan:        the shape plan being created; its props are read.
 * user_features:     caller-supplied features, forwarded to feature collection.
 * num_user_features: number of entries in user_features.
 *
 * Returns the new plan, or NULL if the allocation fails or the shaper's
 * data_create() callback returns NULL.
 */
hb_ot_shaper_shape_plan_data_t *
_hb_ot_shaper_shape_plan_data_create (hb_shape_plan_t    *shape_plan,
                                      const hb_feature_t *user_features,
                                      unsigned int        num_user_features)
{
  hb_ot_shape_plan_t *plan = (hb_ot_shape_plan_t *) calloc (1, sizeof (hb_ot_shape_plan_t));
  if (unlikely (!plan))
    return NULL;

  hb_ot_shape_planner_t planner (shape_plan);

  /* The language shaper is chosen before features are collected. */
  planner.shaper = hb_ot_shape_complex_categorize (&planner);

  hb_ot_shape_collect_features (&planner, &shape_plan->props, user_features, num_user_features);

  /* plan->shaper is read below, so compile() presumably copies the chosen
   * shaper into the plan -- TODO confirm against compile(). */
  planner.compile (*plan);

  if (plan->shaper->data_create) {
    plan->data = plan->shaper->data_create (plan);
    if (unlikely (!plan->data)) {
      /* Do not leak the plan when the shaper fails to create its data.
       * NOTE(review): if compile() made the plan own further allocations,
       * those need releasing here as well -- confirm against the destroy path. */
      free (plan);
      return NULL;
    }
  }

  return plan;
}
是在这个函数中,通过调用 hb_ot_shape_complex_categorize()函数来选定一个语言shaper的,该函数的定义如下:
static inline const hb_ot_complex_shaper_t * hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) { switch ((hb_tag_t) planner->props.script) { default: return &_hb_ot_complex_shaper_default; /* Unicode-1.1 additions */ case HB_SCRIPT_ARABIC: case HB_SCRIPT_MONGOLIAN: case HB_SCRIPT_SYRIAC: /* Unicode-5.0 additions */ case HB_SCRIPT_NKO: case HB_SCRIPT_PHAGS_PA: /* Unicode-6.0 additions */ case HB_SCRIPT_MANDAIC: /* For Arabic script, use the Arabic shaper even if no OT script tag was found. * This is because we do fallback shaping for Arabic script (and not others). */ if (planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT || planner->props.script == HB_SCRIPT_ARABIC) return &_hb_ot_complex_shaper_arabic; else return &_hb_ot_complex_shaper_default; /* Unicode-1.1 additions */ case HB_SCRIPT_THAI: case HB_SCRIPT_LAO: return &_hb_ot_complex_shaper_thai; /* ^--- Add new shapers here */ #if 0 /* Note: * * These disabled scripts are listed in ucd/IndicSyllabicCategory.txt, but according * to Martin Hosken and Jonathan Kew do not require complex shaping. * * TODO We should automate figuring out which scripts do not need complex shaping * * TODO We currently keep data for these scripts in our indic table. Need to fix the * generator to not do that. */ /* Simple? */ /* Unicode-3.2 additions */ case HB_SCRIPT_BUHID: case HB_SCRIPT_HANUNOO: /* Unicode-5.1 additions */ case HB_SCRIPT_SAURASHTRA: /* Unicode-6.0 additions */ case HB_SCRIPT_BATAK: case HB_SCRIPT_BRAHMI: /* Simple */ /* Unicode-1.1 additions */ /* These have their own shaper now. 
*/ case HB_SCRIPT_LAO: case HB_SCRIPT_THAI: /* Unicode-2.0 additions */ case HB_SCRIPT_TIBETAN: /* Unicode-3.2 additions */ case HB_SCRIPT_TAGALOG: case HB_SCRIPT_TAGBANWA: /* Unicode-4.0 additions */ case HB_SCRIPT_LIMBU: case HB_SCRIPT_TAI_LE: /* Unicode-4.1 additions */ case HB_SCRIPT_KHAROSHTHI: case HB_SCRIPT_SYLOTI_NAGRI: /* Unicode-5.1 additions */ case HB_SCRIPT_KAYAH_LI: /* Unicode-5.2 additions */ case HB_SCRIPT_TAI_VIET: #endif /* Unicode-1.1 additions */ case HB_SCRIPT_BENGALI: case HB_SCRIPT_DEVANAGARI: case HB_SCRIPT_GUJARATI: case HB_SCRIPT_GURMUKHI: case HB_SCRIPT_KANNADA: case HB_SCRIPT_MALAYALAM: case HB_SCRIPT_ORIYA: case HB_SCRIPT_TAMIL: case HB_SCRIPT_TELUGU: /* Unicode-3.0 additions */ case HB_SCRIPT_SINHALA: /* Unicode-4.1 additions */ case HB_SCRIPT_BUGINESE: case HB_SCRIPT_NEW_TAI_LUE: /* Unicode-5.0 additions */ case HB_SCRIPT_BALINESE: /* Unicode-5.1 additions */ case HB_SCRIPT_CHAM: case HB_SCRIPT_LEPCHA: case HB_SCRIPT_REJANG: case HB_SCRIPT_SUNDANESE: /* Unicode-5.2 additions */ case HB_SCRIPT_JAVANESE: case HB_SCRIPT_KAITHI: case HB_SCRIPT_MEETEI_MAYEK: case HB_SCRIPT_TAI_THAM: /* Unicode-6.1 additions */ case HB_SCRIPT_CHAKMA: case HB_SCRIPT_SHARADA: case HB_SCRIPT_TAKRI: /* Only use Indic shaper if the font has Indic tables. */ if (planner->map.found_script[0]) return &_hb_ot_complex_shaper_indic; else return &_hb_ot_complex_shaper_default; case HB_SCRIPT_KHMER: /* A number of Khmer fonts in the wild don't have a 'pref' feature, * and as such won't shape properly via the Indic shaper; * however, they typically have 'liga' / 'clig' features that implement * the necessary "reordering" by means of ligature substitutions. * So we send such pref-less fonts through the generic shaper instead. 
*/ if (planner->map.found_script[0] && hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB, planner->map.script_index[0], planner->map.language_index[0], HB_TAG ('p','r','e','f'), NULL)) return &_hb_ot_complex_shaper_indic; else return &_hb_ot_complex_shaper_default; case HB_SCRIPT_MYANMAR: /* For Myanmar, we only want to use the Indic shaper if the "new" script * tag is found. For "old" script tag we want to use the default shaper. */ if (planner->map.chosen_script[0] == HB_TAG ('m','y','m','2')) return &_hb_ot_complex_shaper_indic; else return &_hb_ot_complex_shaper_default; } }可以看到,这个函数选择语言shaper的依据就只有一个,那就是输入字串的script,这个函数用一个switch-case结构来选择一个语言shaper。在harfbuzz-0.9.12中,主要有如下的几种语言shaper:
在最新版本的harfbuzz中,又添加了一些新的语言shaper。
那语言shaper又是一个什么样的东西呢?我们来看hb_ot_complex_shaper_t结构的定义:
struct hb_ot_complex_shaper_t { char name[8]; /* collect_features() * Called during shape_plan(). * Shapers should use plan->map to add their features and callbacks. * May be NULL. */ void (*collect_features) (hb_ot_shape_planner_t *plan); /* override_features() * Called during shape_plan(). * Shapers should use plan->map to override features and add callbacks after * common features are added. * May be NULL. */ void (*override_features) (hb_ot_shape_planner_t *plan); /* data_create() * Called at the end of shape_plan(). * Whatever shapers return will be accessible through plan->data later. * If NULL is returned, means a plan failure. */ void *(*data_create) (const hb_ot_shape_plan_t *plan); /* data_destroy() * Called when the shape_plan is being destroyed. * plan->data is passed here for destruction. * If NULL is returned, means a plan failure. * May be NULL. */ void (*data_destroy) (void *data); /* preprocess_text() * Called during shape(). * Shapers can use to modify text before shaping starts. * May be NULL. */ void (*preprocess_text) (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, hb_font_t *font); /* normalization_preference() * Called during shape(). * May be NULL. */ hb_ot_shape_normalization_mode_t (*normalization_preference) (const hb_segment_properties_t *props); /* decompose() * Called during shape()'s normalization. * May be NULL. */ bool (*decompose) (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b); /* compose() * Called during shape()'s normalization. * May be NULL. */ bool (*compose) (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab); /* setup_masks() * Called during shape(). * Shapers should use map to get feature masks and set on buffer. * Shapers may NOT modify characters. * May be NULL. */ void (*setup_masks) (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, hb_font_t *font); bool zero_width_attached_marks; bool fallback_position; };
可以看到,这个结构里面主要就是提供了一些callback,以方便shape等过程在需要的时候调用。这个结构的注释已经提供了比较多的信息,各个callback的作用都有说明,此处不再啰嗦。
Done.