Harfbuzz-ng中的语言shaper

在harfbuzz-ng中,实际上存在两种层面的shaper,一种是比较宏观的字库shaper,这类shaper一般都针对于特定的字库文件类型来写,比如graphite2 shaper,ot shaper等。还有一种就是语言的shaper,这种shaper是harfbuzz-ng所实现的ot shaper的一个子部分,这种shaper一般都针对于特定的语言或script来写,比如印度语系的shaper,泰语/老挝语的shaper,阿拉伯语系的shaper等。harfbuzz-ng如何选择一个shaper一文中有对字体shaper的选择做过一个简单的说明。那harfbuzz-ng又是在何处来决定要采用哪一个语言shaper的呢?作出决定的依据是什么?语言shaper的结构中又有些什么内容呢?接下来,我们就来尝试解答这些问题。

harfbuzz-ng如何选择一个shaper一文中我们有提过,harfbuzz-ng在创建shape_plan时,会通过一个函数创建一个shape_plan data,对于ot shaper而言,这个函数就是_hb_ot_shaper_shape_plan_data_create(),实际上,选择语言shaper的动作也正是在这个部分完成的。我们来看这个函数的定义:

hb_ot_shaper_shape_plan_data_t *
_hb_ot_shaper_shape_plan_data_create (hb_shape_plan_t    *shape_plan,
				      const hb_feature_t *user_features,
				      unsigned int        num_user_features)
{
  hb_ot_shape_plan_t *plan = (hb_ot_shape_plan_t *) calloc (1, sizeof (hb_ot_shape_plan_t));
  if (unlikely (!plan))
    return NULL;

  hb_ot_shape_planner_t planner (shape_plan);

  planner.shaper = hb_ot_shape_complex_categorize (&planner);

  hb_ot_shape_collect_features (&planner, &shape_plan->props, user_features, num_user_features);

  planner.compile (*plan);

  if (plan->shaper->data_create) {
    plan->data = plan->shaper->data_create (plan);
    if (unlikely (!plan->data))
      return NULL;
  }

  return plan;
}
是在这个函数中,通过调用 hb_ot_shape_complex_categorize()函数来选定一个语言shaper的,该函数的定义如下:

static inline const hb_ot_complex_shaper_t *
hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
{
  switch ((hb_tag_t) planner->props.script)
  {
    default:
      return &_hb_ot_complex_shaper_default;


    /* Unicode-1.1 additions */
    case HB_SCRIPT_ARABIC:
    case HB_SCRIPT_MONGOLIAN:
    case HB_SCRIPT_SYRIAC:

    /* Unicode-5.0 additions */
    case HB_SCRIPT_NKO:
    case HB_SCRIPT_PHAGS_PA:

    /* Unicode-6.0 additions */
    case HB_SCRIPT_MANDAIC:

      /* For Arabic script, use the Arabic shaper even if no OT script tag was found.
       * This is because we do fallback shaping for Arabic script (and not others). */
      if (planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT ||
	  planner->props.script == HB_SCRIPT_ARABIC)
	return &_hb_ot_complex_shaper_arabic;
      else
	return &_hb_ot_complex_shaper_default;


    /* Unicode-1.1 additions */
    case HB_SCRIPT_THAI:
    case HB_SCRIPT_LAO:

      return &_hb_ot_complex_shaper_thai;



    /* ^--- Add new shapers here */


#if 0
    /* Note:
     *
     * These disabled scripts are listed in ucd/IndicSyllabicCategory.txt, but according
     * to Martin Hosken and Jonathan Kew do not require complex shaping.
     *
     * TODO We should automate figuring out which scripts do not need complex shaping
     *
     * TODO We currently keep data for these scripts in our indic table.  Need to fix the
     * generator to not do that.
     */


    /* Simple? */

    /* Unicode-3.2 additions */
    case HB_SCRIPT_BUHID:
    case HB_SCRIPT_HANUNOO:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_SAURASHTRA:

    /* Unicode-6.0 additions */
    case HB_SCRIPT_BATAK:
    case HB_SCRIPT_BRAHMI:


    /* Simple */

    /* Unicode-1.1 additions */
    /* These have their own shaper now. */
    case HB_SCRIPT_LAO:
    case HB_SCRIPT_THAI:

    /* Unicode-2.0 additions */
    case HB_SCRIPT_TIBETAN:

    /* Unicode-3.2 additions */
    case HB_SCRIPT_TAGALOG:
    case HB_SCRIPT_TAGBANWA:

    /* Unicode-4.0 additions */
    case HB_SCRIPT_LIMBU:
    case HB_SCRIPT_TAI_LE:

    /* Unicode-4.1 additions */
    case HB_SCRIPT_KHAROSHTHI:
    case HB_SCRIPT_SYLOTI_NAGRI:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_KAYAH_LI:

    /* Unicode-5.2 additions */
    case HB_SCRIPT_TAI_VIET:


#endif

    /* Unicode-1.1 additions */
    case HB_SCRIPT_BENGALI:
    case HB_SCRIPT_DEVANAGARI:
    case HB_SCRIPT_GUJARATI:
    case HB_SCRIPT_GURMUKHI:
    case HB_SCRIPT_KANNADA:
    case HB_SCRIPT_MALAYALAM:
    case HB_SCRIPT_ORIYA:
    case HB_SCRIPT_TAMIL:
    case HB_SCRIPT_TELUGU:

    /* Unicode-3.0 additions */
    case HB_SCRIPT_SINHALA:

    /* Unicode-4.1 additions */
    case HB_SCRIPT_BUGINESE:
    case HB_SCRIPT_NEW_TAI_LUE:

    /* Unicode-5.0 additions */
    case HB_SCRIPT_BALINESE:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_CHAM:
    case HB_SCRIPT_LEPCHA:
    case HB_SCRIPT_REJANG:
    case HB_SCRIPT_SUNDANESE:

    /* Unicode-5.2 additions */
    case HB_SCRIPT_JAVANESE:
    case HB_SCRIPT_KAITHI:
    case HB_SCRIPT_MEETEI_MAYEK:
    case HB_SCRIPT_TAI_THAM:


    /* Unicode-6.1 additions */
    case HB_SCRIPT_CHAKMA:
    case HB_SCRIPT_SHARADA:
    case HB_SCRIPT_TAKRI:

      /* Only use Indic shaper if the font has Indic tables. */
      if (planner->map.found_script[0])
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;

    case HB_SCRIPT_KHMER:
      /* A number of Khmer fonts in the wild don't have a 'pref' feature,
       * and as such won't shape properly via the Indic shaper;
       * however, they typically have 'liga' / 'clig' features that implement
       * the necessary "reordering" by means of ligature substitutions.
       * So we send such pref-less fonts through the generic shaper instead. */
      if (planner->map.found_script[0] &&
	  hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB,
					      planner->map.script_index[0],
					      planner->map.language_index[0],
					      HB_TAG ('p','r','e','f'),
					      NULL))
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;

    case HB_SCRIPT_MYANMAR:
      /* For Myanmar, we only want to use the Indic shaper if the "new" script
       * tag is found.  For "old" script tag we want to use the default shaper. */
      if (planner->map.chosen_script[0] == HB_TAG ('m','y','m','2'))
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;
  }
}
可以看到,这个函数选择语言shaper的依据就只有一个,那就是输入字串的script,这个函数用一个switch-case结构来选择一个语言shaper。在harfbuzz-0.9.12中,主要有如下的几种语言shaper:
  • _hb_ot_complex_shaper_default
  • _hb_ot_complex_shaper_arabic
  • _hb_ot_complex_shaper_thai
  • _hb_ot_complex_shaper_indic

在最新版本的harfbuzz中,有又添加一些新的语言shaper。

那语言shaper又是一个什么样的东西呢?我们来看hb_ot_complex_shaper_t结构的定义:

struct hb_ot_complex_shaper_t
{
  char name[8];

  /* collect_features()
   * Called during shape_plan().
   * Shapers should use plan->map to add their features and callbacks.
   * May be NULL.
   */
  void (*collect_features) (hb_ot_shape_planner_t *plan);

  /* override_features()
   * Called during shape_plan().
   * Shapers should use plan->map to override features and add callbacks after
   * common features are added.
   * May be NULL.
   */
  void (*override_features) (hb_ot_shape_planner_t *plan);


  /* data_create()
   * Called at the end of shape_plan().
   * Whatever shapers return will be accessible through plan->data later.
   * If NULL is returned, means a plan failure.
   */
  void *(*data_create) (const hb_ot_shape_plan_t *plan);

  /* data_destroy()
   * Called when the shape_plan is being destroyed.
   * plan->data is passed here for destruction.
   * If NULL is returned, means a plan failure.
   * May be NULL.
   */
  void (*data_destroy) (void *data);


  /* preprocess_text()
   * Called during shape().
   * Shapers can use to modify text before shaping starts.
   * May be NULL.
   */
  void (*preprocess_text) (const hb_ot_shape_plan_t *plan,
			   hb_buffer_t              *buffer,
			   hb_font_t                *font);


  /* normalization_preference()
   * Called during shape().
   * May be NULL.
   */
  hb_ot_shape_normalization_mode_t
  (*normalization_preference) (const hb_segment_properties_t *props);

  /* decompose()
   * Called during shape()'s normalization.
   * May be NULL.
   */
  bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
		     hb_codepoint_t  ab,
		     hb_codepoint_t *a,
		     hb_codepoint_t *b);

  /* compose()
   * Called during shape()'s normalization.
   * May be NULL.
   */
  bool (*compose) (const hb_ot_shape_normalize_context_t *c,
		   hb_codepoint_t  a,
		   hb_codepoint_t  b,
		   hb_codepoint_t *ab);

  /* setup_masks()
   * Called during shape().
   * Shapers should use map to get feature masks and set on buffer.
   * Shapers may NOT modify characters.
   * May be NULL.
   */
  void (*setup_masks) (const hb_ot_shape_plan_t *plan,
		       hb_buffer_t              *buffer,
		       hb_font_t                *font);

  bool zero_width_attached_marks;
  bool fallback_position;
};

可以看到,这个结构里面主要就是提供了一些callback,以方便shape等过程在需要的时候来调用。这个结构的注释还是提供了比较多的信息,各个callback的作用都有说明,此处不再罗嗦。

Done.

你可能感兴趣的:(harfbuzz,harfbuzz-ng,OpenType,shaping)