GCC-3.4.6源代码学习笔记(78)

5.6.1.1.2.            数字

预处理器除了知道什么是数字外,并不尝试去解读它。对于预处理器来说,这样并无不可,而且这样可以使得预处理器更灵活。但是当从预处理器处得到数字序列时,词法分析器需要知道如何解读它。函数cpp_classify_number尝试根据数字字符串设置flags。

 

143  unsigned int

144  cpp_classify_number (cpp_reader *pfile, const cpp_token *token)                  in cppexp.c

145  {

146    const uchar *str = token->val.str.text;

147    const uchar *limit;

148    unsigned int max_digit, result, radix;

149    enum {NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON} float_flag;

150 

151    /* If the lexer has done its job, length one can only be a single

152      digit. Fast-path this very common case.  */

153    if (token->val.str.len == 1)

154      return CPP_N_INTEGER | CPP_N_SMALL | CPP_N_DECIMAL;

155 

156    limit = str + token->val.str.len;

157    float_flag = NOT_FLOAT;

158    max_digit = 0;

159    radix = 10;

160 

161    /* First, interpret the radix.  */

162    if (*str == '0')

163    {

164      radix = 8;

165      str++;

166 

167      /* Require at least one hex digit to classify it as hex.  */

168      if ((*str == 'x' || *str == 'X')

169        && (str[1] == '.' || ISXDIGIT (str[1])))

170      {

171        radix = 16;

172        str++;

173      }

174    }

175 

176    /* Now scan for a well-formed integer or float.  */

177    for (;;)

178    {

179      unsigned int c = *str++;

180 

181      if (ISDIGIT (c) || (ISXDIGIT (c) && radix == 16))

182      {

183        c = hex_value (c);

184        if (c > max_digit)

185          max_digit = c;

186      }

187      else if (c == '.')

188      {

189        if (float_flag == NOT_FLOAT)

190          float_flag = AFTER_POINT;

191        else

192          SYNTAX_ERROR ("too many decimal points in number");

193      }

194      else if ((radix <= 10 && (c == 'e' || c == 'E'))

195            || (radix == 16 && (c == 'p' || c == 'P')))

196      {

197        float_flag = AFTER_EXPON;

198        break;

199      }

200      else

201      {

202        /* Start of suffix.  */

203        str--;

204        break;

205      }

206    }

207 

208    if (float_flag != NOT_FLOAT && radix == 8)

209      radix = 10;

210 

211    if (max_digit >= radix)

212      SYNTAX_ERROR2 ("invalid digit /"%c/" in octal constant", '0' + max_digit);

213 

214    if (float_flag != NOT_FLOAT)

215    {

216      if (radix == 16 && CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, c99))

217        cpp_error (pfile, CPP_DL_PEDWARN,

218                "use of C99 hexadecimal floating constant");

219 

220      if (float_flag == AFTER_EXPON)

221      {

222        if (*str == '+' || *str == '-')

223          str++;

224 

225        /* Exponent is decimal, even if string is a hex float.  */

226        if (!ISDIGIT (*str))

227          SYNTAX_ERROR ("exponent has no digits");

228 

229        do

230          str++;

231        while (ISDIGIT (*str));

232      }

233      else if (radix == 16)

234        SYNTAX_ERROR ("hexadecimal floating constants require an exponent");

235 

236      result = interpret_float_suffix (str, limit - str);

237      if (result == 0)

238      {

239        cpp_error (pfile, CPP_DL_ERROR,

240                "invalid suffix /"%.*s/" on floating constant",

241                 (int) (limit - str), str);

242        return CPP_N_INVALID;

243      }

244 

245      /* Traditional C didn't accept any floating suffixes.  */

246      if (limit != str

247         && CPP_WTRADITIONAL (pfile)

248         && ! cpp_sys_macro_p (pfile))

249        cpp_error (pfile, CPP_DL_WARNING,

250                 "traditional C rejects the /"%.*s/" suffix",

251                 (int) (limit - str), str);

252 

253      result |= CPP_N_FLOATING;

254    }

255    else

256    {

257      result = interpret_int_suffix (str, limit - str);

258      if (result == 0)

259      {

260        cpp_error (pfile, CPP_DL_ERROR,

261                 "invalid suffix /"%.*s/" on integer constant",

262                  (int) (limit - str), str);

263        return CPP_N_INVALID;

264      }

265 

266      /* Traditional C only accepted the 'L' suffix.

267        Suppress warning about 'LL' with -Wno-long-long.  */

268      if (CPP_WTRADITIONAL (pfile) && ! cpp_sys_macro_p (pfile))

269      {

270        int u_or_i = (result & (CPP_N_UNSIGNED|CPP_N_IMAGINARY));

271        int large = (result & CPP_N_WIDTH) == CPP_N_LARGE;

272 

273        if (u_or_i || (large && CPP_OPTION (pfile, warn_long_long)))

274          cpp_error (pfile, CPP_DL_WARNING,

275                   "traditional C rejects the /"%.*s/" suffix",

276                   (int) (limit - str), str);

277      }

278 

279      if ((result & CPP_N_WIDTH) == CPP_N_LARGE

280          && ! CPP_OPTION (pfile, c99)

281          && CPP_OPTION (pfile, warn_long_long))

282        cpp_error (pfile, CPP_DL_PEDWARN,

283                 "use of C99 long long integer constant");

284 

285      result |= CPP_N_INTEGER;

286    }

287 

288    if ((result & CPP_N_IMAGINARY) && CPP_PEDANTIC (pfile))

289      cpp_error (pfile, CPP_DL_PEDWARN,

290               "imaginary constants are a GCC extension");

291 

292    if (radix == 10)

293      result |= CPP_N_DECIMAL;

294    else if (radix == 16)

295      result |= CPP_N_HEX;

296    else

297      result |= CPP_N_OCTAL;

298 

299    return result;

300 

301  syntax_error:

302    return CPP_N_INVALID;

303  }

 

同样一个数值常量,在C++下可以有多个写法,比如:1234、0x4D2、02322、1.234e3、0x4.d2p8都是代表十进制值1234,当然后2个写法算作浮点数,一个以10为底(e),另一个以2为底(p)。另外,这些常量后还可跟有后缀,以限定其属性。浮点常量可用的后缀有f/F(单精度),l/L(long double,长双精度),以及i/I、j/J(虚数常量,两者等价,属于GCC的扩展)。整数常量可用后缀有l/L(long),ll/LL(long long),u/U(无符号),以及i/I、j/J(虚数常量,两者等价)。在这里,cpp_classify_number会检查这些后缀是否有效,并根据后缀设置CPP_N_SMALL、CPP_N_MEDIUM或CPP_N_LARGE。

5.6.1.1.2.1.  整数

整数常量由下面的函数来解析,并构建一个INTEGER_CST节点。

 

490  static tree

491  interpret_integer (const cpp_token *token, unsigned int flags)                        in c-lex.c

492  {

493    tree value, type;

494    enum integer_type_kind itk;

495    cpp_num integer;

496    cpp_options *options = cpp_get_options (parse_in);

497 

498    integer = cpp_interpret_integer (parse_in, token, flags);

499    integer = cpp_num_sign_extend (integer, options->precision);

500    value = build_int_2_wide (integer.low, integer.high);

 

如果是整数,首先要验证对于目标机器来说,该数字是有效的。那么结构体cpp_num被用来从cpp_interpret_integer收集这个结果。

 

604  struct cpp_num                                                                                          in cpplib.h

605  {

606    cpp_num_part  high;

607    cpp_num_part  low;

608    bool unsignedp;  /* True if value should be treated as unsigned.  */

609    bool overflow;   /* True if the most recent calculation overflowed.  */

610  };

 

在其定义中,域high和low被设计成宿主机器(host machine)上最宽的整数。对于Linux/x86,这个类型是long。

 

602  typedef unsigned HOST_WIDE_INT cpp_num_part;                                     in cpplib.h

 

函数cpp_interpret_integer的参数type就是cpp_classify_number辛苦分析出来的结果。

 

311   cpp_num

312  cpp_interpret_integer (cpp_reader *pfile, const cpp_token *token,                 in cppexp.c

313                     unsigned int type)

314  {

315    const uchar *p, *end;

316    cpp_num result;

317 

318    result.low = 0;

319    result.high = 0;

320    result.unsignedp = !!(type & CPP_N_UNSIGNED);

321    result.overflow = false;

322 

323    p = token->val.str.text;

324    end = p + token->val.str.len;

325 

326    /* Common case of a single digit.  */

327    if (token->val.str.len == 1)

328      result.low = p[0] - '0';

329    else

330    {

331      cpp_num_part max;

332      size_t precision = CPP_OPTION (pfile, precision);

333      unsigned int base = 10, c = 0;

334      bool overflow = false;

335 

336      if ((type & CPP_N_RADIX) == CPP_N_OCTAL)

337      {

338        base = 8;

339        p++;

340      }

341      else if ((type & CPP_N_RADIX) == CPP_N_HEX)

342      {

343        base = 16;

344        p += 2;

345      }

346 

347      /* We can add a digit to numbers strictly less than this without

348        needing the precision and slowness of double integers.  */

349      max = ~(cpp_num_part) 0;

350      if (precision < PART_PRECISION)

351        max >>= PART_PRECISION - precision;

352      max = (max - base + 1) / base + 1;

353 

354      for (; p < end; p++)

355      {

356        c = *p;

357 

358        if (ISDIGIT (c) || (base == 16 && ISXDIGIT (c)))

359          c = hex_value (c);

360        else

361          break;

362 

363        /* Strict inequality for when max is set to zero.  */

364        if (result.low < max)

365          result.low = result.low * base + c;

366        else

367        {

368          result = append_digit (result, c, base, precision);

369          overflow |= result.overflow;

370          max = 0;

371        }

372      }

 

上面的PART_PRECISION是high和low类型的比特数(即sizeof(long) * CHAR_BIT)。而pfile所保存的precision在cpp_create_reader中初始化,且是long类型的比特大小。349~352行的代码计算出一个阈值max:只要result.low严格小于max,再追加一位base进制的数字就不会超出precision所允许的范围。

那么当结果仍旧在max以下时,移入当前的数字还是安全的。否则,它需要由append_digit小心处理。

 

397  static cpp_num

398  append_digit (cpp_num num, int digit, int base, size_t precision)                   in cppexp.c

399  {

400    cpp_num result;

401    unsigned int shift = 3 + (base == 16);

402    bool overflow;

403    cpp_num_part add_high, add_low;

404 

405    /* Multiply by 8 or 16. Catching this overflow here means we don't

406      need to worry about add_high overflowing.  */

407    overflow = !!(num.high >> (PART_PRECISION - shift));

408    result.high = num.high << shift;

409    result.low = num.low << shift;

410    result.high |= num.low >> (PART_PRECISION - shift);

411  

412    if (base == 10)

413    {

414      add_low = num.low << 1;

415      add_high = (num.high << 1) + (num.low >> (PART_PRECISION - 1));

416    }

417    else

418      add_high = add_low = 0;

419 

420    if (add_low + digit < add_low)

421      add_high++;

422    add_low += digit;

423     

424    if (result.low + add_low < result.low)

425      add_high++;

426    if (result.high + add_high < result.high)

427      overflow = true;

428 

429    result.low += add_low;

430   result.high += add_high;

431 

432   /* The above code catches overflow of a cpp_num type. This catches

433      overflow of the (possibly shorter) target precision.  */

434    num.low = result.low;

435    num.high = result.high;

436    result = num_trim (result, precision);

437    if (!num_eq (result, num))

438      overflow = true;

439 

440    result.unsignedp = num.unsignedp;

441    result.overflow = overflow;

442    return result;

443  }

 

因为result使用域high和low来保存被解释的数值,首先检查这个额外的数字是否将使result溢出。注意仅当base为16时,shift才是4,否则是3。然而对于base为10的情况,左移这个数值3个比特仅把它乘以8而不是10,因此414和415行再算出该数值的2倍,并在后面加到结果上。这个加法可能导致溢出,424和426行为之进行检查。

显然,其结果的数值很可能超出了pfile所指定的精度(参数precision)。函数num_trim根据精度裁剪这个结果。

 

1004 static cpp_num

1005 num_trim (cpp_num num, size_t precision)                                                 in cppexp.c

1006 {

1007   if (precision > PART_PRECISION)

1008   {

1009     precision -= PART_PRECISION;

1010     if (precision < PART_PRECISION)

1011       num.high &= ((cpp_num_part) 1 << precision) - 1;

1012   }

1013   else

1014   {

1015     if (precision < PART_PRECISION)

1016       num.low &= ((cpp_num_part) 1 << precision) - 1;

1017     num.high = 0;

1018   }

1019

1020   return num;

1021 }

 

无论如何,如果裁剪的结果与裁剪前不一样,毫无疑问,发生了溢出。从append_digit返回后,因为它是给定精度下最后一个可以被安全加入的数字,在cpp_interpret_integer的370行,max被设为0,使得对余下的数字,364行的条件永远为false。

 

cpp_interpret_integer (continue)

 

374      if (overflow)

375        cpp_error (pfile, CPP_DL_PEDWARN,

376                 "integer constant is too large for its type");

377      /* If too big to be signed, consider it unsigned. Only warn for

378        decimal numbers. Traditional numbers were always signed (but

379        we still honor an explicit U suffix); but we only have

380        traditional semantics in directives.  */

381      else if (!result.unsignedp

382            && !(CPP_OPTION (pfile, traditional)

383            && pfile->state.in_directive)

384            && !num_positive (result, precision))

385      {

386        if (base == 10)

387          cpp_error (pfile, CPP_DL_WARNING,

388                   "integer constant is so large that it is unsigned");

389        result.unsignedp = true;

390      }

391    }

392 

393    return result;

394  }

 

因为树节点INTEGER_CST也是使用HOST_WIDE_INT类型的high和low域,以2进制补码的形式保存数值。对于有符号的数值,这是要求符号扩展的。这项工作由cpp_num_sign_extend来完成。

 

1038 cpp_num

1039 cpp_num_sign_extend (cpp_num num, size_t precision)                                in cppexp.c

1040 {

1041   if (!num.unsignedp)

1042   {

1043     if (precision > PART_PRECISION)

1044     {

1045       precision -= PART_PRECISION;

1046       if (precision < PART_PRECISION

1047           && (num.high & (cpp_num_part) 1 << (precision - 1)))

1048         num.high |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1049     }

1050     else if (num.low & (cpp_num_part) 1 << (precision - 1))

1051     {

1052       if (precision < PART_PRECISION)

1053         num.low |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1054       num.high = ~(cpp_num_part) 0;

1055     }

1056   }

1057

1058   return num;

1059 }

 

然后经过验证的数值被填入由interpret_integer的500行的build_int_2_wide所创建的value节点中。

接下来,要为结果关联上类型。我们已经看到,在编译器的初始化阶段,整数的类型节点都已经被创建。将要选出最适合的类型(包含尽可能少的比特位)。

 

interpret_integer (continue)

 

502    /* The type of a constant with a U suffix is straightforward.  */

503    if (flags & CPP_N_UNSIGNED)

504      itk = narrowest_unsigned_type (value, flags);

505    else

506    {

507      /* The type of a potentially-signed integer constant varies

508        depending on the base it's in, the standard in use, and the

509        length suffixes.  */

510      enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);

511       enum integer_type_kind itk_s = narrowest_signed_type (value, flags);

512 

513      /* In both C89 and C99, octal and hex constants may be signed or

514        unsigned, whichever fits tighter. We do not warn about this

515        choice differing from the traditional choice, as the constant

516        is probably a bit pattern and either way will work.  */

517      if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)

518        itk = MIN (itk_u, itk_s);

519      else

520      {

521        /* In C99, decimal constants are always signed.

522          In C89, decimal constants that don't fit in long have

523          undefined behavior; we try to make them unsigned long.

524          In GCC's extended C89, that last is true of decimal

525          constants that don't fit in long long, too.  */

526 

527        itk = itk_s;

528        if (itk_s > itk_u && itk_s > itk_long)

529        {

530          if (!flag_isoc99)

531          {

532            if (itk_u < itk_unsigned_long)

533              itk_u = itk_unsigned_long;

534            itk = itk_u;

535            warning ("this decimal constant is unsigned only in ISO C90");

536          }

537          else if (warn_traditional)

538            warning ("this decimal constant would be unsigned in ISO C90");

539        }

540      }

541    }

542 

543    if (itk == itk_none)

544      /* cpplib has already issued a warning for overflow.  */

545      type = ((flags & CPP_N_UNSIGNED)

546         ? widest_unsigned_literal_type_node

547         : widest_integer_literal_type_node);

548    else

549      type = integer_types[itk];

550 

551    if (itk > itk_unsigned_long

552        && (flags & CPP_N_WIDTH) != CPP_N_LARGE

553        && ! in_system_header && ! flag_isoc99)

554      pedwarn ("integer constant is too large for /"%s/" type",

555              (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");

556 

557    TREE_TYPE (value) = type;

558 

559    /* Convert imaginary to a complex type.  */

560    if (flags & CPP_N_IMAGINARY)

561      value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);

562 

563    return value;

564  }

 

函数narrowest_unsigned_type和narrowest_signed_type非常相似。对于整型常量,如果不带后缀‘l/L’,该值被认为是CPP_N_SMALL,如果带后缀‘l/L’,则被视为CPP_N_MEDIUM,否则(带后缀‘ll/LL’)就是CPP_N_LARGE(参考cpp_classify_number)。

 

442  static enum integer_type_kind

443  narrowest_unsigned_type (tree value, unsigned int flags)                               in c-lex.c

444  {

445    enum integer_type_kind itk;

446 

447    if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)

448      itk = itk_unsigned_int;

449    else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)

450      itk = itk_unsigned_long;

451    else

452      itk = itk_unsigned_long_long;

453 

454    /* int_fits_type_p must think the type of its first argument is

455      wider than its second argument, or it won't do the proper check.  */

456    TREE_TYPE (value) = widest_unsigned_literal_type_node;

457 

458    for (; itk < itk_none; itk += 2 /* skip unsigned types */)

459      if (int_fits_type_p (value, integer_types[itk]))

460        return itk;

461 

462    return itk_none;

463  }

 

在459行,integer_types设计成有符号、无符号类型间隔出现(因此458行每次把itk加2)。而在456行,注意value的类型被故意设置成widest_unsigned_literal_type_node(在32位x86系统上为unsigned long long),这是该系统所能支持的最大整型。这将影响int_fits_type_p的结果。

 

4243 int

4244 int_fits_type_p (tree c, tree type)                                                                       in tree.c

4245 {

4246   tree type_low_bound = TYPE_MIN_VALUE (type);

4247   tree type_high_bound = TYPE_MAX_VALUE (type);

4248   int ok_for_low_bound, ok_for_high_bound;

4249

4250   /* Perform some generic filtering first, which may allow making a decision

4251     even if the bounds are not constant. First, negative integers never fit

4252     in unsigned types, */

4253   if ((TREE_UNSIGNED (type) && tree_int_cst_sgn (c) < 0)

4254       /* Also, unsigned integers with top bit set never fit signed types.  */

4255       || (! TREE_UNSIGNED (type)

4256           && TREE_UNSIGNED (TREE_TYPE (c)) && tree_int_cst_msb (c)))

4257     return 0;

4258

4259   /* If at least one bound of the type is a constant integer, we can check

4260     ourselves and maybe make a decision. If no such decision is possible, but

4261     this type is a subtype, try checking against that. Otherwise, use

4262     force_fit_type, which checks against the precision.

4263

4264     Compute the status for each possibly constant bound, and return if we see

4265     one does not match. Use ok_for_xxx_bound for this purpose, assigning -1

4266     for "unknown if constant fits", 0 for "constant known *not* to fit" and 1

4267     for "constant known to fit".  */

4268

4269   ok_for_low_bound = -1;

4270   ok_for_high_bound = -1;

4271

4272   /* Check if C >= type_low_bound.  */

4273   if (type_low_bound && TREE_CODE (type_low_bound) == INTEGER_CST)

4274   {

4275     ok_for_low_bound = ! tree_int_cst_lt (c, type_low_bound);

4276     if (! ok_for_low_bound)

4277        return 0;

4278   }

4279

4280   /* Check if c <= type_high_bound.  */

4281   if (type_high_bound && TREE_CODE (type_high_bound) == INTEGER_CST)

4282   {

4283     ok_for_high_bound = ! tree_int_cst_lt (type_high_bound, c);

4284     if (! ok_for_high_bound)

4285        return 0;

4286   }

4287

4288  /* If the constant fits both bounds, the result is known.  */

4289   if (ok_for_low_bound == 1 && ok_for_high_bound == 1)

4290     return 1;

4291

4292   /* If we haven't been able to decide at this point, there nothing more we

4293     can check ourselves here. Look at the base type if we have one.  */

4294   else if (TREE_CODE (type) == INTEGER_TYPE && TREE_TYPE (type) != 0)

4295     return int_fits_type_p (c, TREE_TYPE (type));

4296

4297   /* Or to force_fit_type, if nothing else.  */

4298   else

4299   {

4300     c = copy_node (c);

4301     TREE_TYPE (c) = type;

4302     return !force_fit_type (c, 0);

4303   }

4304 }

 

我们已经在前面看到,类型节点的TYPE_MIN_VALUE和TYPE_MAX_VALUE分别表示该类型所能表示的最小值和最大值。如果超出此范围,该类型是不合适的。另外,对于负数,无符号类型不是合适类型;对于使用了最高位的无符号数,有符号类型也是不合适的。

注意只有在类型的TYPE_MIN_VALUE或TYPE_MAX_VALUE没有被设定(或者不是整数常量)的情况下(比如typedef声明的int类型),我们才会进入4292行以下的代码。

如果narrowest_unsigned_type找不出合适的类型(返回itk_none),那么只好使用系统所能支持的最大类型;544行的注释指出,这时编译器应该已经给出溢出警告。而这个被找出的type,在interpret_integer的557行最终被设置为所解析数值的类型。而代表该整数的节点将返回给cp_token的value域。

 

你可能感兴趣的:(tree,Integer,token,float,hex,Constants)