Studying note of GCC-3.4.6 source (78)

5.6.1.1.2.            Number

The preprocessor knows little about numbers beyond recognizing what a digit is. That is fine for the preprocessor, and it keeps the preprocessor flexible. But on receiving the digit sequence returned by the preprocessor, the lexer needs to know how to interpret it. Routine cpp_classify_number sets flags according to the literal spelling of the number.

 

143  unsigned int

144  cpp_classify_number (cpp_reader *pfile, const cpp_token *token)                  in cppexp.c

145  {

146    const uchar *str = token->val.str.text;

147    const uchar *limit;

148    unsigned int max_digit, result, radix;

149    enum {NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON} float_flag;

150 

151    /* If the lexer has done its job, length one can only be a single

152      digit. Fast-path this very common case.  */

153    if (token->val.str.len == 1)

154      return CPP_N_INTEGER | CPP_N_SMALL | CPP_N_DECIMAL;

155 

156    limit = str + token->val.str.len;

157    float_flag = NOT_FLOAT;

158    max_digit = 0;

159    radix = 10;

160 

161    /* First, interpret the radix.  */

162    if (*str == '0')

163    {

164      radix = 8;

165      str++;

166 

167      /* Require at least one hex digit to classify it as hex.  */

168      if ((*str == 'x' || *str == 'X')

169         && (str[1] == '.' || ISXDIGIT (str[1])))

170      {

171        radix = 16;

172        str++;

173      }

174    }

175 

176    /* Now scan for a well-formed integer or float.  */

177    for (;;)

178    {

179      unsigned int c = *str++;

180 

181      if (ISDIGIT (c) || (ISXDIGIT (c) && radix == 16))

182      {

183        c = hex_value (c);

184        if (c > max_digit)

185          max_digit = c;

186      }

187      else if (c == '.')

188      {

189        if (float_flag == NOT_FLOAT)

190          float_flag = AFTER_POINT;

191        else

192          SYNTAX_ERROR ("too many decimal points in number");

193      }

194      else if ((radix <= 10 && (c == 'e' || c == 'E'))

195            || (radix == 16 && (c == 'p' || c == 'P')))

196      {

197        float_flag = AFTER_EXPON;

198        break;

199      }

200      else

201      {

202        /* Start of suffix.  */

203        str--;

204        break;

205      }

206    }

207 

208    if (float_flag != NOT_FLOAT && radix == 8)

209      radix = 10;

210 

211    if (max_digit >= radix)

212      SYNTAX_ERROR2 ("invalid digit \"%c\" in octal constant", '0' + max_digit);

213 

214    if (float_flag != NOT_FLOAT)

215    {

216      if (radix == 16 && CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, c99))

217        cpp_error (pfile, CPP_DL_PEDWARN,

218                 "use of C99 hexadecimal floating constant");

219 

220      if (float_flag == AFTER_EXPON)

221      {

222        if (*str == '+' || *str == '-')

223          str++;

224 

225        /* Exponent is decimal, even if string is a hex float.  */

226        if (!ISDIGIT (*str))

227          SYNTAX_ERROR ("exponent has no digits");

228 

229        do

230          str++;

231        while (ISDIGIT (*str));

232      }

233      else if (radix == 16)

234        SYNTAX_ERROR ("hexadecimal floating constants require an exponent");

235 

236      result = interpret_float_suffix (str, limit - str);

237      if (result == 0)

238      {

239        cpp_error (pfile, CPP_DL_ERROR,

240                 "invalid suffix \"%.*s\" on floating constant",

241                 (int) (limit - str), str);

242        return CPP_N_INVALID;

243      }

244 

245      /* Traditional C didn't accept any floating suffixes.  */

246      if (limit != str

247         && CPP_WTRADITIONAL (pfile)

248         && ! cpp_sys_macro_p (pfile))

249        cpp_error (pfile, CPP_DL_WARNING,

250                 "traditional C rejects the \"%.*s\" suffix",

251                 (int) (limit - str), str);

252 

253      result |= CPP_N_FLOATING;

254    }

255    else

256    {

257     result = interpret_int_suffix (str, limit - str);

258      if (result == 0)

259      {

260        cpp_error (pfile, CPP_DL_ERROR,

261                 "invalid suffix \"%.*s\" on integer constant",

262                  (int) (limit - str), str);

263        return CPP_N_INVALID;

264      }

265 

266      /* Traditional C only accepted the 'L' suffix.

267        Suppress warning about 'LL' with -Wno-long-long.  */

268      if (CPP_WTRADITIONAL (pfile) && ! cpp_sys_macro_p (pfile))

269      {

270        int u_or_i = (result & (CPP_N_UNSIGNED|CPP_N_IMAGINARY));

271        int large = (result & CPP_N_WIDTH) == CPP_N_LARGE;

272 

273        if (u_or_i || (large && CPP_OPTION (pfile, warn_long_long)))

274          cpp_error (pfile, CPP_DL_WARNING,

275                   "traditional C rejects the \"%.*s\" suffix",

276                   (int) (limit - str), str);

277      }

278 

279      if ((result & CPP_N_WIDTH) == CPP_N_LARGE

280          && ! CPP_OPTION (pfile, c99)

281          && CPP_OPTION (pfile, warn_long_long))

282        cpp_error (pfile, CPP_DL_PEDWARN,

283                 "use of C99 long long integer constant");

284 

285      result |= CPP_N_INTEGER;

286    }

287 

288    if ((result & CPP_N_IMAGINARY) && CPP_PEDANTIC (pfile))

289      cpp_error (pfile, CPP_DL_PEDWARN,

290               "imaginary constants are a GCC extension");

291 

292    if (radix == 10)

293      result |= CPP_N_DECIMAL;

294    else if (radix == 16)

295      result |= CPP_N_HEX;

296    else

297      result |= CPP_N_OCTAL;

298 

299    return result;

300 

301  syntax_error:

302    return CPP_N_INVALID;

303  }

 

In C++, there are several representations for a numeric constant; for instance, 1234, 0x4D2, 02322, 1.234e3 and 0x4.d2p8 all stand for the decimal value 1234. The last two are floating point constants: one uses a power-of-10 exponent (e), the other a power-of-2 exponent (p). Further, these constants can carry suffixes that restrict their type. A floating point constant can use f/F (float, single precision), l/L (long double), and i/I or j/J (imaginary constant, a GCC extension). An integer constant can use l/L (long), ll/LL (long long), u/U (unsigned), and i/I or j/J (imaginary constant, a GCC extension). Here, cpp_classify_number validates the suffix and sets CPP_N_SMALL, CPP_N_MEDIUM, or CPP_N_LARGE accordingly.

5.6.1.1.2.1.      Case of integer

An integer constant is handled by the following function, which builds an INTEGER_CST node.

 

490  static tree

491  interpret_integer (const cpp_token *token, unsigned int flags)                        in c-lex.c

492  {

493    tree value, type;

494    enum integer_type_kind itk;

495    cpp_num integer;

496    cpp_options *options = cpp_get_options (parse_in);

497 

498    integer = cpp_interpret_integer (parse_in, token, flags);

499    integer = cpp_num_sign_extend (integer, options->precision);

500    value = build_int_2_wide (integer.low, integer.high);

 

For an integer, the number must first be validated against the target machine. The structure cpp_num is used to collect the result from cpp_interpret_integer.

 

604  struct cpp_num                                                                                          in cpplib.h

605  {

606    cpp_num_part  high;

607    cpp_num_part  low;

608    bool unsignedp;  /* True if value should be treated as unsigned.  */

609    bool overflow;   /* True if the most recent calculation overflowed.  */

610  };

 

In this definition, the slots high and low are declared with the widest integer type on the host machine. For Linux/x86, this type is long.

 

602  typedef unsigned HOST_WIDE_INT cpp_num_part;                                     in cpplib.h

 

The argument type of cpp_interpret_integer is the result painstakingly computed by cpp_classify_number.

 

311   cpp_num

312  cpp_interpret_integer (cpp_reader *pfile, const cpp_token *token,                 in cppexp.c

313                     unsigned int type)

314  {

315    const uchar *p, *end;

316    cpp_num result;

317 

318    result.low = 0;

319    result.high = 0;

320    result.unsignedp = !!(type & CPP_N_UNSIGNED);

321    result.overflow = false;

322 

323    p = token->val.str.text;

324    end = p + token->val.str.len;

325 

326    /* Common case of a single digit.  */

327    if (token->val.str.len == 1)

328      result.low = p[0] - '0';

329    else

330    {

331      cpp_num_part max;

332      size_t precision = CPP_OPTION (pfile, precision);

333      unsigned int base = 10, c = 0;

334      bool overflow = false;

335 

336      if ((type & CPP_N_RADIX) == CPP_N_OCTAL)

337      {

338        base = 8;

339        p++;

340      }

341      else if ((type & CPP_N_RADIX) == CPP_N_HEX)

342      {

343        base = 16;

344        p += 2;

345      }

346 

347      /* We can add a digit to numbers strictly less than this without

348        needing the precision and slowness of double integers.  */

349      max = ~(cpp_num_part) 0;

350      if (precision < PART_PRECISION)

351        max >>= PART_PRECISION - precision;

352      max = (max - base + 1) / base + 1;

353 

354      for (; p < end; p++)

355      {

356        c = *p;

357 

358        if (ISDIGIT (c) || (base == 16 && ISXDIGIT (c)))

359          c = hex_value (c);

360        else

361          break;

362 

363        /* Strict inequality for when max is set to zero.  */

364        if (result.low < max)

365          result.low = result.low * base + c;

366        else

367        {

368          result = append_digit (result, c, base, precision);

369          overflow |= result.overflow;

370          max = 0;

371        }

372      }

 

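PART_PRECISION above is the size in bits of the type of high and low (here the number of bits in a long; note that sizeof(long) alone would give the size in bytes). The precision held in pfile is initialized in cpp_create_reader to the bit size of long. The code at lines 349 to 352 computes a threshold max from the largest value representable in precision bits: any value strictly less than max can absorb one more digit (value * base + digit) without exceeding that largest value.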

So while the result is still below max, it is safe to shift in the current digit directly. Otherwise, the digit must be handled carefully by append_digit.

 

397  static cpp_num

398  append_digit (cpp_num num, int digit, int base, size_t precision)                   in cppexp.c

399  {

400    cpp_num result;

401    unsigned int shift = 3 + (base == 16);

402    bool overflow;

403    cpp_num_part add_high, add_low;

404 

405    /* Multiply by 8 or 16. Catching this overflow here means we don't

406      need to worry about add_high overflowing.  */

407    overflow = !!(num.high >> (PART_PRECISION - shift));

408    result.high = num.high << shift;

409    result.low = num.low << shift;

410    result.high |= num.low >> (PART_PRECISION - shift);

411  

412    if (base == 10)

413    {

414      add_low = num.low << 1;

415      add_high = (num.high << 1) + (num.low >> (PART_PRECISION - 1));

416    }

417    else

418      add_high = add_low = 0;

419 

420    if (add_low + digit < add_low)

421      add_high++;

422    add_low += digit;

423     

424    if (result.low + add_low < result.low)

425      add_high++;

426    if (result.high + add_high < result.high)

427      overflow = true;

428 

429    result.low += add_low;

430    result.high += add_high;

431 

432   /* The above code catches overflow of a cpp_num type. This catches

433      overflow of the (possibly shorter) target precision.  */

434    num.low = result.low;

435    num.high = result.high;

436    result = num_trim (result, precision);

437    if (!num_eq (result, num))

438      overflow = true;

439 

440    result.unsignedp = num.unsignedp;

441    result.overflow = overflow;

442    return result;

443  }

 

As it uses the slots high and low to hold the interpreted number, it first checks whether the extra digit will cause the result to overflow. Notice that shift is 4 only for base 16; otherwise it is 3. For base 10, however, shifting the number by 3 bits only multiplies it by 8 instead of 10, so lines 414 to 415 compute the remaining 2 times the number, which is then added to the shifted value together with the digit. These additions may carry or overflow, which is checked at lines 420 to 427.

Obviously, the resulting number may exceed the precision specified by pfile (parameter precision). Routine num_trim trims the result according to precision.

 

1004 static cpp_num

1005 num_trim (cpp_num num, size_t precision)                                                 in cppexp.c

1006 {

1007   if (precision > PART_PRECISION)

1008   {

1009     precision -= PART_PRECISION;

1010     if (precision < PART_PRECISION)

1011       num.high &= ((cpp_num_part) 1 << precision) - 1;

1012   }

1013   else

1014   {

1015     if (precision < PART_PRECISION)

1016       num.low &= ((cpp_num_part) 1 << precision) - 1;

1017     num.high = 0;

1018   }

1019

1020   return num;

1021 }

 

In any case, if the trimmed result is not the same as the result before trimming, overflow has occurred. After returning from append_digit, which is reached once the fast path can no longer safely absorb another digit, max is set to 0 at line 370 in cpp_interpret_integer, which forces the condition at line 364 to be false for all following digits.

 

cpp_interpret_integer (continue)

 

374      if (overflow)

375        cpp_error (pfile, CPP_DL_PEDWARN,

376                 "integer constant is too large for its type");

377      /* If too big to be signed, consider it unsigned. Only warn for

378        decimal numbers. Traditional numbers were always signed (but

379        we still honor an explicit U suffix); but we only have

380        traditional semantics in directives.  */

381      else if (!result.unsignedp

382            && !(CPP_OPTION (pfile, traditional)

383            && pfile->state.in_directive)

384            && !num_positive (result, precision))

385      {

386        if (base == 10)

387          cpp_error (pfile, CPP_DL_WARNING,

388                   "integer constant is so large that it is unsigned");

389        result.unsignedp = true;

390      }

391    }

392 

393    return result;

394  }

 

A tree node of INTEGER_CST also uses high and low slots of type HOST_WIDE_INT, in two's complement format, to hold the number. For a signed number, sign extension is therefore required. It is done by cpp_num_sign_extend.

 

1038 cpp_num

1039 cpp_num_sign_extend (cpp_num num, size_t precision)                                in cppexp.c

1040 {

1041   if (!num.unsignedp)

1042   {

1043     if (precision > PART_PRECISION)

1044     {

1045       precision -= PART_PRECISION;

1046       if (precision < PART_PRECISION

1047           && (num.high & (cpp_num_part) 1 << (precision - 1)))

1048         num.high |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1049     }

1050     else if (num.low & (cpp_num_part) 1 << (precision - 1))

1051     {

1052       if (precision < PART_PRECISION)

1053         num.low |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1054       num.high = ~(cpp_num_part) 0;

1055     }

1056   }

1057

1058   return num;

1059 }

 

Then the validated number is stored into the node created by build_int_2_wide at line 500 in interpret_integer.

Next, a type must be associated with the result. As we have seen, type nodes for the integer types are created when the compiler starts up. The type that fits best (containing as few bits as possible) will be selected.

 

interpret_integer (continue)

 

502    /* The type of a constant with a U suffix is straightforward.  */

503    if (flags & CPP_N_UNSIGNED)

504      itk = narrowest_unsigned_type (value, flags);

505    else

506    {

507      /* The type of a potentially-signed integer constant varies

508        depending on the base it's in, the standard in use, and the

509        length suffixes.  */

510      enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);

511       enum integer_type_kind itk_s = narrowest_signed_type (value, flags);

512 

513      /* In both C89 and C99, octal and hex constants may be signed or

514        unsigned, whichever fits tighter. We do not warn about this

515        choice differing from the traditional choice, as the constant

516        is probably a bit pattern and either way will work.  */

517      if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)

518        itk = MIN (itk_u, itk_s);

519      else

520      {

521        /* In C99, decimal constants are always signed.

522          In C89, decimal constants that don't fit in long have

523          undefined behavior; we try to make them unsigned long.

524          In GCC's extended C89, that last is true of decimal

525          constants that don't fit in long long, too.  */

526 

527        itk = itk_s;

528        if (itk_s > itk_u && itk_s > itk_long)

529        {

530          if (!flag_isoc99)

531          {

532            if (itk_u < itk_unsigned_long)

533              itk_u = itk_unsigned_long;

534            itk = itk_u;

535            warning ("this decimal constant is unsigned only in ISO C90");

536          }

537          else if (warn_traditional)

538            warning ("this decimal constant would be unsigned in ISO C90");

539        }

540      }

541    }

542 

543    if (itk == itk_none)

544      /* cpplib has already issued a warning for overflow.  */

545      type = ((flags & CPP_N_UNSIGNED)

546         ? widest_unsigned_literal_type_node

547         : widest_integer_literal_type_node);

548    else

549      type = integer_types[itk];

550 

551    if (itk > itk_unsigned_long

552        && (flags & CPP_N_WIDTH) != CPP_N_LARGE

553        && ! in_system_header && ! flag_isoc99)

554      pedwarn ("integer constant is too large for \"%s\" type",

555              (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");

556 

557    TREE_TYPE (value) = type;

558 

559    /* Convert imaginary to a complex type.  */

560    if (flags & CPP_N_IMAGINARY)

561      value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);

562 

563    return value;

564  }

 

Routines narrowest_unsigned_type and narrowest_signed_type are similar. An integer constant is classified as CPP_N_SMALL when it has no width suffix, as CPP_N_MEDIUM with suffix 'l/L', and as CPP_N_LARGE otherwise (refer to cpp_classify_number).

 

442  static enum integer_type_kind

443  narrowest_unsigned_type (tree value, unsigned int flags)                               in c-lex.c

444  {

445    enum integer_type_kind itk;

446 

447    if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)

448      itk = itk_unsigned_int;

449    else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)

450      itk = itk_unsigned_long;

451    else

452      itk = itk_unsigned_long_long;

453 

454    /* int_fits_type_p must think the type of its first argument is

455      wider than its second argument, or it won't do the proper check.  */

456    TREE_TYPE (value) = widest_unsigned_literal_type_node;

457 

458    for (; itk < itk_none; itk += 2 /* skip unsigned types */)

459      if (int_fits_type_p (value, integer_types[itk]))

460        return itk;

461 

462    return itk_none;

463  }

 

At line 459, integer_types is laid out so that signed and unsigned types appear alternately, which is why the loop at line 458 advances by 2. Then at line 456, notice that the type of value is deliberately set to widest_unsigned_literal_type_node (unsigned long long on a 32-bit x86 system), which is the largest integer type the system can support. This affects the result of int_fits_type_p.

 

4243 int

4244 int_fits_type_p (tree c, tree type)                                                                       in tree.c

4245 {

4246   tree type_low_bound = TYPE_MIN_VALUE (type);

4247   tree type_high_bound = TYPE_MAX_VALUE (type);

4248   int ok_for_low_bound, ok_for_high_bound;

4249

4250   /* Perform some generic filtering first, which may allow making a decision

4251     even if the bounds are not constant. First, negative integers never fit

4252     in unsigned types, */

4253   if ((TREE_UNSIGNED (type) && tree_int_cst_sgn (c) < 0)

4254       /* Also, unsigned integers with top bit set never fit signed types.  */

4255       || (! TREE_UNSIGNED (type)

4256           && TREE_UNSIGNED (TREE_TYPE (c)) && tree_int_cst_msb (c)))

4257     return 0;

4258

4259   /* If at least one bound of the type is a constant integer, we can check

4260     ourselves and maybe make a decision. If no such decision is possible, but

4261     this type is a subtype, try checking against that. Otherwise, use

4262     force_fit_type, which checks against the precision.

4263

4264     Compute the status for each possibly constant bound, and return if we see

4265     one does not match. Use ok_for_xxx_bound for this purpose, assigning -1

4266     for "unknown if constant fits", 0 for "constant known *not* to fit" and 1

4267     for "constant known to fit".  */

4268

4269   ok_for_low_bound = -1;

4270   ok_for_high_bound = -1;

4271

4272   /* Check if C >= type_low_bound.  */

4273   if (type_low_bound && TREE_CODE (type_low_bound) == INTEGER_CST)

4274   {

4275     ok_for_low_bound = ! tree_int_cst_lt (c, type_low_bound);

4276     if (! ok_for_low_bound)

4277        return 0;

4278   }

4279

4280   /* Check if c <= type_high_bound.  */

4281   if (type_high_bound && TREE_CODE (type_high_bound) == INTEGER_CST)

4282   {

4283     ok_for_high_bound = ! tree_int_cst_lt (type_high_bound, c);

4284     if (! ok_for_high_bound)

4285        return 0;

4286   }

4287

4288   /* If the constant fits both bounds, the result is known.  */

4289   if (ok_for_low_bound == 1 && ok_for_high_bound == 1)

4290     return 1;

4291

4292   /* If we haven't been able to decide at this point, there nothing more we

4293     can check ourselves here. Look at the base type if we have one.  */

4294   else if (TREE_CODE (type) == INTEGER_TYPE && TREE_TYPE (type) != 0)

4295     return int_fits_type_p (c, TREE_TYPE (type));

4296

4297   /* Or to force_fit_type, if nothing else.  */

4298   else

4299   {

4300     c = copy_node (c);

4301     TREE_TYPE (c) = type;

4302     return !force_fit_type (c, 0);

4303   }

4304 }

 

As we have seen before, TYPE_MIN_VALUE and TYPE_MAX_VALUE of a type node represent the minimum and maximum values the type can hold. If the constant is out of this range, the type is not suitable. Besides, an unsigned type cannot hold a negative number, and a signed type cannot hold an unsigned number whose most significant bit is set.

Note that the code below line 4292 is entered only when the type does not have both TYPE_MIN_VALUE and TYPE_MAX_VALUE set to integer constants (e.g., a typedef declaration of int).

If narrowest_unsigned_type cannot find a suitable type, interpret_integer can only use the largest type the system supports (the comment at line 544 points out that cpplib has already issued a warning about the overflow). The type found is set as the type of the parsed number at line 557 in interpret_integer, and this number node is returned to the value field of cp_token.

 

你可能感兴趣的:(Studying note of GCC-3.4.6 source (78))