The GCC node that represents a real constant is tree_real_cst, shown below.
702 struct tree_real_cst GTY(()) in tree.h
703 {
704 struct tree_common common;
705 struct real_value * real_cst_ptr;
706 };
The core of tree_real_cst is real_value, which defines a floating-point value following the related standard.
43 struct real_value GTY(()) in real.h
44 {
45 ENUM_BITFIELD (real_value_class) class : 2;
46 unsigned int sign : 1;
47 unsigned int signalling : 1;
48 unsigned int canonical : 1;
49 signed int exp : EXP_BITS;
50 unsigned long sig[SIGSZ];
51 };
52
53 /* Various headers condition prototypes on #ifdef REAL_VALUE_TYPE, so it
54 needs to be a macro. We do need to continue to have a structure tag
55 so that other headers can forward declare it. */
56 #define REAL_VALUE_TYPE struct real_value
The definition above uses some of the following macros, which are defined for real values.
37 #define SIGNIFICAND_BITS (128 + HOST_BITS_PER_LONG) in real.h
38 #define EXP_BITS (32 - 5)
39 #define MAX_EXP ((1 << (EXP_BITS - 1)) - 1)
40 #define SIGSZ (SIGNIFICAND_BITS / HOST_BITS_PER_LONG)
41 #define SIG_MSB ((unsigned long)1 << (HOST_BITS_PER_LONG - 1))
So the structure of real_value closely resembles a floating-point format. The exp field is the exponent part of the format, and the sig field is the significand part.
Creating a REAL_CST node from a real_value is straightforward.
466 tree
467 build_real (tree type, REAL_VALUE_TYPE d) in tree.c
468 {
469 tree v;
470 REAL_VALUE_TYPE *dp;
471 int overflow = 0;
472
473 /* ??? Used to check for overflow here via CHECK_FLOAT_TYPE.
474 Consider doing it via real_convert now. */
475
476 v = make_node (REAL_CST);
477 dp = ggc_alloc (sizeof (REAL_VALUE_TYPE));
478 memcpy (dp, &d, sizeof (REAL_VALUE_TYPE));
479
480 TREE_TYPE (v) = type;
481 TREE_REAL_CST_PTR (v) = dp;
482 TREE_OVERFLOW (v) = TREE_CONSTANT_OVERFLOW (v) = overflow;
483 return v;
484 }
Notice that at line 481, the TREE_REAL_CST_PTR field of the tree_real_cst node is set to point to the newly created real_value object.
In C/C++ arithmetic expressions, operands undergo so-called type raising (promotion). When both a floating-point and an integer operand appear in a calculation, the integer needs to be raised to the floating-point type. This type raising is done by the following function.
507 tree
508 build_real_from_int_cst (tree type, tree i) in tree.c
509 {
510 tree v;
511 int overflow = TREE_OVERFLOW (i);
512
513 v = build_real (type, real_value_from_int_cst (type, i));
514
515 TREE_OVERFLOW (v) |= overflow;
516 TREE_CONSTANT_OVERFLOW (v) |= overflow;
517 return v;
518 }
Generating a real_value floating-point value from an integer constant is not an easy task. The entry point here is real_value_from_int_cst.
489 REAL_VALUE_TYPE
490 real_value_from_int_cst (tree type, tree i) in tree.c
491 {
492 REAL_VALUE_TYPE d;
493
494 /* Clear all bits of the real value type so that we can later do
495 bitwise comparisons to see if two values are the same. */
496 memset (&d, 0, sizeof d);
497
498 real_from_integer (&d, type ? TYPE_MODE (type) : VOIDmode,
499 TREE_INT_CST_LOW (i), TREE_INT_CST_HIGH (i),
500 TREE_UNSIGNED (TREE_TYPE (i)));
501 return d;
502 }
The actual conversion is done by real_from_integer.
1957 void
1958 real_from_integer (REAL_VALUE_TYPE *r, enum machine_mode mode, in real.c
1959 unsigned HOST_WIDE_INT low, HOST_WIDE_INT high,
1960 int unsigned_p)
1961 {
1962 if (low == 0 && high == 0)
1963 get_zero (r, 0);
For the constant 0 the creation is simple. Notice that the sign passed in is hard-coded as 0.
128 static inline void
129 get_zero (REAL_VALUE_TYPE *r, int sign) in real.c
130 {
131 memset (r, 0, sizeof (*r));
132 r->sign = sign;
133 }
For other real constants: as we know, the precision of a floating-point format is finite, and so are the maximum and minimum values it can represent. The class field of real_value distinguishes these situations; it can be one of the following values: rvc_zero, rvc_normal, rvc_inf, rvc_nan. Here inf stands for infinity and nan stands for not-a-number, which can be caused by, for example, dividing zero by zero. For a value other than zero, real_from_integer proceeds as below.
real_from_integer (continued)
1965 else
1966 {
1967 memset (r, 0, sizeof (*r));
1968 r->class = rvc_normal;
1969 r->sign = high < 0 && !unsigned_p;
1970 r->exp = 2 * HOST_BITS_PER_WIDE_INT;
1971
1972 if (r->sign)
1973 {
1974 high = ~high;
1975 if (low == 0)
1976 high += 1;
1977 else
1978 low = -low;
1979 }
1980
1981 if (HOST_BITS_PER_LONG == HOST_BITS_PER_WIDE_INT)
1982 {
1983 r->sig[SIGSZ-1] = high;
1984 r->sig[SIGSZ-2] = low;
1985 }
1986 else if (HOST_BITS_PER_LONG*2 == HOST_BITS_PER_WIDE_INT)
1987 {
1988 r->sig[SIGSZ-1] = high >> (HOST_BITS_PER_LONG - 1) >> 1;
1989 r->sig[SIGSZ-2] = high;
1990 r->sig[SIGSZ-3] = low >> (HOST_BITS_PER_LONG - 1) >> 1;
1991 r->sig[SIGSZ-4] = low;
1992 }
1993 else
1994 abort ();
1995
1996 normalize (r);
1997 }
1998
1999 if (mode != VOIDmode)
2000 real_convert (r, mode, r);
2001 }
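Please note the snippet from line 1972 to 1979: the integer is held in two's-complement form, so a negative value is negated there to obtain its magnitude (the sign itself is recorded in the sign field at line 1969). Also note that the initial value of exp is set to 2 * HOST_BITS_PER_WIDE_INT (128 in our environment, Linux on x86) at line 1970.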
As we know, the sig field holds the significant bits of the value, and we want the value to occupy as many meaningful bits as possible. For example, take 0.000001: since the floating-point format fixes the number of bits used for the exponent and the significand, if we rewrite it as 1.000000 × 10^-6, then with the same set of bits the precision is raised by a factor of 1000000. So we need to normalize.
477 static void
478 normalize (REAL_VALUE_TYPE *r) in real.c
479 {
480 int shift = 0, exp;
481 int i, j;
482
483 /* Find the first word that is nonzero. */
484 for (i = SIGSZ- 1; i >= 0; i--)
485 if (r->sig[i] == 0)
486 shift += HOST_BITS_PER_LONG;
487 else
488 break;
489
490 /* Zero significand flushes to zero. */
491 if (i < 0)
492 {
493 r->class = rvc_zero;
494 r->exp = 0;
495 return;
496 }
497
498 /* Find the first bit that is nonzero. */
499 for (j = 0; ; j++)
500 if (r->sig[i] & ((unsigned long)1 << (HOST_BITS_PER_LONG - 1 - j)))
501 break;
502 shift += j;
503
504 if (shift > 0)
505 {
506 exp = r->exp - shift;
507 if (exp > MAX_EXP)
508 get_inf (r, r->sign);
509 else if (exp < -MAX_EXP)
510 get_zero (r, r->sign);
511 else
512 {
513 r->exp = exp;
514 lshift_significand (r, r, shift);
515 }
516 }
517 }
Remember that, for an integer constant, we first set the exp field to 128. This is deliberate. For the constant 1, for example, the exp field will be 1 after normalization (it indicates the number of bits in the integral part of the value), and the single 1 bit will be shifted to the left-most position of the significand, followed by a long series of 0s after the radix point.
236 static void
237 lshift_significand (REAL_VALUE_TYPE *r, in real.c
238 const REAL_VALUE_TYPE *a, unsigned int n)
239 {
240 unsigned int i, ofs = n / HOST_BITS_PER_LONG;
241
242 n &= HOST_BITS_PER_LONG - 1;
243 if (n == 0)
244 {
245 for (i = 0; ofs + i < SIGSZ; ++i)
246 r->sig[SIGSZ-1-i] = a->sig[SIGSZ-1-i-ofs];
247 for (; i < SIGSZ; ++i)
248 r->sig[SIGSZ-1-i] = 0;
249 }
250 else
251 for (i = 0; i < SIGSZ; ++i)
252 {
253 r->sig[SIGSZ-1-i]
254 = (((ofs + i >= SIGSZ? 0 : a->sig[SIGSZ-1-i-ofs]) << n)
255 | ((ofs + i + 1 >= SIGSZ? 0 : a->sig[SIGSZ-1-i-ofs-1])
256 >> (HOST_BITS_PER_LONG - n)));
257 }
258 }
lshift_significand is not difficult; it moves the bits left by the number of positions requested.
Though we have put the integer constant into the real_value type, that is just an internal type which does not match any floating-point machine mode; we need to further convert the value into the format defined by the machine mode. real_convert shows us the procedure.
2368 void
2369 real_convert (REAL_VALUE_TYPE *r, enum machine_mode mode, in real.c
2370 const REAL_VALUE_TYPE *a)
2371 {
2372 const struct real_format *fmt;
2373
2374 fmt = REAL_MODE_FORMAT (mode);
2375 if (fmt == NULL)
2376 abort ();
2377
2378 *r = *a;
2379 round_for_format (fmt, r);
2380
2381 /* round_for_format de-normalizes denormals. Undo just that part. */
2382 if (r->class == rvc_normal)
2383 normalize (r);
2384 }
149 #define REAL_MODE_FORMAT(MODE) (real_format_for_mode[(MODE)-MIN_MODE_FLOAT])
real_format_for_mode in the definition of REAL_MODE_FORMAT is a global array of pointers to real_format. Each entry records the details of the conversion between a given mode and a floating-point value. It is initialized from machmode.def, and currently two formats are available: ieee_single_format and ieee_double_format. For other floating-point formats, it is overwritten in process_options by OVERRIDE_OPTIONS.
106 struct real_format in real.h
107 {
108 /* Move to and from the target bytes. */
109 void (*encode) (const struct real_format *, long *,
110 const REAL_VALUE_TYPE *);
111 void (*decode) (const struct real_format *, REAL_VALUE_TYPE *,
112 const long *);
113
114 /* The radix of the exponent and digits of the significand. */
115 int b;
116
117 /* log2(b). */
118 int log2_b;
119
120 /* Size of the significand in digits of radix B. */
121 int p;
122
123 /* Size of the significant of a NaN, in digits of radix B. */
124 int pnan;
125
126 /* The minimum negative integer, x, such that b**(x-1) is normalized. */
127 int emin;
128
129 /* The maximum integer, x, such that b**(x-1) is representable. */
130 int emax;
131
132 /* The bit position of the sign bit, or -1 for a complex encoding. */
133 int signbit;
134
135 /* Properties of the format. */
136 bool has_nans;
137 bool has_inf;
138 bool has_denorm;
139 bool has_signed_zero;
140 bool qnan_msb_set;
141 };
Notice that real_format can support an arbitrary radix thanks to the b field. For ieee_single_format and ieee_double_format we get:
2652 const struct real_format ieee_single_format = in real.c
2653 {
2654 encode_ieee_single,
2655 decode_ieee_single,
2656 2,
2657 1,
2658 24,
2659 24,
2660 -125,
2661 128,
2662 31,
2663 true,
2664 true,
2665 true,
2666 true,
2667 true
2668 };
2875 const struct real_format ieee_double_format = in real.c
2876 {
2877 encode_ieee_double,
2878 decode_ieee_double,
2879 2,
2880 1,
2881 53,
2882 53,
2883 -1021,
2884 1024,
2885 63,
2886 true,
2887 true,
2888 true,
2889 true,
2890 true
2891 };
With the above knowledge, round_for_format, though long, is not difficult to understand. As we have a fixed number of bits for the significand, to make the value as precise as possible we need to make as much use of these bits as we can. So the significand needs to be shifted appropriately, as below.
2236 static void
2237 round_for_format (const struct real_format *fmt, REAL_VALUE_TYPE *r) in real.c
2238 {
2239 int p2, np2, i, w;
2240 unsigned long sticky;
2241 bool guard, lsb;
2242 int emin2m1, emax2;
2243
2244 p2 = fmt->p * fmt->log2_b;
2245 emin2m1 = (fmt->emin - 1) * fmt->log2_b;
2246 emax2 = fmt->emax * fmt->log2_b;
2247
2248 np2 = SIGNIFICAND_BITS - p2;
2249 switch (r->class)
2250 {
2251 underflow:
2252 get_zero (r, r->sign);
2253 case rvc_zero:
2254 if (!fmt->has_signed_zero)
2255 r->sign = 0;
2256 return;
2257
2258 overflow:
2259 get_inf (r, r->sign);
2260 case rvc_inf:
2261 return;
2262
2263 case rvc_nan:
2264 clear_significand_below (r, np2);
2265 return;
2266
2267 case rvc_normal:
2268 break;
2269
2270 default:
2271 abort ();
2272 }
2273
2274 /* If we're not base2, normalize the exponent to a multiple of
2275 the true base. */
2276 if (fmt->log2_b != 1)
2277 {
2278 int shift = r->exp & (fmt->log2_b - 1);
2279 if (shift)
2280 {
2281 shift = fmt->log2_b - shift;
2282 r->sig[0] |= sticky_rshift_significand (r, r, shift);
2283 r->exp += shift;
2284 }
2285 }
2286
2287 /* Check the range of the exponent. If we're out of range,
2288 either underflow or overflow. */
2289 if (r->exp > emax2)
2290 goto overflow;
2291 else if (r->exp <= emin2m1)
2292 {
2293 int diff;
2294
2295 if (!fmt->has_denorm)
2296 {
2297 /* Don't underflow completely until we've had a chance to round. */
2298 if (r->exp < emin2m1)
2299 goto underflow;
2300 }
2301 else
2302 {
2303 diff = emin2m1 - r->exp + 1;
2304 if (diff > p2)
2305 goto underflow;
2306
2307 /* De-normalize the significand. */
2308 r->sig[0] |= sticky_rshift_significand (r, r, diff);
2309 r->exp += diff;
2310 }
2311 }
At line 2244 above, the format used may not be based on binary: the log2_b field of real_format records log2 of the base, and the p field records the size of the significand in digits of that base. So here p2 is the size of the significand expressed in binary digits (bits). Lines 2245 and 2246 compute the corresponding minimum and maximum exponent limits of the format in the same unit.
As we have already seen, the exp field of real_value is the exponent of the value in binary form. If the target format is not binary and the exponent is not a multiple of log2_b (that is, it does not fall on a digit boundary of the new radix), we need to shift the significand right by the remaining amount using sticky_rshift_significand, shown below. Next, if the exponent is out of range for the format, the significand may have to be shifted right, sacrificing precision.
168 static bool
169 sticky_rshift_significand (REAL_VALUE_TYPE *r, in real.c
170 const REAL_VALUE_TYPE *a, unsigned int n)
171 {
172 unsigned long sticky = 0;
173 unsigned int i, ofs = 0;
174
175 if (n >= HOST_BITS_PER_LONG)
176 {
177 for (i = 0, ofs = n / HOST_BITS_PER_LONG; i < ofs; ++i)
178 sticky |= a->sig[i];
179 n &= HOST_BITS_PER_LONG - 1;
180 }
181
182 if (n != 0)
183 {
184 sticky |= a->sig[ofs] & (((unsigned long)1 << n) - 1);
185 for (i = 0; i < SIGSZ; ++i)
186 {
187 r->sig[i]
188 = (((ofs + i >= SIGSZ? 0 : a->sig[ofs + i]) >> n)
189 | ((ofs + i + 1 >= SIGSZ? 0 : a->sig[ofs + i + 1])
190 << (HOST_BITS_PER_LONG - n)));
191 }
192 }
193 else
194 {
195 for (i = 0; ofs + i < SIGSZ; ++i)
196 r->sig[i] = a->sig[ofs + i];
197 for (; i < SIGSZ; ++i)
198 r->sig[i] = 0;
199 }
200
201 return sticky != 0;
202 }
sticky_rshift_significand returns true if any 1 bit is shifted out. The returned value (1 in that case) is ORed into the lowest bit of the significand (r->sig[0]). It acts as a sticky bit: it records that precision has been lost so that the later rounding step can take it into account.
Then, if the exponent is larger than the maximum representable by the target format, it is without doubt an overflow. If the exponent is smaller than that of the minimum normalized number of the target format, the outcome depends on whether the target format supports denormalized numbers. For a format that supports the denormalized form, if the significand of the new format can still accommodate the value, the value is denormalized, with the lost bits recorded by sticky_rshift_significand.
round_for_format (continued)
2313 /* There are P2 true significand bits, followed by one guard bit,
2314 followed by one sticky bit, followed by stuff. Fold nonzero
2315 stuff into the sticky bit. */
2316
2317 sticky = 0;
2318 for (i = 0, w = (np2 - 1) / HOST_BITS_PER_LONG; i < w; ++i)
2319 sticky |= r->sig[i];
2320 sticky |=
2321 r->sig[w] & (((unsigned long)1 << ((np2 - 1) % HOST_BITS_PER_LONG)) - 1);
2322
2323 guard = test_significand_bit (r, np2 - 1);
2324 lsb = test_significand_bit (r, np2);
2325
2326 /* Round to even. */
2327 if (guard && (sticky || lsb))
2328 {
2329 REAL_VALUE_TYPE u;
2330 get_zero (&u, 0);
2331 set_significand_bit (&u, np2);
2332
2333 if (add_significands (r, r, &u))
2334 {
2335 /* Overflow. Means the significand had been all ones, and
2336 is now all zeros. Need to increase the exponent, and
2337 possibly re-normalize it. */
2338 if (++r->exp > emax2)
2339 goto overflow;
2340 r->sig[SIGSZ-1] = SIG_MSB;
2341
2342 if (fmt->log2_b != 1)
2343 {
2344 int shift = r->exp & (fmt->log2_b - 1);
2345 if (shift)
2346 {
2347 shift = fmt->log2_b - shift;
2348 rshift_significand (r, r, shift);
2349 r->exp += shift;
2350 if (r->exp > emax2)
2351 goto overflow;
2352 }
2353 }
2354 }
2355 }
2356
2357 /* Catch underflow that we deferred until after rounding. */
2358 if (r->exp <= emin2m1)
2359 goto underflow;
2360
2361 /* Clear out trailing garbage. */
2362 clear_significand_below (r, np2);
2363 }
The code above handles the shift arising from a different radix. The last part of round_for_format handles the difference in the number of significand bits. As we have seen, real_value uses a very large number of significand bits - 128 + HOST_BITS_PER_LONG (192 for Linux on x86) - while the floating-point formats use fewer bits. So during the conversion, the truncation must be given careful consideration.
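Note that at line 2324, the bit at position np2 is the least significant bit (lsb) of the part that is kept, and at line 2323 guard is the bit just below it, i.e. the most significant bit of the part being truncated; sticky collects all the bits below guard. Rounding is to nearest, with ties going to even: if guard is set and either sticky is nonzero (the truncated part is more than half a unit in the last place) or lsb is set (the truncated part is exactly half and the kept value is odd), we round up by adding 1 at position np2 of the value. The addition is done by add_significands.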
275 static inline bool
276 add_significands (REAL_VALUE_TYPE *r, const REAL_VALUE_TYPE *a, in real.c
277 const REAL_VALUE_TYPE *b)
278 {
279 bool carry = false;
280 int i;
281
282 for (i = 0; i < SIGSZ; ++i)
283 {
284 unsigned long ai = a->sig[i];
285 unsigned long ri = ai + b->sig[i];
286
287 if (carry)
288 {
289 carry = ri < ai;
290 carry |= ++ri == 0;
291 }
292 else
293 carry = ri < ai;
294
295 r->sig[i] = ri;
296 }
297
298 return carry;
299 }
As we are handling the values as unsigned, if the sum of a word is less than an operand, that word has produced a carry, so carry is set. If the sum still has a carry at the end, we adjust the exponent to accommodate the value. Notice that in our case we just add 1 to the original value of the exponent. The sum can only overflow when the original value contains all 1s in its significand bits. If the incremented exponent does not itself overflow, the significand is set to SIG_MSB at line 2340 and, for a non-binary radix, the value is re-aligned to the radix (lines 2342 to 2353).