解析整数字符串已经相当麻烦,而浮点数字符串的情况更要复杂得多。同样首先为将要创建的REAL_CST节点选定伴随的类型节点。默认的浮点数具有类型double(CPP_N_MEDIUM)。
/* Build the tree constant for the floating-point literal held in TOKEN.

   FLAGS carries the classification bits for the literal: the
   CPP_N_WIDTH field selects long double (CPP_N_LARGE), float
   (CPP_N_SMALL, or any constant when flag_single_precision_constant is
   set) or double (otherwise); CPP_N_IMAGINARY marks an imaginary
   constant.

   The spelling is copied, without its suffix characters, into a
   nul-terminated temporary obtained from alloca, parsed by
   real_from_string and converted to the mode of the chosen type.
   Returns the node made by build_real, wrapped by build_complex with a
   zero real part when the constant is imaginary.  */
568 static tree
569 interpret_float (const cpp_token *token, unsigned int flags) in c-lex.c
570 {
571 tree type;
572 tree value;
573 REAL_VALUE_TYPE real;
574 char *copy;
575 size_t copylen;
576 const char *typename;
577
578 /* FIXME: make %T work in error/warning, then we don't need typename. */
579 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
580 {
581 type = long_double_type_node;
582 typename = "long double";
583 }
584 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
585 || flag_single_precision_constant)
586 {
587 type = float_type_node;
588 typename = "float";
589 }
590 else
591 {
592 type = double_type_node;
593 typename = "double";
594 }
595
596 /* Copy the constant to a nul-terminated buffer. If the constant
597 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
598 can't handle them. */
599 copylen = token->val.str.len;
600 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
601 /* Must be an F or L suffix. */
602 copylen--;
603 if (flags & CPP_N_IMAGINARY)
604 /* I or J suffix. */
605 copylen--;
606
607 copy = alloca (copylen + 1);
608 memcpy (copy, token->val.str.text, copylen);
609 copy[copylen] = '\0'; /* terminate the copy so real_from_string stops at the end of the digits */
610
611 real_from_string (&real, copy);
612 real_convert (&real, TYPE_MODE (type), &real);
613
614 /* A diagnostic is required for "soft" overflow by some ISO C
615 testsuites. This is not pedwarn, because some people don't want
616 an error for this.
617 ??? That's a dubious reason... is this a mandatory diagnostic or
618 isn't it? -- zw, 2001-08-21. */
619 if (REAL_VALUE_ISINF (real) && pedantic)
620 warning ("floating constant exceeds range of \"%s\"", typename);
621
622 /* Create a node with determined type and value. */
623 value = build_real (type, real);
624 if (flags & CPP_N_IMAGINARY)
625 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
626
627 return value;
628 }
接下来,确定浮点数字符串所对应的浮点值。我们已经看到浮点数常量有2种表示方式,一种是16进制,其指数部分以2为底;另一种是10进制,其指数部分以10为底。不过不管是何种方式,首先在1765行将r清0,并把符号位置为0(即非负)。
/* Parse the floating-point literal STR and store its value in *R.

   An optional leading '-' or '+' is consumed first.  This part of the
   listing covers the hexadecimal form (prefix "0x" or "0X"): hex digits
   are OR-ed into the significand starting at bit SIGNIFICAND_BITS - 4
   and moving down four bits per digit, while EXP accumulates the binary
   exponent (4 per integer-part digit, minus 4 per skipped leading
   fractional zero, plus the decimal number that follows 'p'/'P').
   If the number after 'p'/'P' grows past MAX_EXP, control jumps to the
   underflow or overflow label at the end of the function.  */
1759 void
1760 real_from_string (REAL_VALUE_TYPE *r, const char *str) in real.c
1761{
1762 int exp = 0;
1763 bool sign = false;
1764
1765 get_zero (r, 0); /* start from zero with sign 0; the parsed sign is applied at the end */
1766
1767 if (*str == '-')
1768 {
1769 sign = true;
1770 str++;
1771 }
1772 else if (*str == '+')
1773 str++;
1774
1775 if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1776 {
1777 /* Hexadecimal floating point. */
1778 int pos = SIGNIFICAND_BITS - 4, d; /* pos: bit offset at which the next hex digit is stored */
1779
1780 str += 2;
1781
1782 while (*str == '0')
1783 str++;
1784 while (1)
1785 {
1786 d = hex_value (*str);
1787 if (d == _hex_bad)
1788 break;
1789 if (pos >= 0) /* digits that no longer fit (pos < 0) are not stored */
1790 {
1791 r->sig[pos / HOST_BITS_PER_LONG]
1792 |= (unsigned long) d << (pos % HOST_BITS_PER_LONG);
1793 pos -= 4;
1794 }
1795 exp += 4; /* every integer-part digit raises the exponent, stored or not */
1796 str++;
1797 }
1798 if (*str == '.')
1799 {
1800 str++;
1801 if (pos == SIGNIFICAND_BITS - 4) /* no digit has been stored so far */
1802 {
1803 while (*str == '0')
1804 str++, exp -= 4;
1805 }
1806 while (1)
1807 {
1808 d = hex_value (*str);
1809 if (d == _hex_bad)
1810 break;
1811 if (pos >= 0)
1812 {
1813 r->sig[pos / HOST_BITS_PER_LONG]
1814 |= (unsigned long) d << (pos % HOST_BITS_PER_LONG);
1815 pos -= 4;
1816 }
1817 str++;
1818 }
1819 }
1820 if (*str == 'p' || *str == 'P')
1821 {
1822 bool exp_neg = false;
1823
1824 str++;
1825 if (*str == '-')
1826 {
1827 exp_neg = true;
1828 str++;
1829 }
1830 else if (*str == '+')
1831 str++;
1832
1833 d = 0;
1834 while (ISDIGIT (*str))
1835 {
1836 d *= 10;
1837 d += *str - '0';
1838 if (d > MAX_EXP)
1839 {
1840 /* Overflowed the exponent. */
1841 if (exp_neg)
1842 goto underflow;
1843 else
1844 goto overflow;
1845 }
1846 str++;
1847 }
1848 if (exp_neg)
1849 d = -d;
1850
1851 exp += d;
1852 }
1853
1854 r->class = rvc_normal;
1855 r->exp = exp;
1856
1857 normalize (r);
1858 }
对于第一种方式,它以“0x/0X”开头。REAL_VALUE_TYPE我们在前面看过(tree_real_cst节点一节),是编译器表示浮点数的内部形式。SIGNIFICAND_BITS是该形式的尾数位数(128 + HOST_BITS_PER_LONG,当HOST_BITS_PER_LONG为32时即160),EXP_BITS是其指数位数(27)。从上面的代码可以看出,如果给出的字符串过长,填满尾数位后(pos < 0),多余的数字不再写入尾数;其中整数部分的多余数字仍然会使指数增加4(如果字符串中没有指数部分,这个指数留给normalize检查)。注意1795及1804行对exp的调整,这使得尾数保持在0.xx…x这样的形式。1857行的normalize则对得到的数值进行规范化,使精度尽可能的高(这里基本无事可做)。
而第二种10进制方式,不能通过简单地移位来实现进位,只能采用稍许麻烦些的办法。
real_from_string (continued)
/* Decimal branch of real_from_string, followed by the exit paths shared
   with the hexadecimal branch.

   Each decimal digit is folded in as R = R * 10 + digit, using
   REAL_VALUE_TYPE arithmetic (do_multiply / do_add).  EXP counts the
   power of ten still owed: it drops by one per fractional digit and by
   one per skipped leading fractional zero, then the number after
   'e'/'E' is added.  A non-zero EXP is finally applied by times_pten.
   If the number after 'e'/'E' grows past MAX_EXP, the result becomes a
   signed zero (underflow) or a signed infinity (overflow).  */
1859 else
1860 {
1861 /* Decimal floating point. */
1862 const REAL_VALUE_TYPE *ten = ten_to_ptwo (0); /* 10**(2**0), i.e. ten */
1863 int d;
1864
1865 while (*str == '0')
1866 str++;
1867 while (ISDIGIT (*str))
1868 {
1869 d = *str++ - '0';
1870 do_multiply (r, r, ten);
1871 if (d)
1872 do_add (r, r, real_digit (d), 0);
1873 }
1874 if (*str == '.')
1875 {
1876 str++;
1877 if (r->class == rvc_zero) /* value is still zero: leading fractional zeros only lower EXP */
1878 {
1879 while (*str == '0')
1880 str++, exp--;
1881 }
1882 while (ISDIGIT (*str))
1883 {
1884 d = *str++ - '0';
1885 do_multiply (r, r, ten);
1886 if (d)
1887 do_add (r, r, real_digit (d), 0);
1888 exp--;
1889 }
1890 }
1891
1892 if (*str == 'e' || *str == 'E')
1893 {
1894 bool exp_neg = false;
1895
1896 str++;
1897 if (*str == '-')
1898 {
1899 exp_neg = true;
1900 str++;
1901 }
1902 else if (*str == '+')
1903 str++;
1904
1905 d = 0;
1906 while (ISDIGIT (*str))
1907 {
1908 d *= 10;
1909 d += *str - '0';
1910 if (d > MAX_EXP)
1911 {
1912 /* Overflowed the exponent. */
1913 if (exp_neg)
1914 goto underflow;
1915 else
1916 goto overflow;
1917 }
1918 str++;
1919 }
1920 if (exp_neg)
1921 d = -d;
1922 exp += d;
1923 }
1924
1925 if (exp)
1926 times_pten (r, exp);
1927 }
1928
1929 r->sign = sign; /* normal exit for both branches: apply the sign parsed at the start */
1930 return;
1931
1932 underflow:
1933 get_zero (r, sign);
1934 return;
1935
1936 overflow:
1937 get_inf (r, sign);
1938 return;
1939 }
函数ten_to_ptwo返回REAL_VALUE_TYPE 形式的数值10**2**n。在1862行,我们得到的是10。注意2012行,rvc_zero表明该tens的数值还没有计算出来,否则它应该是rvc_normal。
/* Return a pointer to the cached REAL_VALUE_TYPE constant 10**(2**N).

   Aborts unless 0 <= N < EXP_BITS.  Entries of the static table are
   filled in on first use; an entry whose class is still rvc_zero is
   treated as not yet computed.  Small N are computed by squaring a
   HOST_WIDE_INT N times and converting with real_from_integer; larger N
   are obtained by squaring the entry for N - 1 with do_multiply.  */
2004 static const REAL_VALUE_TYPE * in real.c
2005 ten_to_ptwo (int n)
2006 {
2007 static REAL_VALUE_TYPE tens[EXP_BITS];
2008
2009 if (n < 0 || n >= EXP_BITS)
2010 abort ();
2011
2012 if (tens[n].class == rvc_zero)
2013 {
2014 if (n < (HOST_BITS_PER_WIDE_INT == 64 ? 5 : 4)) /* integer path only for small N; the bound depends on HOST_WIDE_INT width */
2015 {
2016 HOST_WIDE_INT t = 10;
2017 int i;
2018
2019 for (i = 0; i < n; ++i)
2020 t *= t;
2021
2022 real_from_integer (&tens[n], VOIDmode, t, 0, 1);
2023 }
2024 else
2025 {
2026 const REAL_VALUE_TYPE *t = ten_to_ptwo (n - 1);
2027 do_multiply (&tens[n], t, t);
2028 }
2029 }
2030
2031 return &tens[n];
2032 }
函数do_multiply只接受REAL_VALUE_TYPE形式的参数。这就是上面调用ten_to_ptwo构建REAL_VALUE_TYPE形式数值10的原因。do_multiply执行计算r = a * b。首先,检查浮点数的特殊情形。下面CLASS2的定义为:#define CLASS2(A, B) ((A) << 2 | (B)),它构建对应这对浮点数类型的唯一数值。
/* Compute R = A * B.

   Zero, infinity and NaN operands are resolved by the switch on the
   pair of operand classes and return false.  For two normal operands
   the product is accumulated from half-word partial products with
   do_add.  When R is the same object as A or B, the product is built
   in a local temporary and copied to *R at the end.

   Returns true if the result is inexact: a partial product was dropped
   because its exponent is below -MAX_EXP, do_add reported a loss, or a
   partial-product exponent above MAX_EXP forced the result to
   infinity.  Returns false otherwise.  */
662 static bool
663 do_multiply (REAL_VALUE_TYPE *r, const REAL_VALUE_TYPE *a, in real.c
664 const REAL_VALUE_TYPE *b)
665 {
666 REAL_VALUE_TYPE u, t, *rr;
667 unsigned int i, j, k;
668 int sign = a->sign ^ b->sign;
669 bool inexact = false;
670
671 switch (CLASS2 (a->class, b->class))
672 {
673 case CLASS2 (rvc_zero, rvc_zero):
674 case CLASS2 (rvc_zero, rvc_normal):
675 case CLASS2 (rvc_normal, rvc_zero):
676 /* +-0 * ANY = 0 with appropriate sign. */
677 get_zero (r, sign);
678 return false;
679
680 case CLASS2 (rvc_zero, rvc_nan):
681 case CLASS2 (rvc_normal, rvc_nan):
682 case CLASS2 (rvc_inf, rvc_nan):
683 case CLASS2 (rvc_nan, rvc_nan):
684 /* ANY * NaN = NaN. */
685 *r = *b;
686 r->sign = sign;
687 return false;
688
689 case CLASS2 (rvc_nan, rvc_zero):
690 case CLASS2 (rvc_nan, rvc_normal):
691 case CLASS2 (rvc_nan, rvc_inf):
692 /* NaN * ANY = NaN. */
693 *r = *a;
694 r->sign = sign;
695 return false;
696
697 case CLASS2 (rvc_zero, rvc_inf):
698 case CLASS2 (rvc_inf, rvc_zero):
699 /* 0 * Inf = NaN */
700 get_canonical_qnan (r, sign);
701 return false;
702
703 case CLASS2 (rvc_inf, rvc_inf):
704 case CLASS2 (rvc_normal, rvc_inf):
705 case CLASS2 (rvc_inf, rvc_normal):
706 /* Inf * Inf = Inf, R * Inf = Inf */
707 get_inf (r, sign);
708 return false;
709
710 case CLASS2 (rvc_normal, rvc_normal):
711 break;
712
713 default:
714 abort ();
715 }
716
717 if (r == a || r == b) /* R aliases an operand: accumulate in T so A and B stay intact */
718 rr = &t;
719 else
720 rr = r;
721 get_zero (rr, 0);
722
723 /* Collect all the partial products. Since we don't have sure access
724 to a widening multiply, we split each long into two half-words.
725
726 Consider the long-hand form of a four half-word multiplication:
727
728 A B C D
729 * E F G H
730 --------------------
731 DE DF DG DH
732 CE CF CG CH
733 BE BF BG BH
734 AE AF AG AH
735
736 We construct partial products of the widened half-word products
737 that are known to not overlap, e.g. DF+DH. Each such partial
738 product is given its proper exponent, which allows us to sum them
739 and obtain the finished product. */
740
741 for (i = 0; i < SIGSZ * 2; ++i)
742 {
743 unsigned long ai = a->sig[i / 2]; /* half-word I of A: odd I takes the high half, even I the low half */
744 if (i & 1)
745 ai >>= HOST_BITS_PER_LONG / 2;
746 else
747 ai &= ((unsigned long)1 << (HOST_BITS_PER_LONG / 2)) - 1;
748
749 if (ai == 0)
750 continue;
751
752 for (j = 0; j < 2; ++j) /* J selects the even (0) or odd (1) half-words of B */
753 {
754 int exp = (a->exp - (2*SIGSZ-1-i)*(HOST_BITS_PER_LONG/2)
755 + (b->exp - (1-j)*(HOST_BITS_PER_LONG/2)));
756
757 if (exp > MAX_EXP)
758 {
759 get_inf (r, sign);
760 return true;
761 }
762 if (exp < -MAX_EXP)
763 {
764 /* Would underflow to zero, which we shouldn't bother adding. */
765 inexact = true;
766 continue;
767 }
768
769 memset (&u, 0, sizeof (u));
770 u.class = rvc_normal;
771 u.exp = exp;
772
773 for (k = j; k < SIGSZ * 2; k += 2)
774 {
775 unsigned long bi = b->sig[k / 2];
776 if (k & 1)
777 bi >>= HOST_BITS_PER_LONG / 2;
778 else
779 bi &= ((unsigned long)1 << (HOST_BITS_PER_LONG / 2)) - 1;
780
781 u.sig[k / 2] = ai * bi; /* product of two half-words, stored in one full word of U */
782 }
783
784 normalize (&u);
785 inexact |= do_add (rr, rr, &u, 0);
786 }
787 }
788
789 rr->sign = sign;
790 if (rr != r)
791 *r = t;
792
793 return inexact;
794 }
如果是正常的浮点数,717行以下的代码将被执行。注意,虽然我们现在处理的是十进制形式的浮点数,但是REAL_VALUE_TYPE却是2进制的表示形式(尾数是二进制位串,指数以2为底)。这里的初始值0及倍数10都是REAL_VALUE_TYPE的形式。包括在real_from_string的1872及1887行对读入数字的处理也是通过real_digit为其生成REAL_VALUE_TYPE形式的值。
/* Return a pointer to the cached REAL_VALUE_TYPE for the decimal digit N.

   Aborts unless 0 <= N <= 9.  Entries for 1..9 are filled in on first
   use via real_from_integer; an entry whose class is still rvc_zero is
   treated as not yet computed.  The entry for 0 is never written here
   and is returned as the static array holds it.  */
2052 static const REAL_VALUE_TYPE *
2053 real_digit (int n) in real.c
2054 {
2055 static REAL_VALUE_TYPE num[10];
2056
2057 if (n < 0 || n > 9)
2058 abort ();
2059
2060 if (n > 0 && num[n].class == rvc_zero)
2061 real_from_integer (&num[n], VOIDmode, n, 0, 1);
2062
2063 return &num[n];
2064 }
REAL_VALUE_TYPE的尾数位部分是long类型的数组,SIGSZ是这个数组的大小。那么,754到767行,检查731到734行注释中所显示的部分积的指数部分是否溢出,而784行的normalize会对整个部分积的结果检查溢出,并尽可能保留精度(这很重要)。
而由REAL_VALUE_TYPE形式所表示的值的加减法r = a + b或r = a - b,要由do_add来执行。
/* Compute R = A + B, or R = A - B when SUBTRACT_P is nonzero.

   After line 532 SUBTRACT_P says whether the significands have to be
   subtracted (operand signs and the requested operation combined).
   Zero, infinity and NaN operands are resolved by the switch and return
   false.  For two normal operands, the operand with the smaller
   exponent is shifted right to line up with the other, the significands
   are added or subtracted, and the result is re-normalized.

   Returns true if bits were lost along the way (the result is inexact),
   including the case where the addition carries past MAX_EXP and the
   result becomes infinity; returns false otherwise.  */
522 static bool
523 do_add (REAL_VALUE_TYPE *r, const REAL_VALUE_TYPE *a, in real.c
524 const REAL_VALUE_TYPE *b, int subtract_p)
525 {
526 int dexp, sign, exp;
527 REAL_VALUE_TYPE t;
528 bool inexact = false;
529
530 /* Determine if we need to add or subtract. */
531 sign = a->sign;
532 subtract_p = (sign ^ b->sign) ^ subtract_p;
533
534 switch (CLASS2 (a->class, b->class))
535 {
536 case CLASS2 (rvc_zero, rvc_zero):
537 /* -0 + -0 = -0, -0 - +0 = -0; all other cases yield +0. */
538 get_zero (r, sign & !subtract_p);
539 return false;
540
541 case CLASS2 (rvc_zero, rvc_normal):
542 case CLASS2 (rvc_zero, rvc_inf):
543 case CLASS2 (rvc_zero, rvc_nan):
544 /* 0 + ANY = ANY. */
545 case CLASS2 (rvc_normal, rvc_nan):
546 case CLASS2 (rvc_inf, rvc_nan):
547 case CLASS2 (rvc_nan, rvc_nan):
548 /* ANY + NaN = NaN. */
549 case CLASS2 (rvc_normal, rvc_inf):
550 /* R + Inf = Inf. */
551 *r = *b;
552 r->sign = sign ^ subtract_p;
553 return false;
554
555 case CLASS2 (rvc_normal, rvc_zero):
556 case CLASS2 (rvc_inf, rvc_zero):
557 case CLASS2 (rvc_nan, rvc_zero):
558 /* ANY + 0 = ANY. */
559 case CLASS2 (rvc_nan, rvc_normal):
560 case CLASS2 (rvc_nan, rvc_inf):
561 /* NaN + ANY = NaN. */
562 case CLASS2 (rvc_inf, rvc_normal):
563 /* Inf + R = Inf. */
564 *r = *a;
565 return false;
566
567 case CLASS2 (rvc_inf, rvc_inf):
568 if (subtract_p)
569 /* Inf - Inf = NaN. */
570 get_canonical_qnan (r, 0);
571 else
572 /* Inf + Inf = Inf. */
573 *r = *a;
574 return false;
575
576 case CLASS2 (rvc_normal, rvc_normal):
577 break;
578
579 default:
580 abort ();
581 }
582
583 /* Swap the arguments such that A has the larger exponent. */
584 dexp = a->exp - b->exp;
585 if (dexp < 0)
586 {
587 const REAL_VALUE_TYPE *t; /* pointer used only for the swap; shadows the outer T inside this block */
588 t = a, a = b, b = t;
589 dexp = -dexp;
590 sign ^= subtract_p; /* SIGN now follows the operand that became A */
591 }
592 exp = a->exp;
593
594 /* If the exponents are not identical, we need to shift the
595 significand of B down. */
596 if (dexp > 0)
597 {
598 /* If the exponents are too far apart, the significands
599 do not overlap, which makes the subtraction a noop. */
600 if (dexp >= SIGNIFICAND_BITS)
601 {
602 *r = *a;
603 r->sign = sign;
604 return true;
605 }
606
607 inexact |= sticky_rshift_significand (&t, b, dexp);
608 b = &t; /* continue with the shifted copy of B */
609 }
610
611 if (subtract_p)
612 {
613 if (sub_significands (r, a, b, inexact))
614 {
615 /* We got a borrow out of the subtraction. That means that
616 A and B had the same exponent, and B had the larger
617 significand. We need to swap the sign and negate the
618 significand. */
619 sign ^= 1;
620 neg_significand (r, r);
621 }
622 }
623 else
624 {
625 if (add_significands (r, a, b))
626 {
627 /* We got carry out of the addition. This means we need to
628 shift the significand back down one bit and increase the
629 exponent. */
630 inexact |= sticky_rshift_significand (r, r, 1);
631 r->sig[SIGSZ-1] |= SIG_MSB;
632 if (++exp > MAX_EXP)
633 {
634 get_inf (r, sign);
635 return true;
636 }
637 }
638 }
639
640 r->class = rvc_normal;
641 r->sign = sign;
642 r->exp = exp;
643 /* Zero out the remaining fields. */
644 r->signalling = 0;
645 r->canonical = 0;
646
647 /* Re-normalize the result. */
648 normalize (r);
649
650 /* Special case: if the subtraction results in zero, the result
651 is positive. */
652 if (r->class == rvc_zero)
653 r->sign = 0;
654 else
655 r->sig[0] |= inexact; /* record lost bits in the lowest significand bit */
656
657 return inexact;
658 }
加减法的操作数的指数部分必须要调整到一致。在上面585至591行,必要时交换a与b,使a成为指数部分更大的操作数;随后在607行把b的尾数右移,使其与a的指数部分对齐。如果在计算过程中丢了位,inexact将为1,那么在655行把结果的最低位设为1(起粘滞位的作用,类似四舍五入时记住“还有余数”)。
因为十进制表达形式的指数部分以10为底,那么在real_from_string的1926行,这个指数部分要由times_pten与尾数部分进行计算。
/* Multiply *R by 10**EXP.

   The power of ten is assembled from the binary digits of |EXP|: for
   every set bit I, the running product is multiplied by
   ten_to_ptwo (I), i.e. 10**(2**I).  For a non-negative EXP the running
   product is *R itself.  For a negative EXP the power is accumulated in
   the local PTEN, starting from the constant one, and *R is divided by
   it at the end.  */
2068 static void
2069 times_pten (REAL_VALUE_TYPE *r, int exp) in real.c
2070 {
2071 REAL_VALUE_TYPE pten, *rr;
2072 bool negative = (exp < 0);
2073 int i;
2074
2075 if (negative)
2076 {
2077 exp = -exp;
2078 pten = *real_digit (1); /* start the divisor at 1 */
2079 rr = &pten;
2080 }
2081 else
2082 rr = r;
2083
2084 for (i = 0; exp > 0; ++i, exp >>= 1)
2085 if (exp & 1)
2086 do_multiply (rr, rr, ten_to_ptwo (i));
2087
2088 if (negative)
2089 do_divide (r, r, &pten);
2090 }
对于指数为负数时,我们把它转为除法运算,这里我们不深入do_divide了,它里面进行的是2进制的除法。从real_from_string回来,虽然我们可以2种形式给出浮点数常量,但最终它们都只有REAL_VALUE_TYPE这种2进制的形式。然后在interpret_float的612行,real_convert将该值扩展或裁剪至符合指定的类型。一切非常完美。