libc/printf: Use compiler-provided 64 bit math, phase 1

The _to_float() implementation had a somewhat kludgey hand-written 64
bit math implementation, which is unhelpful on Zephyr as all our
toolchains provide a working uint64_t runtime.  This is at best just
dupicated code from libgcc, and at worst less efficient.

This patch replaces the existing 64 bit minilibrary but keeps the
uint32_t[2] API as is for ease of validation and review.

One exception is _ldiv5, a specialized divide-by-five implementation.
The 64 bit division routines are large on some architectures (ARM and
ARC in particular), not pulled in by a default Zephyr build, and will
swamp the benefit from this patch.  So this includes a
refactored/improved _ldiv5 which leverages libgcc for multiword shifts
instead of just using raw division.

Note also the "noinline" attribute on _ladd().  This is a workaround
for an apparent compiler bug when built with -Og or -Os (hand-hacking
the Makefiles to build with -O0 works), perhaps due to my aliasing the
int array with a long long.  This will go away in phase 2.

Change-Id: I63e8c82dabe2bfaa75b63ddb59e5f11d51be538e
Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
Andy Ross 2016-08-15 11:31:58 -07:00 committed by Anas Nashif
parent 073cfddd0f
commit d06eea4eda

View file

@ -139,113 +139,69 @@ static int _to_dec(char *buf, int32_t value, int fplus, int fspace, int precisio
static void _llshift(uint32_t value[])
{
if (value[0] & 0x80000000)
value[1] = (value[1] << 1) | 1;
else
value[1] <<= 1;
value[0] <<= 1;
*((uint64_t *)&value[0]) <<= 1;
}
static void _lrshift(uint32_t value[])
static void __attribute__((noinline)) _ladd(uint32_t *result, uint32_t *value)
{
if (value[1] & 1)
value[0] = (value[0] >> 1) | 0x80000000;
else
value[0] = (value[0] >> 1) & 0x7FFFFFFF;
value[1] = (value[1] >> 1) & 0x7FFFFFFF;
}
static void _ladd(uint32_t result[], uint32_t value[])
{
uint32_t carry;
uint32_t temp;
carry = 0;
temp = result[0] + value[0];
if (result[0] & 0x80000000) {
if ((value[0] & 0x80000000) || ((temp & 0x80000000) == 0))
carry = 1;
} else {
if ((value[0] & 0x80000000) && ((temp & 0x80000000) == 0))
carry = 1;
}
result[0] = temp;
result[1] = result[1] + value[1] + carry;
uint64_t *r = (uint64_t *)&result[0];
uint64_t *v = (uint64_t *)&value[0];
*r = *r + *v;
}
static void _rlrshift(uint32_t value[])
{
uint32_t temp[2];
temp[0] = value[0] & 1;
temp[1] = 0;
_lrshift(value);
_ladd(value, temp);
uint64_t *v = (uint64_t *)&value[0];
*v = (*v & 1) + (*v >> 1);
}
/*
* 64 bit divide by 5 function for _to_float.
* The result is ROUNDED, not TRUNCATED.
*/
static void _ldiv5(uint32_t value[])
/* Tiny integer divide-by-five routine. The full 64 bit division
* implementations in libgcc are very large on some architectures, and
* currently nothing in Zephyr pulls it into the link. So it makes
* sense to define this much smaller special case here to avoid
* including it just for printf.
*
* It works by iteratively dividing the most significant 32 bits of
* the 64 bit value by 5. This will leave a remainder of 0-4
* (i.e. three significant bits), ensuring that the top 29 bits of the
* remainder are zero for the next iteration. Thus in the second
* iteration only 35 significant bits remain, and in the third only
* six. This was tested exhaustively through the first ~10B values in
* the input space, and for ~2e12 (4 hours runtime) random inputs
* taken from the full 64 bit space.
*/
static void _ldiv5(uint32_t value[])
{
uint32_t result[2];
register int shift;
uint32_t temp1[2];
uint32_t temp2[2];
uint64_t *v = (uint64_t *)&value[0];
uint32_t i, hi;
uint64_t rem = *v, quot = 0, q;
static const char shifts[] = { 32, 3, 0 };
result[0] = 0; /* Result accumulator */
result[1] = value[1] / 5;
temp1[0] = value[0]; /* Dividend for this pass */
temp1[1] = value[1] % 5;
temp2[1] = 0;
/* Usage in this file wants rounded behavior, not truncation. So add
* two to get the threshold right.
*/
rem += 2;
while (1) {
for (shift = 0; temp1[1] != 0; shift++)
_lrshift(temp1);
temp2[0] = temp1[0] / 5;
if (temp2[0] == 0) {
if (temp1[0] % 5 > (5 / 2)) {
temp1[0] = 1;
_ladd(result, temp1);
}
break;
}
temp1[0] = temp2[0];
while (shift-- != 0)
_llshift(temp1);
_ladd(result, temp1); /* Update result accumulator */
temp1[0] = result[0];
temp1[1] = result[1];
_llshift(temp1); /* Compute (current_result*5) */
_llshift(temp1);
_ladd(temp1, result);
temp1[0] = ~temp1[0]; /* Compute -(current_result*5) */
temp1[1] = ~temp1[1];
temp2[0] = 1;
_ladd(temp1, temp2);
_ladd(temp1, value); /* Compute #-(current_result*5) */
for (i = 0; i < 3; i++) {
hi = rem >> shifts[i];
q = (uint64_t)(hi / 5) << shifts[i];
rem -= q * 5;
quot += q;
}
value[0] = result[0];
value[1] = result[1];
*v = quot;
}
static char _get_digit(uint32_t fract[], int *digit_count)
{
int rval;
uint32_t temp[2];
uint64_t *fr = (uint64_t *)&fract[0];
if (*digit_count > 0) {
*digit_count -= 1;
temp[0] = fract[0];
temp[1] = fract[1];
_llshift(fract); /* Multiply by 10 */
_llshift(fract);
_ladd(fract, temp);
_llshift(fract);
rval = ((fract[1] >> 28) & 0xF) + '0';
fract[1] &= 0x0FFFFFFF;
*fr = *fr * 10;
rval = ((*fr >> 60) & 0xF) + '0';
*fr &= 0x0FFFFFFFFFFFFFFFull;
} else
rval = '0';
return (char) (rval);