[untested PATCH] Save 11 instructions in vfprintf_flt.o

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

[untested PATCH] Save 11 instructions in vfprintf_flt.o

George Spelvin
As part of poking around vfprintf.c, I came across this low-hanging fruit.

I'm waiting to test all of my printf changes together, but I thought
I'd throw it out for comment early.  I presume this sort of thing is okay?

Basically, by reversing the sense of the FL_FLTUPP flag so it has the same polarity
as bit 5 of the ASCII character set, you can save some code.  Both getting the
flag from the format letter, and copying the flag to the output.

The difference is 16 bytes on <= avr31, 20 bytes on avr35, and 22 on the rest:

   /----- Before -----\   /------ After -----\
   text    dec    hex   text    dec    hex filename
   1876   1876    754   1860   1860    744 ./avr/lib/avr2/vfprintf_flt.o
   1876   1876    754   1860   1860    744 ./avr/lib/avr2/tiny-stack/vfprintf_flt.o
   1702   1702    6a6   1682   1682    692 ./avr/lib/avr25/vfprintf_flt.o
   1702   1702    6a6   1682   1682    692 ./avr/lib/avr25/tiny-stack/vfprintf_flt.o
   1942   1942    796   1926   1926    786 ./avr/lib/avr3/vfprintf_flt.o
   1948   1948    79c   1932   1932    78c ./avr/lib/avr31/vfprintf_flt.o
   1770   1770    6ea   1750   1750    6d6 ./avr/lib/avr35/vfprintf_flt.o
   1704   1704    6a8   1682   1682    692 ./avr/lib/avr4/vfprintf_flt.o
   1768   1768    6e8   1746   1746    6d2 ./avr/lib/avr5/vfprintf_flt.o
   1850   1850    73a   1828   1828    724 ./avr/lib/avr51/vfprintf_flt.o
   1850   1850    73a   1828   1828    724 ./avr/lib/avr6/vfprintf_flt.o
   1768   1768    6e8   1746   1746    6d2 ./avr/lib/avrxmega2/vfprintf_flt.o
   1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega4/vfprintf_flt.o
   1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega5/vfprintf_flt.o
   1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega6/vfprintf_flt.o
   1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega7/vfprintf_flt.o

Here's the diff.  (The changes of "/* no break */" to "/* FALLTHROUGH */"
are to silence GCC 7's new fallthrough warning.)

diff --git a/avr-libc/libc/stdio/vfprintf.c b/avr-libc/libc/stdio/vfprintf.c
index 3ba6f9a9..83849432 100644
--- a/avr-libc/libc/stdio/vfprintf.c
+++ b/avr-libc/libc/stdio/vfprintf.c
@@ -114,6 +114,22 @@
 })
 #endif
 
+/*
+ * Copy bit (src & smask) to (dst & dmask).
+ *
+ * Unlike "if (src & smask) dst |= dmask", which is also two instructions
+ * and two cycles, this overwrites the destination bit (clearing it
+ * if necessary), and has fewer constraints; it can operate on the low
+ * 16 registers.
+ */
+#define COPYBIT(dst, dmask, src, smask) \
+    asm( "bst %2,%3" \
+ "\n bld %0,%1" \
+ : "=r" (dst) \
+ : "I" (ntz(dmask)), \
+  "r" (src), \
+  "I" (ntz(smask)))
+
 /* -------------------------------------------------------------------- */
 #if  PRINTF_LEVEL <= PRINTF_MIN
 
@@ -219,7 +235,7 @@ vfprintf (FILE * stream, const char *fmt, va_list ap)
  goto ultoa;
       case 'p':
         flags |= FL_ALT;
- /* no break */
+ /* FALLTHROUGH */
       case 'x':
  flags |= (FL_ALTHEX | FL_ALTLWR);
         base = 16;
@@ -278,7 +294,7 @@ vfprintf (FILE * stream, const char *fmt, va_list ap)
 #define FL_ALTUPP FL_PLUS
 #define FL_ALTHEX FL_SPACE
 
-#define FL_FLTUPP FL_ALT
+#define FL_FLTLWR FL_ALT
 #define FL_FLTEXP FL_PREC
 #define FL_FLTFIX FL_LONG
 
@@ -367,23 +383,22 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
 # error
 #endif
 
-#if PRINTF_LEVEL >= PRINTF_FLT
- if (c >= 'E' && c <= 'G') {
-    flags |= FL_FLTUPP;
-    c += 'e' - 'E';
-    goto flt_oper;
-
- } else if (c >= 'e' && c <= 'g') {
-
+ if ((c >= 'E' && c <= 'G') || (c >= 'e' && c <= 'g')) {
+#if PRINTF_LEVEL < PRINTF_FLT
+    /* Float printf not supported; stub */
+    (void) va_arg (ap, double);
+    buf[0] = '?';
+    goto buf_addr;
+#else
     int exp; /* exponent of master decimal digit */
     int n;
     unsigned char vtype; /* result of float value parse */
     unsigned char sign; /* sign character (or 0) */
 # define ndigs c /* only for this block, undef is below */
 
-    flags &= ~FL_FLTUPP;
+    COPYBIT(flags, FL_FLTLWR, c, 'e'-'E');
+    c |= 'e' - 'E';
 
-  flt_oper:
     if (!(flags & FL_PREC))
  prec = 6;
     flags &= ~(FL_FLTEXP | FL_FLTFIX);
@@ -434,11 +449,9 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
 # if ('I'-'i' != 'N'-'n') || ('I'-'i' != 'F'-'f') || ('I'-'i' != 'A'-'a')
 #  error
 # endif
- while ( (ndigs = pgm_read_byte(p)) != 0) {
-    if (flags & FL_FLTUPP)
- ndigs += 'I' - 'i';
+ while ( (ndigs = pgm_read_byte(p++)) != 0) {
+    COPYBIT(ndigs, 'i'-'I', flags, FL_FLTLWR);
     putc (ndigs, stream);
-    p++;
  }
  goto tail;
     }
@@ -523,7 +536,9 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
  }
 
  /* exponent */
- putc (flags & FL_FLTUPP ? 'E' : 'e', stream);
+ ndigs = 'E';
+ COPYBIT(ndigs, 'e'-'E', flags, FL_FLTLWR);
+ putc (ndigs, stream);
  ndigs = '+';
  if (exp < 0 || (exp == 0 && (vtype & FTOA_CARRY) != 0)) {
     exp = -exp;
@@ -538,16 +553,8 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
 
     goto tail;
 # undef ndigs
- }
-
-#else /* to: PRINTF_LEVEL >= PRINTF_FLT */
- if ((c >= 'E' && c <= 'G') || (c >= 'e' && c <= 'g')) {
-    (void) va_arg (ap, double);
-    buf[0] = '?';
-    goto buf_addr;
- }
-
 #endif
+ }
 
  {
     const char * pnt;
@@ -618,7 +625,7 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
  goto ultoa;
       case 'p':
         flags |= FL_ALT;
- /* no break */
+ /* FALLTHROUGH */
       case 'x':
  if (flags & FL_ALT)
     flags |= FL_ALTHEX;

_______________________________________________
AVR-libc-dev mailing list
[hidden email]
https://lists.nongnu.org/mailman/listinfo/avr-libc-dev
Reply | Threaded
Open this post in threaded view
|

Re: [untested PATCH] Save 11 instructions in vfprintf_flt.o

Georg-Johann Lay-2
George Spelvin schrieb:

> As part of poking around vfprintf.c, I came across this low-hanging fruit.
>
> I'm waiting to test all of my printf changes together, but I thought
> I'd throw it out for comment early.  I presume this sort of thing is okay?
>
> Basically, by reversing the sense of the FL_FLTUPP flag so it has the same polarity
> as bit 5 of the ASCII character set, you can save some code.  Both getting the
> flag from the format letter, and copying the flag to the output.
>
> The difference is 16 bytes on <= avr31, 20 bytes on avr35, and 22 on the rest:
>
>    /----- Before -----\   /------ After -----\
>    text    dec    hex   text    dec    hex filename
>    1876   1876    754   1860   1860    744 ./avr/lib/avr2/vfprintf_flt.o
>    1876   1876    754   1860   1860    744 ./avr/lib/avr2/tiny-stack/vfprintf_flt.o
>    1702   1702    6a6   1682   1682    692 ./avr/lib/avr25/vfprintf_flt.o
>    1702   1702    6a6   1682   1682    692 ./avr/lib/avr25/tiny-stack/vfprintf_flt.o
>    1942   1942    796   1926   1926    786 ./avr/lib/avr3/vfprintf_flt.o
>    1948   1948    79c   1932   1932    78c ./avr/lib/avr31/vfprintf_flt.o
>    1770   1770    6ea   1750   1750    6d6 ./avr/lib/avr35/vfprintf_flt.o
>    1704   1704    6a8   1682   1682    692 ./avr/lib/avr4/vfprintf_flt.o
>    1768   1768    6e8   1746   1746    6d2 ./avr/lib/avr5/vfprintf_flt.o
>    1850   1850    73a   1828   1828    724 ./avr/lib/avr51/vfprintf_flt.o
>    1850   1850    73a   1828   1828    724 ./avr/lib/avr6/vfprintf_flt.o
>    1768   1768    6e8   1746   1746    6d2 ./avr/lib/avrxmega2/vfprintf_flt.o
>    1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega4/vfprintf_flt.o
>    1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega5/vfprintf_flt.o
>    1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega6/vfprintf_flt.o
>    1838   1838    72e   1816   1816    718 ./avr/lib/avrxmega7/vfprintf_flt.o
>
> Here's the diff.  (The changes of "/* no break */" to "/* FALLTHROUGH */"
> are to silence GCC 7's new fallthrough warning.)
>
> diff --git a/avr-libc/libc/stdio/vfprintf.c b/avr-libc/libc/stdio/vfprintf.c
> index 3ba6f9a9..83849432 100644
> --- a/avr-libc/libc/stdio/vfprintf.c
> +++ b/avr-libc/libc/stdio/vfprintf.c
> @@ -114,6 +114,22 @@
>  })
>  #endif
>  
> +/*
> + * Copy bit (src & smask) to (dst & dmask).
> + *
> + * Unlike "if (src & smask) dst |= dmask", which is also two instructions

This is confusing because the BST + BLD code below is not a replacement
for what the C code is indicating.  For example the C code never clears
the bit as opposed to BLD.

> + * and two cycles, this overwrites the destination bit (clearing it
> + * if necessary), and has fewer constraints; it can operate on the low
> + * 16 registers.
> + */
> +#define COPYBIT(dst, dmask, src, smask) \
> +    asm( "bst %2,%3" \
> + "\n bld %0,%1" \
> + : "=r" (dst) \

This is wrong because the old value of dst does not die here:
all bits except %1 are surviving. Correct constraint is "+r".

> + : "I" (ntz(dmask)), \
> +  "r" (src), \
> +  "I" (ntz(smask)))
> +
>  /* -------------------------------------------------------------------- */
>  #if  PRINTF_LEVEL <= PRINTF_MIN
>  
> @@ -219,7 +235,7 @@ vfprintf (FILE * stream, const char *fmt, va_list ap)
>   goto ultoa;
>        case 'p':
>          flags |= FL_ALT;
> - /* no break */
> + /* FALLTHROUGH */
>        case 'x':
>   flags |= (FL_ALTHEX | FL_ALTLWR);
>          base = 16;
> @@ -278,7 +294,7 @@ vfprintf (FILE * stream, const char *fmt, va_list ap)
>  #define FL_ALTUPP FL_PLUS
>  #define FL_ALTHEX FL_SPACE
>  
> -#define FL_FLTUPP FL_ALT
> +#define FL_FLTLWR FL_ALT
>  #define FL_FLTEXP FL_PREC
>  #define FL_FLTFIX FL_LONG
>  
> @@ -367,23 +383,22 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
>  # error
>  #endif
>  
> -#if PRINTF_LEVEL >= PRINTF_FLT
> - if (c >= 'E' && c <= 'G') {
> -    flags |= FL_FLTUPP;
> -    c += 'e' - 'E';
> -    goto flt_oper;
> -
> - } else if (c >= 'e' && c <= 'g') {
> -
> + if ((c >= 'E' && c <= 'G') || (c >= 'e' && c <= 'g')) {
> +#if PRINTF_LEVEL < PRINTF_FLT
> +    /* Float printf not supported; stub */
> +    (void) va_arg (ap, double);
> +    buf[0] = '?';
> +    goto buf_addr;
> +#else
>      int exp; /* exponent of master decimal digit */
>      int n;
>      unsigned char vtype; /* result of float value parse */
>      unsigned char sign; /* sign character (or 0) */
>  # define ndigs c /* only for this block, undef is below */
>  
> -    flags &= ~FL_FLTUPP;
> +    COPYBIT(flags, FL_FLTLWR, c, 'e'-'E');
> +    c |= 'e' - 'E';
>  
> -  flt_oper:
>      if (!(flags & FL_PREC))
>   prec = 6;
>      flags &= ~(FL_FLTEXP | FL_FLTFIX);
> @@ -434,11 +449,9 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
>  # if ('I'-'i' != 'N'-'n') || ('I'-'i' != 'F'-'f') || ('I'-'i' != 'A'-'a')
>  #  error
>  # endif
> - while ( (ndigs = pgm_read_byte(p)) != 0) {
> -    if (flags & FL_FLTUPP)
> - ndigs += 'I' - 'i';
> + while ( (ndigs = pgm_read_byte(p++)) != 0) {
> +    COPYBIT(ndigs, 'i'-'I', flags, FL_FLTLWR);
>      putc (ndigs, stream);
> -    p++;
>   }
>   goto tail;
>      }
> @@ -523,7 +536,9 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
>   }
>  
>   /* exponent */
> - putc (flags & FL_FLTUPP ? 'E' : 'e', stream);
> + ndigs = 'E';
> + COPYBIT(ndigs, 'e'-'E', flags, FL_FLTLWR);
> + putc (ndigs, stream);
>   ndigs = '+';
>   if (exp < 0 || (exp == 0 && (vtype & FTOA_CARRY) != 0)) {
>      exp = -exp;
> @@ -538,16 +553,8 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
>  
>      goto tail;
>  # undef ndigs
> - }
> -
> -#else /* to: PRINTF_LEVEL >= PRINTF_FLT */
> - if ((c >= 'E' && c <= 'G') || (c >= 'e' && c <= 'g')) {
> -    (void) va_arg (ap, double);
> -    buf[0] = '?';
> -    goto buf_addr;
> - }
> -
>  #endif
> + }
>  
>   {
>      const char * pnt;
> @@ -618,7 +625,7 @@ int vfprintf (FILE * stream, const char *fmt, va_list ap)
>   goto ultoa;
>        case 'p':
>          flags |= FL_ALT;
> - /* no break */
> + /* FALLTHROUGH */
>        case 'x':
>   if (flags & FL_ALT)
>      flags |= FL_ALTHEX;
>
> _______________________________________________
> AVR-libc-dev mailing list
> [hidden email]
> https://lists.nongnu.org/mailman/listinfo/avr-libc-dev
>


_______________________________________________
AVR-libc-dev mailing list
[hidden email]
https://lists.nongnu.org/mailman/listinfo/avr-libc-dev
Reply | Threaded
Open this post in threaded view
|

Re: [untested PATCH] Save 11 instructions in vfprintf_flt.o

George Spelvin
>> + * Unlike "if (src & smask) dst |= dmask", which is also two instructions

> This is confusing because the BST + BLD code below is not a replacement
> for what the C code is indicating.  For example the C code never clears
> the bit as opposed to BLD.

>> + * and two cycles, this overwrites the destination bit (clearing it
>> + * if necessary), and has fewer constraints; it can operate on the low
>> + * 16 registers.

That's *exactly* what I was trying to say in the rest of the sentence you
quoted back to me!  Perhaps I should just give the equivalent C code.

> + */
> +#define COPYBIT(dst, dmask, src, smask) \
> +    asm( "bst %2,%3" \
> + "\n bld %0,%1" \
> + : "=r" (dst) \

> This is wrong because the old value of dst does not die here:
> all bits except %1 are surviving. Correct constraint is "+r".

Yes, I notice that myself a few minutes after posting.  It didn't
seem earth-shaking enough to warrant a followup.

It's now:

/*
 * Copy bit (src & smask) to (dst & dmask).  This expands to a pair of
 * bst/bld instructions which transfer the bit via the T register.
 *
 * Equivalent to "dst &= ~dmask; if (src & smask) dst |= dmask;", but is
 * only two instructions and has fewer constraints; it can operate on
 * the low 16 registers.
 */
#ifdef __BUILTIN_AVR_INSERT_BITS
/*
 * Using a GCC builtin is preferable; it gives the optimizer more
 * information and saves a few more bytes.
 *
 * The first argument to __builtin_avr_insert_bits is an array of 8
 * nibbles, each of which indicates the source of the corresponding
 * bit of the resilt.  All are 0xf (return the corresponding dst bit
 * unchanged) except the one corresponding to dmask, which specifies
 * the bit position in src to copy from.
 */
#define COPYBIT(dst, dmask, src, smask) \
    ((dst) = __builtin_avr_insert_bits( \
        ~((15ul-ntz(smask))*(dmask)*(dmask)*(dmask)*(dmask)), \
        src, dst))
#else
#define COPYBIT(dst, dmask, src, smask) \
    asm( "bst %2,%3" \
        "\n bld %0,%1" \
        : "+r" (dst) \
        : "n" (ntz(dmask)), \
          "r" (src), \
          "n" (ntz(smask)))
#endif


_______________________________________________
AVR-libc-dev mailing list
[hidden email]
https://lists.nongnu.org/mailman/listinfo/avr-libc-dev