Sunday, November 03, 2024

Re: bc cannot calculate very big number

On Sun, Nov 03, 2024 at 08:55:18AM +0100, Otto Moerbeek wrote:

> On Sun, Nov 03, 2024 at 08:44:26AM +0100, Sebastien Marie wrote:
>
> > Otto Moerbeek <otto@drijf.net> writes:
> >
> > > On Sun, Nov 03, 2024 at 08:13:20AM +0100, Otto Moerbeek wrote:
> > >
> > >> On Sun, Nov 03, 2024 at 09:38:47AM +0700, hahahahacker2009 wrote:
> > >>
> > >> > Hello,
> > >> > OpenBSD bc(1) is unable to calculate very big number,
> > >> > for example, (1024*1024)^(1024*1024) run for an hour and still cannot
> > >> > give me the result.
> > >> > It just use about 11M of memory and 100% CPU.
> > >> > Gavin Howard's bc port do it in 2 minutes
> > >> > GNU bc on Linux do it in 5 minutes.
> > >>
> > >> bc uses dc which does a simple exponentation computation, which is
> > >> basically doing repeated multiplications. I'm sure there are smarter
> > >> methods, it's not just implemented that way.
> > >
> > > Oh, I looked and I did it a bit smarter when I wrote that code 20
> > > years back, but still, I think it can be improved.
> > >
> >
> > The problem seems more on numnber printing than on number exponentation.
> >
> > $ /usr/bin/time -lp dc -e '1024 1024 * 1024 1024 * ^'
> > real 46.39
> > user 45.66
> > sys 0.00
> > 15244 maximum resident set size
> > 0 average shared memory size
> > 0 average unshared data size
> > 0 average unshared stack size
> > 3509 minor page faults
> > 1 major page faults
> > 0 swaps
> > 0 block input operations
> > 0 block output operations
> > 0 messages sent
> > 0 messages received
> > 0 signals received
> > 0 voluntary context switches
> > 3334 involuntary context switches
> >
> > The exponentation took ~50 seconds (dc(1) doesn't print the number on
> > the stack by default).
> > --
> > Sebastien Marie
>
> Heh, thanks for that insight. I might take a look some day.
>
> -Otto
>

That day came sooner than expected.

The root cause of the speed difference is that my dc/bc compute in
binary, while the others compute in base 10. So they basically do not
need to do a conversion for outputting in base 10.

By avoiding a lot of memory allocations and using specialized
conversion routines for base 10 and base 16 things speed up nicely for
me for those bases (from about 1.5 hour to 5m20s for base 10, and to
28s for base 16 (there the conversion is nearly free).

Don't forget to rebuild bc as as well after applying the diff for dc.

-Otto

Index: inout.c
===================================================================
RCS file: /home/cvs/src/usr.bin/dc/inout.c,v
diff -u -p -r1.23 inout.c
--- inout.c 8 Mar 2023 04:43:10 -0000 1.23
+++ inout.c 3 Nov 2024 20:19:28 -0000
@@ -38,7 +38,7 @@ static void src_freestring(struct source
static void flushwrap(FILE *);
static void putcharwrap(FILE *, int);
static void printwrap(FILE *, const char *);
-static char *get_digit(u_long, int, u_int);
+static void get_digit(u_long, int, u_int, char *, size_t);

static struct vtable stream_vtable = {
src_getcharstream,
@@ -264,20 +264,17 @@ read_string(struct source *src)
return p;
}

-static char *
-get_digit(u_long num, int digits, u_int base)
+static void
+get_digit(u_long num, int digits, u_int base, char *buf, size_t sz)
{
- char *p;
-
if (base <= 16) {
- p = bmalloc(2);
- p[0] = num >= 10 ? num + 'A' - 10 : num + '0';
- p[1] = '\0';
+ buf[0] = num >= 10 ? num + 'A' - 10 : num + '0';
+ buf[1] = '\0';
} else {
- if (asprintf(&p, "%0*lu", digits, num) == -1)
- err(1, NULL);
+ int ret = snprintf(buf, sz, "%0*lu", digits, num);
+ if (ret < 0 || (size_t)ret >= sz)
+ err(1, "truncation %d %zu", ret, sz);
}
- return p;
}

void
@@ -285,11 +282,10 @@ printnumber(FILE *f, const struct number
{
struct number *int_part, *fract_part;
int digits;
- char buf[11];
- size_t sz;
+ char buf[12], *str, *p;
+ size_t allocated;
int i;
- struct stack stack;
- char *p;
+ BN_ULONG *mem;

charcount = 0;
lastchar = -1;
@@ -307,24 +303,49 @@ printnumber(FILE *f, const struct number
}
split_number(b, int_part->number, fract_part->number);

- i = 0;
- stack_init(&stack);
- while (!BN_is_zero(int_part->number)) {
- BN_ULONG rem = BN_div_word(int_part->number, base);
- stack_pushstring(&stack, get_digit(rem, digits, base));
- i++;
- }
- sz = i;
- if (BN_is_negative(b->number))
- putcharwrap(f, '-');
- for (i = 0; i < sz; i++) {
- p = stack_popstring(&stack);
- if (base > 16)
- putcharwrap(f, ' ');
- printwrap(f, p);
- free(p);
+ if (base == 10 && !BN_is_zero(int_part->number)) {
+ str = BN_bn2dec(int_part->number);
+ bn_checkp(str);
+ p = str;
+ while (*p)
+ putcharwrap(f, *p++);
+ free(str);
+ } else if (base == 16 && !BN_is_zero(int_part->number)) {
+ str = BN_bn2hex(int_part->number);
+ bn_checkp(str);
+ p = str;
+ if (*p == '-')
+ putcharwrap(f, *p++);
+ /* skip leading zero's */
+ while (*p == '0')
+ p++;
+ while (*p)
+ putcharwrap(f, *p++);
+ free(str);
+ } else {
+ i = 0;
+ allocated = 1;
+ mem = breallocarray(NULL, allocated, sizeof(BN_ULONG));
+ while (!BN_is_zero(int_part->number)) {
+ if (i >= allocated) {
+ allocated *= 2;
+ mem = breallocarray(mem, allocated,
+ sizeof(BN_ULONG));
+ }
+ mem[i++] = BN_div_word(int_part->number, base);
+ }
+ if (BN_is_negative(b->number))
+ putcharwrap(f, '-');
+ for (i = i - 1; i >= 0; i--) {
+ get_digit(mem[i], digits, base, buf,
+ sizeof(buf));
+ if (base > 16)
+ putcharwrap(f, ' ');
+ printwrap(f, buf);
+ }
+ free(mem);
}
- stack_clear(&stack);
+
if (b->scale > 0) {
struct number *num_base;
BIGNUM *mult, *stop;
@@ -352,13 +373,12 @@ printnumber(FILE *f, const struct number
bmachine_scale());
split_number(fract_part, int_part->number, NULL);
rem = BN_get_word(int_part->number);
- p = get_digit(rem, digits, base);
+ get_digit(rem, digits, base, buf, sizeof(buf));
int_part->scale = 0;
normalize(int_part, fract_part->scale);
bn_check(BN_sub(fract_part->number, fract_part->number,
int_part->number));
- printwrap(f, p);
- free(p);
+ printwrap(f, buf);
bn_check(BN_mul_word(mult, base));
}
free_number(num_base);

No comments:

Post a Comment