Vào Th 2, 4 thg 11, 2024 vào lúc 03:33 Otto Moerbeek <otto@drijf.net> đã viết:
>
> On Sun, Nov 03, 2024 at 08:55:18AM +0100, Otto Moerbeek wrote:
>
> > On Sun, Nov 03, 2024 at 08:44:26AM +0100, Sebastien Marie wrote:
> >
> > > Otto Moerbeek <otto@drijf.net> writes:
> > >
> > > > On Sun, Nov 03, 2024 at 08:13:20AM +0100, Otto Moerbeek wrote:
> > > >
> > > >> On Sun, Nov 03, 2024 at 09:38:47AM +0700, hahahahacker2009 wrote:
> > > >>
> > > >> > Hello,
> > > >> > OpenBSD bc(1) is unable to calculate very big number,
> > > >> > for example, (1024*1024)^(1024*1024) run for an hour and still cannot
> > > >> > give me the result.
> > > >> > It just use about 11M of memory and 100% CPU.
> > > >> > Gavin Howard's bc port do it in 2 minutes
> > > >> > GNU bc on Linux do it in 5 minutes.
> > > >>
> > > >> bc uses dc which does a simple exponentation computation, which is
> > > >> basically doing repeated multiplications. I'm sure there are smarter
> > > >> methods, it's not just implemented that way.
> > > >
> > > > Oh, I looked and I did it a bit smarter when I wrote that code 20
> > > > years back, but still, I think it can be improved.
> > > >
> > >
> > > The problem seems more on numnber printing than on number exponentation.
> > >
> > > $ /usr/bin/time -lp dc -e '1024 1024 * 1024 1024 * ^'
> > > real 46.39
> > > user 45.66
> > > sys 0.00
> > > 15244 maximum resident set size
> > > 0 average shared memory size
> > > 0 average unshared data size
> > > 0 average unshared stack size
> > > 3509 minor page faults
> > > 1 major page faults
> > > 0 swaps
> > > 0 block input operations
> > > 0 block output operations
> > > 0 messages sent
> > > 0 messages received
> > > 0 signals received
> > > 0 voluntary context switches
> > > 3334 involuntary context switches
> > >
> > > The exponentation took ~50 seconds (dc(1) doesn't print the number on
> > > the stack by default).
> > > --
> > > Sebastien Marie
> >
> > Heh, thanks for that insight. I might take a look some day.
> >
> > -Otto
> >
>
> That day came sooner than expected.
>
> The root cause of the speed difference is that my dc/bc compute in
> binary, while the others compute in base 10. So they basically do not
> need to do a conversion for outputting in base 10.
>
> By avoiding a lot of memory allocations and using specialized
> conversion routines for base 10 and base 16 things speed up nicely for
> me for those bases (from about 1.5 hour to 5m20s for base 10, and to
> 28s for base 16 (there the conversion is nearly free).
>
> Don't forget to rebuild bc as as well after applying the diff for dc.
>
> -Otto
>
> Index: inout.c
> ===================================================================
> RCS file: /home/cvs/src/usr.bin/dc/inout.c,v
> diff -u -p -r1.23 inout.c
> --- inout.c 8 Mar 2023 04:43:10 -0000 1.23
> +++ inout.c 3 Nov 2024 20:19:28 -0000
> @@ -38,7 +38,7 @@ static void src_freestring(struct source
> static void flushwrap(FILE *);
> static void putcharwrap(FILE *, int);
> static void printwrap(FILE *, const char *);
> -static char *get_digit(u_long, int, u_int);
> +static void get_digit(u_long, int, u_int, char *, size_t);
>
> static struct vtable stream_vtable = {
> src_getcharstream,
> @@ -264,20 +264,17 @@ read_string(struct source *src)
> return p;
> }
>
> -static char *
> -get_digit(u_long num, int digits, u_int base)
> +static void
> +get_digit(u_long num, int digits, u_int base, char *buf, size_t sz)
> {
> - char *p;
> -
> if (base <= 16) {
> - p = bmalloc(2);
> - p[0] = num >= 10 ? num + 'A' - 10 : num + '0';
> - p[1] = '\0';
> + buf[0] = num >= 10 ? num + 'A' - 10 : num + '0';
> + buf[1] = '\0';
> } else {
> - if (asprintf(&p, "%0*lu", digits, num) == -1)
> - err(1, NULL);
> + int ret = snprintf(buf, sz, "%0*lu", digits, num);
> + if (ret < 0 || (size_t)ret >= sz)
> + err(1, "truncation %d %zu", ret, sz);
> }
> - return p;
> }
>
> void
> @@ -285,11 +282,10 @@ printnumber(FILE *f, const struct number
> {
> struct number *int_part, *fract_part;
> int digits;
> - char buf[11];
> - size_t sz;
> + char buf[12], *str, *p;
> + size_t allocated;
> int i;
> - struct stack stack;
> - char *p;
> + BN_ULONG *mem;
>
> charcount = 0;
> lastchar = -1;
> @@ -307,24 +303,49 @@ printnumber(FILE *f, const struct number
> }
> split_number(b, int_part->number, fract_part->number);
>
> - i = 0;
> - stack_init(&stack);
> - while (!BN_is_zero(int_part->number)) {
> - BN_ULONG rem = BN_div_word(int_part->number, base);
> - stack_pushstring(&stack, get_digit(rem, digits, base));
> - i++;
> - }
> - sz = i;
> - if (BN_is_negative(b->number))
> - putcharwrap(f, '-');
> - for (i = 0; i < sz; i++) {
> - p = stack_popstring(&stack);
> - if (base > 16)
> - putcharwrap(f, ' ');
> - printwrap(f, p);
> - free(p);
> + if (base == 10 && !BN_is_zero(int_part->number)) {
> + str = BN_bn2dec(int_part->number);
> + bn_checkp(str);
> + p = str;
> + while (*p)
> + putcharwrap(f, *p++);
> + free(str);
> + } else if (base == 16 && !BN_is_zero(int_part->number)) {
> + str = BN_bn2hex(int_part->number);
> + bn_checkp(str);
> + p = str;
> + if (*p == '-')
> + putcharwrap(f, *p++);
> + /* skip leading zero's */
> + while (*p == '0')
> + p++;
> + while (*p)
> + putcharwrap(f, *p++);
> + free(str);
> + } else {
> + i = 0;
> + allocated = 1;
> + mem = breallocarray(NULL, allocated, sizeof(BN_ULONG));
> + while (!BN_is_zero(int_part->number)) {
> + if (i >= allocated) {
> + allocated *= 2;
> + mem = breallocarray(mem, allocated,
> + sizeof(BN_ULONG));
> + }
> + mem[i++] = BN_div_word(int_part->number, base);
> + }
> + if (BN_is_negative(b->number))
> + putcharwrap(f, '-');
> + for (i = i - 1; i >= 0; i--) {
> + get_digit(mem[i], digits, base, buf,
> + sizeof(buf));
> + if (base > 16)
> + putcharwrap(f, ' ');
> + printwrap(f, buf);
> + }
> + free(mem);
> }
> - stack_clear(&stack);
> +
> if (b->scale > 0) {
> struct number *num_base;
> BIGNUM *mult, *stop;
> @@ -352,13 +373,12 @@ printnumber(FILE *f, const struct number
> bmachine_scale());
> split_number(fract_part, int_part->number, NULL);
> rem = BN_get_word(int_part->number);
> - p = get_digit(rem, digits, base);
> + get_digit(rem, digits, base, buf, sizeof(buf));
> int_part->scale = 0;
> normalize(int_part, fract_part->scale);
> bn_check(BN_sub(fract_part->number, fract_part->number,
> int_part->number));
> - printwrap(f, p);
> - free(p);
> + printwrap(f, buf);
> bn_check(BN_mul_word(mult, base));
> }
> free_number(num_base);
>
It is quicker than the last time, it can print the result in 30 minutes.
I will send the result of time -lp bc in 30 minutes
No comments:
Post a Comment