-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmmix-arith.w
1846 lines (1657 loc) · 58.4 KB
/
mmix-arith.w
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% This file is part of the MMIXware package (c) Donald E Knuth 1999
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
\def\title{MMIX-ARITH}
\def\MMIX{\.{MMIX}}
\def\MMIXAL{\.{MMIXAL}}
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
\def\dts{\mathinner{\ldotp\ldotp}}
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
\def\ff{\\{ff\kern-.05em}}
@s ff TeX
@s bool normal @q unreserve a C++ keyword @>
@s xor normal @q unreserve a C++ keyword @>
@s bignum int
@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\
arithmetic on an old-fashioned 32-bit computer---like the one the author
had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.
All operations are fabricated from 32-bit arithmetic, including
a full implementation of the IEEE floating point standard,
assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.
Some day 64-bit machines will be commonplace and the awkward manipulations of
the present program will look quite archaic. Interested readers who have such
computers will be able to convert the code to a pure 64-bit form without
difficulty, thereby obtaining much faster and simpler routines. Meanwhile,
however, we can simulate the future and hope for continued progress.
This program module has a simple structure, intended to make it
suitable for loading with \MMIX\ simulators and assemblers.
@c
#include <stdio.h>
#include <string.h>
#include <ctype.h>
@<Stuff for \CEE/ preprocessor@>@;
typedef enum{@+false,true@+} bool;
@<Tetrabyte and octabyte type definitions@>@;
@<Other type definitions@>@;
@<Global variables@>@;
@<Subroutines@>
@ Subroutines of this program are declared first with a prototype,
as in {\mc ANSI C}, then with an old-style \CEE/ function definition.
Here are some preprocessor commands that make this work correctly with both
new-style and old-style compilers.
@^prototypes for functions@>
@<Stuff for \CEE/ preprocessor@>=
#ifdef __STDC__
#define ARGS(list) list
#else
#define ARGS(list) ()
#endif
@ The definition of type \&{tetra} should be changed, if necessary, so that
it represents an unsigned 32-bit integer.
@^system dependencies@>
@<Tetra...@>=
typedef unsigned int tetra;
/* for systems conforming to the LP-64 data model */
typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */
@ @d sign_bit ((unsigned)0x80000000)
@<Glob...@>=
octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */
octa neg_one={-1,-1}; /* |neg_one.h=neg_one.l=-1| */
octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */
octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */
@ It's easy to add and subtract octabytes, if we aren't terribly
worried about speed.
@<Subr...@>=
octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa oplus(y,z) /* compute $y+z$ */
octa y,z;
{@+ octa x;
x.h=y.h+z.h;@+
x.l=y.l+z.l;
if (x.l<y.l) x.h++;
return x;
}
@#
octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa ominus(y,z) /* compute $y-z$ */
octa y,z;
{@+ octa x;
x.h=y.h-z.h;@+
x.l=y.l-z.l;
if (x.l>y.l) x.h--;
return x;
}
@ In the following subroutine, |delta| is a signed quantity that is
assumed to fit in a signed tetrabyte.
@<Subr...@>=
octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>
octa incr(y,delta) /* compute $y+\delta$ */
octa y;
int delta;
{@+ octa x;
x.h=y.h;@+ x.l=y.l+delta;
if (delta>=0) {
if (x.l<y.l) x.h++;
}@+else if (x.l>y.l) x.h--;
return x;
}
@ Left and right shifts are only a bit more difficult.
@<Subr...@>=
octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>
octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */
octa y;
int s;
{
while (s>=32) y.h=y.l,y.l=0,s-=32;
if (s) {@+register tetra yhl=y.h<<s,ylh=y.l>>(32-s);
y.h=yhl+ylh;@+ y.l<<=s;
}
return y;
}
@#
octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>
octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */
octa y;
int s,u;
{
while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;
if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;
y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;
}
return y;
}
@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining
an unsigned 128-bit product. It is easy to do this on a 32-bit machine
by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.
@^multiprecision multiplication@>
The following subroutine returns the lower half of the product, and
puts the upper half into a global octabyte called |aux|.
@<Subr...@>=
octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa omult(y,z)
octa y,z;
{
register int i,j,k;
tetra u[4],v[4],w[8];
register tetra t;
octa acc;
@<Unpack the multiplier and multiplicand to |u| and |v|@>;
for (j=0;j<4;j++) w[j]=0;
for (j=0;j<4;j++)
if (!v[j]) w[j+4]=0;
else {
for (i=k=0;i<4;i++) {
t=u[i]*v[j]+w[i+j]+k;
w[i+j]=t&0xffff, k=t>>16;
}
w[j+4]=k;
}
@<Pack |w| into the outputs |aux| and |acc|@>;
return acc;
}
@ @<Glob...@>=
octa aux; /* secondary output of subroutines with multiple outputs */
bool overflow; /* set by certain subroutines for signed arithmetic */
@ @<Unpack the mult...@>=
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;
@ @<Pack |w| into the outputs |aux| and |acc|@>=
aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];
acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];
@ Signed multiplication has the same lower half product as unsigned
multiplication. The signed upper half product is obtained with at most two
further subtractions, after which the result has overflowed if and only if
the upper half is unequal to 64 copies of the sign bit in the lower half.
@<Subr...@>=
octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa signed_omult(y,z)
octa y,z;
{
octa acc;
acc=omult(y,z);
if (y.h&sign_bit) aux=ominus(aux,z);
if (z.h&sign_bit) aux=ominus(aux,y);
overflow=(aux.h!=aux.l || (aux.h^(aux.h>>1)^(acc.h&sign_bit)));
return acc;
}
@* Division. Long division of an unsigned 128-bit integer by an unsigned
64-bit integer is, of course, one of the most challenging routines
needed for \MMIX\ arithmetic. The following program, based on
Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes
octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r<z$,
given octabytes $x$, $y$, and~$z$, assuming that $x<z$.
(If $x\ge z$, it simply sets $q=x$ and $r=y$.)
The quotient~$q$ is returned by the subroutine;
the remainder~$r$ is stored in~|aux|.
@^multiprecision division@>
@<Subr...@>=
octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>
octa odiv(x,y,z)
octa x,y,z;
{
register int i,j,k,n,d;
tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;
register tetra t;
octa acc;
@<Check that |x<z|; otherwise give trivial answer@>;
@<Unpack the dividend and divisor to |u| and |v|@>;
@<Determine the number of significant places |n| in the divisor |v|@>;
@<Normalize the divisor@>;
for (j=3;j>=0;j--) @<Determine the quotient digit |q[j]|@>;
@<Unnormalize the remainder@>;
@<Pack |q| and |u| to |acc| and |aux|@>;
return acc;
}
@ @<Check that |x<z|; otherwise give trivial answer@>=
if (x.h>z.h || (x.h==z.h && x.l>=z.l)) {
aux=y;@+ return x;
}
@ @<Unpack the div...@>=
u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;
@ @<Determine the number of significant places |n| in the divisor |v|@>=
for (n=4;v[n-1]==0;n--);
@ We shift |u| and |v| left by |d| places, where |d| is chosen to
make $2^{15}\le v_{n-1}<2^{16}$.
@<Normalize the divisor@>=
vh=v[n-1];
for (d=0;vh<0x8000;d++,vh<<=1);
for (j=k=0; j<n+4; j++) {
t=(u[j]<<d)+k;
u[j]=t&0xffff, k=t>>16;
}
for (j=k=0; j<n; j++) {
t=(v[j]<<d)+k;
v[j]=t&0xffff, k=t>>16;
}
vh=v[n-1];
vmh=(n>1? v[n-2]: 0);
@ @<Unnormalize the remainder@>=
mask=(1<<d)-1;
for (j=3; j>=n; j--) u[j]=0;
for (k=0;j>=0;j--) {
t=(k<<16)+u[j];
u[j]=t>>d, k=t&mask;
}
@ @<Pack |q| and |u| to |acc| and |aux|@>=
acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];
aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];
@ @<Determine the quotient digit |q[j]|@>=
{
@<Find the trial quotient, $\hat q$@>;
@<Subtract $b^j\hat q v$ from |u|@>;
@<If the result was negative, decrease $\hat q$ by 1@>;
q[j]=qhat;
}
@ @<Find the trial quotient, $\hat q$@>=
t=(u[j+n]<<16)+u[j+n-1];
qhat=t/vh, rhat=t-vh*qhat;
if (n>1) while (qhat==0x10000 || qhat*vmh>(rhat<<16)+u[j+n-2]) {
qhat--, rhat+=vh;
if (rhat>=0x10000) break;
}
@ After this step, |u[j+n]| will either equal |k| or |k-1|. The
true value of~|u| would be obtained by subtracting~|k| from |u[j+n]|;
but we don't have to fuss over |u[j+n]|, because it won't be examined later.
@<Subtract $b^j\hat q v$ from |u|@>=
for (i=k=0; i<n; i++) {
t=u[i+j]+0xffff0000-k-qhat*v[i];
u[i+j]=t&0xffff, k=0xffff-(t>>16);
}
@ The correction here occurs only rarely, but it can be necessary---for
example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.
@<If the result was negative, decrease $\hat q$ by 1@>=
if (u[j+n]!=k) {
qhat--;
for (i=k=0; i<n; i++) {
t=u[i+j]+v[i]+k;
u[i+j]=t&0xffff, k=t>>16;
}
}
@ Signed division can be reduced to unsigned division in a tedious
but straightforward manner. We assume that the divisor isn't zero.
@<Subr...@>=
octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa signed_odiv(y,z)
octa y,z;
{
octa yy,zz,q;
register int sy,sz;
if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);
else sy=0, yy=y;
if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);
else sz=0, zz=z;
q=odiv(zero_octa,yy,zz);
overflow=false;
switch (sy+sz) {
case 2+1: aux=ominus(zero_octa,aux);
if (q.h==sign_bit) overflow=true;
case 0+0: return q;
case 2+0:@+ if (aux.h || aux.l) aux=ominus(zz,aux);
goto negate_q;
case 0+1:@+ if (aux.h || aux.l) aux=ominus(aux,zz);
negate_q:@+ if (aux.h || aux.l) return ominus(neg_one,q);
else return ominus(zero_octa,q);
}
}
@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to
implement directly, but three of them occur often enough to deserve
packaging as subroutines.
@<Subr...@>=
octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa oand(y,z) /* compute $y\land z$ */
octa y,z;
{@+ octa x;
x.h=y.h&z.h;@+ x.l=y.l&z.l;
return x;
}
@#
octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa oandn(y,z) /* compute $y\land\bar z$ */
octa y,z;
{@+ octa x;
x.h=y.h&~z.h;@+ x.l=y.l&~z.l;
return x;
}
@#
octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa oxor(y,z) /* compute $y\oplus z$ */
octa y,z;
{@+ octa x;
x.h=y.h^z.h;@+ x.l=y.l^z.l;
return x;
}
@ Here's a fun way to count the number of bits in a tetrabyte.
[This classical trick is called the ``Gillies--Miller method
for sideways addition'' in {\sl The Preparation of Programs
for an Electronic Digital Computer\/} by Wilkes, Wheeler, and
Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),
191--193. Some of the tricks used here were suggested by
Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]
@^Gillies, Donald Bruce@>
@^Miller, Jeffrey Charles Percy@>
@^Wilkes, Maurice Vincent@>
@^Wheeler, David John@>
@^Gill, Stanley@>
@^Singh, Balbir@>
@^Rossmanith, Peter@>
@^Schwoon, Stefan@>
@<Subr...@>=
int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>
int count_bits(x)
tetra x;
{
register int xx=x;
xx=xx-((xx>>1)&0x55555555);
xx=(xx&0x33333333)+((xx>>2)&0x33333333);
xx=(xx+(xx>>4))&0x0f0f0f0f;
xx=xx+(xx>>8);
return (xx+(xx>>16)) & 0xff;
}
@ To compute the nonnegative byte differences of two given tetrabytes,
we can carry out the following 20-step branchless computation:
@<Subr...@>=
tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
tetra byte_diff(y,z)
tetra y,z;
{
register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);
register tetra m=d&0x01000100;
register tetra x=d&(m-(m>>8));
d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);
m=d&0x01000100;
return x+((d&(m-(m>>8)))<<8);
}
@ To compute the nonnegative wyde differences of two tetrabytes,
another trick leads to a 15-step branchless computation.
(Research problem: Can |count_bits|, |byte_diff|, or |wyde_diff| be done
with fewer operations?)
@<Subr...@>=
tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
tetra wyde_diff(y,z)
tetra y,z;
{
register tetra a=((y>>16)-(z>>16))&0x10000;
register tetra b=((y&0xffff)-(z&0xffff))&0x10000;
return y-(z^((y^z)&(b-a-(b>>16))));
}
@ The last bitwise subroutine we need is the most interesting:
It implements \MMIX's \.{MOR} and \.{MXOR} operations.
@<Subr...@>=
octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>
octa bool_mult(y,z,xor)
octa y,z; /* the operands */
bool xor; /* do we do xor instead of or? */
{
octa o,x;
register tetra a,b,c;
register int k;
for (k=0,o=y,x=zero_octa;o.h||o.l;k++,o=shift_right(o,8,1))
if (o.l&0xff) {
a=((z.h>>k)&0x01010101)*0xff;
b=((z.l>>k)&0x01010101)*0xff;
c=(o.l&0xff)*0x01010101;
if (xor) x.h^=a&c, x.l^=b&c;
else x.h|=a&c, x.l|=b&c;
}
return x;
}
@* Floating point packing and unpacking. Standard IEEE floating binary
numbers pack a sign, exponent, and fraction into a tetrabyte
or octabyte. In this section we consider basic subroutines that
convert between IEEE format and the separate unpacked components.
@d ROUND_OFF 1
@d ROUND_UP 2
@d ROUND_DOWN 3
@d ROUND_NEAR 4
@<Glob...@>=
int cur_round; /* the current rounding mode */
@ The |fpack| routine takes an octabyte $f$, a raw exponent~$e$,
and a sign~|s|, and packs them
into the floating binary number that corresponds to
$\pm2^{e-1076}f$, using a given rounding mode.
The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.
Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$
is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and |s='+'|.
The raw exponent~$e$ is usually one less than
the final exponent value; the leading bit of~$f$ is essentially added
to the exponent. (This trick works nicely for subnormal numbers, when
$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)
Exceptional events are noted by oring appropriate bits into
the global variable |exceptions|. Special considerations apply to
underflow, which is not fully specified by Section 7.4 of the IEEE standard:
Implementations of the standard are free to choose between two definitions
of ``tininess'' and two definitions of ``accuracy loss.''
\MMIX\ determines tininess {\it after\/} rounding, hence a result with
$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent
to inexactness. Thus, a result underflows if and only if
it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.
The |fpack| routine sets |U_BIT| in |exceptions| if and only if the result is
tiny, |X_BIT| if and only if the result is inexact.
@^underflow@>
@^tininess@>
@^accuracy loss@>
@d X_BIT (1<<8) /* floating inexact */
@d Z_BIT (1<<9) /* floating division by zero */
@d U_BIT (1<<10) /* floating underflow */
@d O_BIT (1<<11) /* floating overflow */
@d I_BIT (1<<12) /* floating invalid operation */
@d W_BIT (1<<13) /* float-to-fix overflow */
@d V_BIT (1<<14) /* integer overflow */
@d D_BIT (1<<15) /* integer divide check */
@d E_BIT (1<<18) /* external (dynamic) trap bit */
@<Subr...@>=
octa fpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
octa fpack(f,e,s,r)
octa f; /* the normalized fraction part */
int e; /* the raw exponent */
char s; /* the sign */
int r; /* the rounding mode */
{
octa o;
if (e>0x7fd) e=0x7ff, o=zero_octa;
else {
if (e<0) {
if (e<-54) o.h=0, o.l=1;
else {@+octa oo;
o=shift_right(f,-e,1);
oo=shift_left(o,-e);
if (oo.l!=f.l || oo.h!=f.h) o.l |= 1; /* sticky bit */
@^sticky bit@>
}
e=0;
}@+else o=f;
}
@<Round and return the result@>;
}
@ @<Glob...@>=
int exceptions; /* bits possibly destined for rA */
@ Everything falls together so nicely here, it's almost too good to be true!
@<Round and return the result@>=
if (o.l&3) exceptions |= X_BIT;
switch (r) {
case ROUND_DOWN:@+ if (s=='-') o=incr(o,3);@+break;
case ROUND_UP:@+ if (s!='-') o=incr(o,3);
case ROUND_OFF: break;
case ROUND_NEAR: o=incr(o, o.l&4? 2: 1);@+break;
}
o = shift_right(o,2,1);
o.h += e<<20;
if (o.h>=0x7ff00000) exceptions |= O_BIT+X_BIT; /* overflow */
else if (o.h<0x100000) exceptions |= U_BIT; /* tininess */
if (s=='-') o.h |= sign_bit;
return o;
@ Similarly, |sfpack| packs a short float, from inputs
having the same conventions as |fpack|.
@<Subr...@>=
tetra sfpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
tetra sfpack(f,e,s,r)
octa f; /* the fraction part */
int e; /* the raw exponent */
char s; /* the sign */
int r; /* the rounding mode */
{
register tetra o;
if (e>0x47d) e=0x47f, o=0;
else {
o=shift_left(f,3).h;
if (f.l&0x1fffffff) o|=1;
if (e<0x380) {
if (e<0x380-25) o=1;
else {@+register tetra o0,oo;
o0 = o;
o = o>>(0x380-e);
oo = o<<(0x380-e);
if (oo!=o0) o |= 1; /* sticky bit */
@^sticky bit@>
}
e=0x380;
}
}
@<Round and return the short result@>;
}
@ @<Round and return the short result@>=
if (o&3) exceptions |= X_BIT;
switch (r) {
case ROUND_DOWN:@+ if (s=='-') o+=3;@+break;
case ROUND_UP:@+ if (s!='-') o+=3;
case ROUND_OFF: break;
case ROUND_NEAR: o+=(o&4? 2: 1);@+break;
}
o = o>>2;
o += (e-0x380)<<23;
if (o>=0x7f800000) exceptions |= O_BIT+X_BIT; /* overflow */
else if (o<0x800000) exceptions |= U_BIT; /* tininess */
if (s=='-') o |= sign_bit;
return o;
@ The |funpack| routine is, roughly speaking, the opposite of |fpack|.
It takes a given floating point number~$x$ and separates out its
fraction part~$f$, exponent~$e$, and sign~$s$. It clears |exceptions|
to zero. It returns the type of value found: |zro|, |num|, |inf|,
or |nan|. When it returns |num|,
it will have set $f$, $e$, and~$s$
to the values from which |fpack| would produce the original number~$x$
without exceptions.
@d zero_exponent (-1000) /* zero is assumed to have this exponent */
@<Other type...@>=
typedef enum {@!zro,@!num,@!inf,@!nan}@+ftype;
@ @<Subr...@>=
ftype funpack @,@,@[ARGS((octa,octa*,int*,char*))@];@+@t}\6{@>
ftype funpack(x,f,e,s)
octa x; /* the given floating point value */
octa *f; /* address where the fraction part should be stored */
int *e; /* address where the exponent part should be stored */
char *s; /* address where the sign should be stored */
{
register int ee;
exceptions=0;
*s=(x.h&sign_bit? '-': '+');
*f=shift_left(x,2);
f->h &= 0x3fffff;
ee=(x.h>>20)&0x7ff;
if (ee) {
*e=ee-1;
f->h |= 0x400000;
return (ee<0x7ff? num: f->h==0x400000 && !f->l? inf: nan);
}
if (!x.l && !f->h) {
*e=zero_exponent;@+ return zro;
}
do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
*e=ee;@+ return num;
}
@ @<Subr...@>=
ftype sfunpack @,@,@[ARGS((tetra,octa*,int*,char*))@];@+@t}\6{@>
ftype sfunpack(x,f,e,s)
tetra x; /* the given floating point value */
octa *f; /* address where the fraction part should be stored */
int *e; /* address where the exponent part should be stored */
char *s; /* address where the sign should be stored */
{
register int ee;
exceptions=0;
*s=(x&sign_bit? '-': '+');
f->h=(x>>1)&0x3fffff, f->l=x<<31;
ee=(x>>23)&0xff;
if (ee) {
*e=ee+0x380-1;
f->h |= 0x400000;
return (ee<0xff? num: (x&0x7fffffff)==0x7f800000? inf: nan);
}
if (!(x&0x7fffffff)) {
*e=zero_exponent;@+return zro;
}
do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
*e=ee+0x380;@+ return num;
}
@ Since \MMIX\ downplays 32-bit operations, it uses |sfpack| and |sfunpack|
only when loading and storing short floats, or when converting
from fixed point to floating point.
@<Subr...@>=
octa load_sf @,@,@[ARGS((tetra))@];@+@t}\6{@>
octa load_sf(z)
tetra z; /* 32 bits to be loaded into a 64-bit register */
{
octa f,x;@+int e;@+char s;@+ftype t;
t=sfunpack(z,&f,&e,&s);
switch (t) {
case zro: x=zero_octa;@+break;
case num: return fpack(f,e,s,ROUND_OFF);
case inf: x=inf_octa;@+break;
case nan: x=shift_right(f,2,1);@+x.h|=0x7ff00000;@+break;
}
if (s=='-') x.h|=sign_bit;
return x;
}
@ @<Subr...@>=
tetra store_sf @,@,@[ARGS((octa))@];@+@t}\6{@>
tetra store_sf(x)
octa x; /* 64 bits to be loaded into a 32-bit word */
{
octa f;@+tetra z;@+int e;@+char s;@+ftype t;
t=funpack(x,&f,&e,&s);
switch (t) {
case zro: z=0;@+break;
case num: return sfpack(f,e,s,cur_round);
case inf: z=0x7f800000;@+break;
case nan:@+ if (!(f.h&0x200000)) {
f.h|=0x200000;@+exceptions|=I_BIT; /* NaN was signaling */
}
z=0x7f800000|(f.h<<1)|(f.l>>31);@+break;
}
if (s=='-') z|=sign_bit;
return z;
}
@* Floating multiplication and division.
The hardest fixed point operations were multiplication and division;
but these two operations are the {\it easiest\/} to implement in floating point
arithmetic, once their fixed point counterparts are available.
@<Subr...@>=
octa fmult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa fmult(y,z)
octa y,z;
{
ftype yt,zt;
int ye,ze;
char ys,zs;
octa x,xf,yf,zf;
register int xe;
register char xs;
yt=funpack(y,&yf,&ye,&ys);
zt=funpack(z,&zf,&ze,&zs);
xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
switch (4*yt+zt) {
@t\4@>@<The usual NaN cases@>;
case 4*zro+zro: case 4*zro+num: case 4*num+zro: x=zero_octa;@+break;
case 4*num+inf: case 4*inf+num: case 4*inf+inf: x=inf_octa;@+break;
case 4*zro+inf: case 4*inf+zro: x=standard_NaN;
exceptions|=I_BIT;@+break;
case 4*num+num: @<Multiply nonzero numbers and |return|@>;
}
if (xs=='-') x.h|=sign_bit;
return x;
}
@ @<The usual NaN cases@>=
case 4*nan+nan:@+if (!(y.h&0x80000)) exceptions|=I_BIT; /* |y| is signaling */
case 4*zro+nan: case 4*num+nan: case 4*inf+nan:
if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;
return z;
case 4*nan+zro: case 4*nan+num: case 4*nan+inf:
if (!(y.h&0x80000)) exceptions|=I_BIT, y.h|=0x80000;
return y;
@ @<Multiply nonzero numbers and |return|@>=
xe=ye+ze-0x3fd; /* the raw exponent */
x=omult(yf,shift_left(zf,9));
if (aux.h>=0x400000) xf=aux;
else xf=shift_left(aux,1), xe--;
if (x.h||x.l) xf.l|=1; /* adjust the sticky bit */
return fpack(xf,xe,xs,cur_round);
@ @<Subr...@>=
octa fdivide @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa fdivide(y,z)
octa y,z;
{
ftype yt,zt;
int ye,ze;
char ys,zs;
octa x,xf,yf,zf;
register int xe;
register char xs;
yt=funpack(y,&yf,&ye,&ys);
zt=funpack(z,&zf,&ze,&zs);
xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
switch (4*yt+zt) {
@t\4@>@<The usual NaN cases@>;
case 4*zro+inf: case 4*zro+num: case 4*num+inf: x=zero_octa;@+break;
case 4*num+zro: exceptions|=Z_BIT;
case 4*inf+num: case 4*inf+zro: x=inf_octa;@+break;
case 4*zro+zro: case 4*inf+inf: x=standard_NaN;
exceptions|=I_BIT;@+break;
case 4*num+num: @<Divide nonzero numbers and |return|@>;
}
if (xs=='-') x.h|=sign_bit;
return x;
}
@ @<Divide nonzero numbers...@>=
xe=ye-ze+0x3fd; /* the raw exponent */
xf=odiv(yf,zero_octa,shift_left(zf,9));
if (xf.h>=0x800000) {
aux.l|=xf.l&1;
xf=shift_right(xf,1,1);
xe++;
}
if (aux.h||aux.l) xf.l|=1; /* adjust the sticky bit */
return fpack(xf,xe,xs,cur_round);
@*Floating addition and subtraction. Now for the bread-and-butter
operation, the sum of two floating point numbers.
It is not terribly difficult, but many cases need to be handled carefully.
@<Subr...@>=
octa fplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
octa fplus(y,z)
octa y,z;
{
ftype yt,zt;
int ye,ze;
char ys,zs;
octa x,xf,yf,zf;
register int xe,d;
register char xs;
yt=funpack(y,&yf,&ye,&ys);
zt=funpack(z,&zf,&ze,&zs);
switch (4*yt+zt) {
@t\4@>@<The usual NaN cases@>;
case 4*zro+num: return fpack(zf,ze,zs,ROUND_OFF);@+break; /* may underflow */
case 4*num+zro: return fpack(yf,ye,ys,ROUND_OFF);@+break; /* may underflow */
case 4*inf+inf:@+if (ys!=zs) {
exceptions|=I_BIT;@+x=standard_NaN;@+xs=zs;@+break;
}
case 4*num+inf: case 4*zro+inf: x=inf_octa;@+xs=zs;@+break;
case 4*inf+num: case 4*inf+zro: x=inf_octa;@+xs=ys;@+break;
case 4*num+num:@+ if (y.h!=(z.h^0x80000000) || y.l!=z.l)
@<Add nonzero numbers and |return|@>;
case 4*zro+zro: x=zero_octa;
xs=(ys==zs? ys: cur_round==ROUND_DOWN? '-': '+');@+break;
}
if (xs=='-') x.h|=sign_bit;
return x;
}
@ @<Add nonzero numbers...@>=
{@+octa o,oo;
if (ye<ze || (ye==ze && (yf.h<zf.h || (yf.h==zf.h && yf.l<zf.l))))
@<Exchange |y| with |z|@>;
d=ye-ze;
xs=ys, xe=ye;
if (d) @<Adjust for difference in exponents@>;
if (ys==zs) {
xf=oplus(yf,zf);
if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
}@+else {
xf=ominus(yf,zf);
if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
else@+ while (xf.h<0x400000) xe--, xf=shift_left(xf,1);
}
return fpack(xf,xe,xs,cur_round);
}
@ @<Exchange |y| with |z|@>=
{
o=yf, yf=zf, zf=o;
d=ye, ye=ze, ze=d;
d=ys, ys=zs, zs=d;
}
@ Proper rounding requires two bits to the right of the fraction delivered
to~|fpack|. The first is the true next bit of the result;
the other is a ``sticky'' bit, which is nonzero if any further bits of the
true result are nonzero. Sticky rounding to an integer takes
$x$ into the number $\lfloor x/2\rfloor+\lceil x/2\rceil$.
@^sticky bit@>
Some subtleties need to be observed here, in order to
prevent the sticky bit from being shifted left. If we did not
shift |yf| left~1 before shifting |zf| to the right, an incorrect
answer would be obtained in certain cases---for example, if
$|yf|=2^{54}$, $|zf|=2^{54}+2^{53}-4$, $d=52$.
@<Adjust for difference in exponents@>=
{
if (d<=2) zf=shift_right(zf,d,1); /* exact result */
else if (d>54) zf.h=0, zf.l=1; /* tricky but OK */
else {
if (ys!=zs) d--,xe--,yf=shift_left(yf,1);
o=zf;
zf=shift_right(o,d,1);
oo=shift_left(zf,d);
if (oo.l!=o.l || oo.h!=o.h) zf.l|=1;
}
}
@ The comparison of floating point numbers with respect to $\epsilon$
shares some of the characteristics of floating point addition/subtraction.
In some ways it is simpler, and in other ways it is more difficult;
we might as well deal with it now. % anyways
Subroutine |fepscomp(y,z,e,s)| returns 2 if |y|, |z|, or |e| is a NaN
or |e| is negative. It returns 1 if |s=0| and $y\approx z\ (e)$ or if
|s!=0| and $y\sim z\ (e)$,
as defined in Section~4.2.2 of {\sl Seminumerical Algorithms\/};
otherwise it returns~0.
@<Subr...@>=
int fepscomp @,@,@[ARGS((octa,octa,octa,int))@];@+@t}\6{@>
int fepscomp(y,z,e,s)
octa y,z,e; /* the operands */
int s; /* test similarity? */
{
octa yf,zf,ef,o,oo;
int ye,ze,ee;
char ys,zs,es;
register int yt,zt,et,d;
et=funpack(e,&ef,&ee,&es);
if (es=='-') return 2;
switch (et) {
case nan: return 2;
case inf: ee=10000;
case num: case zro: break;
}
yt=funpack(y,&yf,&ye,&ys);
zt=funpack(z,&zf,&ze,&zs);
switch (4*yt+zt) {
case 4*nan+nan: case 4*nan+inf: case 4*nan+num: case 4*nan+zro:
case 4*inf+nan: case 4*num+nan: case 4*zro+nan: return 2;
case 4*inf+inf: return (ys==zs || ee>=1023);
case 4*inf+num: case 4*inf+zro: case 4*num+inf: case 4*zro+inf:
return (s && ee>=1022);
case 4*zro+zro: return 1;
case 4*zro+num: case 4*num+zro:@+ if (!s) return 0;
case 4*num+num: break;
}
@<Compare two numbers with respect to epsilon and |return|@>;
}
@ The relation $y\approx z\ (\epsilon)$ reduces to
$y\sim z\ (\epsilon/2^d)$, if $d$~is the difference between the
larger and smaller exponents of $y$ and~$z$.
@<Compare two numbers with respect to epsilon and |return|@>=
@<Unsubnormalize |y| and |z|, if they are subnormal@>;
if (ye<ze || (ye==ze && (yf.h<zf.h || (yf.h==zf.h && yf.l<zf.l))))
@<Exchange |y| with |z|@>;
if (ze==zero_exponent) ze=ye;
d=ye-ze;
if (!s) ee-=d;
if (ee>=1023) return 1; /* if $\epsilon\ge2$, $z\in N_\epsilon(y)$ */
@<Compute the difference of fraction parts, |o|@>;
if (!o.h && !o.l) return 1;
if (ee<968) return 0; /* if $y\ne z$ and $\epsilon<2^{-54}$, $y\not\sim z$ */
if (ee>=1021) ef=shift_left(ef,ee-1021);
else ef=shift_right(ef,1021-ee,1);
return o.h<ef.h || (o.h==ef.h && o.l<=ef.l);
@ @<Unsubnormalize |y| and |z|, if they are subnormal@>=
if (ye<0 && yt!=zro) yf=shift_left(y,2), ye=0;
if (ze<0 && zt!=zro) zf=shift_left(z,2), ze=0;
@ At this point $y\sim z$ if and only if
$$|yf|+(-1)^{[ys=zs]}|zf|/2^d\le 2^{ee-1021}|ef|=2^{55}\epsilon.$$
We need to evaluate this relation without overstepping the bounds of
our simulated 64-bit registers.
When $d>2$, the difference of fraction parts might not fit exactly
in an octabyte;
in that case the numbers are not similar unless $\epsilon>3/8$,
and we replace the difference by the ceiling of the
true result. When $\epsilon<1/8$, our program essentially replaces
$2^{55}\epsilon$ by $\lfloor2^{55}\epsilon\rfloor$. These
truncations are not needed simultaneously. Therefore the logic
is justified by the facts that, if $n$ is an integer, we have
$x\le n$ if and only if $\lceil x\rceil\le n$;
$n\le x$ if and only if $n\le\lfloor x\rfloor$. (Notice that the
concept of ``sticky bit'' is {\it not\/} appropriate here.)
@^sticky bit@>
@<Compute the difference of fraction parts, |o|@>=
if (d>54) o=zero_octa,oo=zf;
else o=shift_right(zf,d,1),oo=shift_left(o,d);
if (oo.h!=zf.h || oo.l!=zf.l) { /* truncated result, hence $d>2$ */
if (ee<1020) return 0; /* difference is too large for similarity */
o=incr(o,ys==zs? 0: 1); /* adjust for ceiling */
}
o=(ys==zs? ominus(yf,o): oplus(yf,o));
@*Floating point output conversion.
The |print_float| routine converts an octabyte to a floating decimal
representation that will be input as precisely the same value.
@^binary-to-decimal conversion@>
@^radix conversion@>
@^multiprecision conversion@>
@<Subr...@>=
static void bignum_times_ten @,@,@[ARGS((bignum*))@];
static void bignum_dec @,@,@[ARGS((bignum*,bignum*,tetra))@];
static int bignum_compare @,@,@[ARGS((bignum*,bignum*))@];
void print_float @,@,@[ARGS((octa))@];@+@t}\6{@>
void print_float(x)
octa x;
{
@<Local variables for |print_float|@>;
if (x.h&sign_bit) printf("-");
@<Extract the exponent |e| and determine the
fraction interval $[f\dts g]$ or $(f\dts g)$@>;
@<Store $f$ and $g$ as multiprecise integers@>;
@<Compute the significant digits |s| and decimal exponent |e|@>;
@<Print the significant digits with proper context@>;
}
@ One way to visualize the problem being solved here is to consider
the vastly simpler case in which there are only 2-bit exponents
and 2-bit fractions. Then the sixteen possible 4-bit combinations
have the following interpretations:
$$\def\\{\;\dts\;}
\vbox{\halign{#\qquad&$#$\hfil\cr
0000&[0\\0.125]\cr
0001&(0.125\\0.375)\cr
0010&[0.375\\0.625]\cr
0011&(0.625\\0.875)\cr
0100&[0.875\\1.125]\cr