-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathecm.cpp
16749 lines (13918 loc) · 731 KB
/
ecm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**************************************************************
*
* ecm.cpp
*
* ECM, P-1, and P+1 factoring routines
*
* Original author: Richard Crandall - www.perfsci.com
* Adapted to Mersenne numbers and optimized by George Woltman
* Further optimizations from Paul Zimmerman's GMP-ECM program
* Other important ideas courtesy of Peter Montgomery, Alex Kruppa, Mihai Preda, Pavel Atnashev.
*
* c. 1997 Perfectly Scientific, Inc.
* c. 1998-2024 Mersenne Research, Inc.
* All Rights Reserved.
*
*************************************************************/
/* Includes */
#include "common.h" // Included in all GIMPS sources
#include "commonb.h"
#include "commonc.h"
#include "ecm.h"
#include "exponentiate.h"
#include "pair.h"
#include "pm1prob.h"
#include "polymult.h"
#include "primenet.h"
#include <iterator>
#include <map>
/* Include much of resume.c from GMP_ECM sources */
#include "resume_gmp.c"
/* Global variables */
int QA_IN_PROGRESS = FALSE;
int QA_TYPE = 0;
giant QA_FACTOR = NULL;
int PRAC_SEARCH = 7;
/* C++14 feature we want to use, but we only require C++11 */
namespace local {
template< class Iterator >
std::reverse_iterator<Iterator> make_reverse_iterator(Iterator i)
{
return std::reverse_iterator<Iterator>(i);
}
}
/* Forward declarations for structures used to track a Weierstrass, Edwards, and Montgomery ECM point -- needed for Dmultiple class and ecm handle */
struct ws {
gwnum x; /* x or FFT(x) */
gwnum y; /* y or FFT(y) */
gwnum z; /* z or FFT(z) */
};
struct ed {
gwnum x; /* x or FFT(x) */
gwnum y; /* y or FFT(y) */
gwnum z; /* z or FFT(z), if NULL implies z = 1 */
gwnum t; /* Extended coordinates, see "Twisted Edwards Curves Revisited" by Huseyin Hisil, et.al. */
bool alternate_format; /* Coordinates are in an alternate format (see ed_funky_dbl) */
};
struct xz {
gwnum x; /* x or FFT(x) */
gwnum z; /* z or FFT(z) */
};
/**********************************************************************************************************************/
/* Manage sets of relative primes to D/2 */
/**********************************************************************************************************************/
/* Experimentally derived sets of D/2 relative primes. Balancing good pairing % with faster initialization times. */
/* Data came from analyzing pairing percentage for B1=700000, B2=23100000, D=210, density = 0.29. */
/* This set of data requires some intermediate calculations during initialization. */
// The first value is the number of extra full sets, the second value is the number of extra partial sets. The final values are the extra sets.
// For now we are not using these relp sets that require intermediate calculations during init. Someday we should try incorporating them.
#ifdef BETTER_RELP_SETS
int16_t relp_set_hard4[] = {4,1,1, 0,1,4,10 ,2,7}; // 69.89%
int16_t relp_set_hard4a[] = {4,0,1, 0,1,3,9 ,6}; // 69.88%
//int16_t relp_set_hard4b[] = {4,1,0, 0,1,4,9 ,2}; // 69.88%
//int16_t relp_set_hard4c[] = {4,1,1, 0,1,4,16 ,2,8}; // 69.87%
int16_t relp_set_hard5[] = {5,1,1, 0,1,4,9,36 ,2,18}; // 77.00%
//int16_t relp_set_hard5a[] = {5,2,1, 0,1,4,10,25 ,2,6,12}; // 76.99%
//int16_t relp_set_hard5b[] = {5,2,1, 0,1,4,10,28 ,2,7,14}; // 76.97%
//int16_t relp_set_hard5c[] = {5,2,1, 0,1,4,16,64 ,2,8,32}; // 76.96%
//int16_t relp_set_hard5d[] = {5,1,1, 0,1,4,9,25 ,2,17}; // 76.95%
int16_t relp_set_hard5e[] = {5,0,1, 0,1,3,7,28 ,14}; // 76.89%
int16_t relp_set_hard6[] = {6,2,1, 0,1,3,9,21,84 ,6,12,42}; // 82.89%
//int16_t relp_set_hard6a[] = {xxx, 0,1,4,9,36,84}; // 82.87%
//int16_t relp_set_hard6b[] = {xxx, 0,1,3,10,28,73}; // 82.87%
int16_t relp_set_hard6c[] = {6,1,1, 0,1,3,7,21,63 ,14,42}; // 82.72%
int16_t relp_set_hard6d[] = {6,0,1, 0,1,3,7,15,63 ,31}; // 82.64%
int16_t relp_set_hard7[] = {7,4,1, 0,1,3,9,21,84,49 ,6,7,14,42,28}; // 87.70%
//int16_t relp_set_hard7a[] = {xxx, 0,1,4,9,36,84,109}; // 87.70%
//int16_t relp_set_hard7b[] = {xxx, 0,1,3,9,21,84,129}; // 87.68%
int16_t relp_set_hard7c[] = {7,0,1, 0,1,3,7,15,31,127 ,63}; // 87.50%
//int16_t relp_set_hard8[] = {xxx, 0,1,3,9,21,84,49,129}; // 91.58%
//int16_t relp_set_hard8a[] = {xxx, 0,1,3,9,21,84,49,109}; // 91.51%
//int16_t relp_set_hard9[] = {xxx, 0,1,3,7,15,31,63,127,90}; // 94.26%
// This "typo" easy9 produced better results sometimes, like 1% better for B1=10000, B2=500000, buffers=2160
//int16_t relp_set_NOTeasy9[] = {9,0,0, 0,1,3,7,15,31,47,63,27};
#endif
//GW I'm getting occasional reports of crashes on big pairing jobs (https://www.mersenneforum.org/showpost.php?p=643362&postcount=511).
//GW Polymult has pretty much made large pairing jobs irrelevant, so I'm capping the max relp sets at 12 in hopes that eliminates crashes.
#define MAX_RELP_SETS 12
/* This set of data requires no intermediate calculations (except for discarding the usable set -1) during initialization. */
/* That is, every D/2 set can be calculated be adding a multiple-of-D to a previous D/2 set, where the difference is also a previous D/2 set. */
/* NOTE: We might eek out a little more performance by using "negative sets". Try building on 0,1,-3 = 61.13%. If we do explore negative sets, */
/* beware that there are optimizations dealing with relocs near B2_start and B2_end in pair.cpp that assumes sets are predominately positive. */
int16_t relp_set_easy2[] = {2,0,0, 0,-1}; // 33.16% 49.57%
int16_t relp_set_easy3[] = {3,0,0, 0,1,-3}; // 61.13%
int16_t relp_set_easy4[] = {4,0,0, 0,1,3,7}; // 69.69%
int16_t relp_set_easy5[] = {5,0,0, 0,1,3,7,15}; // 76.74%
int16_t relp_set_easy6[] = {6,0,0, 0,1,3,7,15,31}; // 82.51%
int16_t relp_set_easy7[] = {7,0,0, 0,1,3,7,15,31,63}; // 87.40%
int16_t relp_set_easy8[] = {8,0,0, 0,1,3,7,15,31,63,127}; // 91.43%
int16_t relp_set_easy9[] = {9,0,0, 0,1,3,7,15,31,63,127,47}; // 93.13%
int16_t relp_set_easy10[] = {10,0,0, 0,1,3,6,13,27,55,111,223,51}; // 96.04%
int16_t relp_set_easy11[] = {11,0,0, 0,1,3,6,13,27,48,55,111,223,5}; // 97.36%
int16_t relp_set_easy12[] = {12,0,0, 0,1,3,5,6,13,27,48,55,111,223,96}; // 98.17%
int16_t relp_set_easy13[] = {13,0,0, 0,1,3,5,6,13,21,27,48,55,111,223,219}; // 98.70%
int16_t relp_set_easy14[] = {14,0,0, 0,1,3,5,6,13,21,27,48,55,111,219,223,209}; // 99.03%
int16_t relp_set_easy15[] = {15,0,0, 0,1,3,5,6,13,21,27,48,55,111,219,223,209,83}; // 99.25%
int16_t relp_set_easy16[] = {16,0,0, 0,1,3,5,6,13,21,27,48,55,83,111,209,219,223,201}; // 99.39%
int16_t relp_set_easy17[] = {17,0,0, 0,1,3,5,6,13,21,27,48,55,83,111,201,209,219,223,216}; // 99.48%
int16_t relp_set_easy18[] = {18,0,0, 0,1,3,5,6,13,21,27,48,55,83,111,201,209,216,219,223,231}; // 99.55%
int16_t relp_set_easy19[] = {19,0,0, 0,1,3,5,6,13,21,27,48,55,83,111,201,209,216,219,223,231,20}; // 99.59%
int16_t relp_set_easy20[] = {20,0,0, 0,1,3,5,6,13,20,21,27,48,55,83,111,201,209,216,219,223,231,222}; // 99.62%
/* Return a relp_set that will work with the given multiplier */
/* Over time we may enhance this to return different relp_sets based on the bounds and/or cost of computing intermediate sets. */
int16_t *relp_set_selection (int L) {
if (L > MAX_RELP_SETS) L = MAX_RELP_SETS;
if (L <= 2) return relp_set_easy2;
if (L <= 3) return relp_set_easy3;
if (L <= 4) return relp_set_easy4;
if (L <= 5) return relp_set_easy5;
if (L <= 6) return relp_set_easy6;
if (L <= 7) return relp_set_easy7;
if (L <= 8) return relp_set_easy8;
if (L <= 9) return relp_set_easy9;
if (L <= 10) return relp_set_easy10;
if (L <= 11) return relp_set_easy11;
if (L <= 12) return relp_set_easy12;
if (L <= 13) return relp_set_easy13;
if (L <= 14) return relp_set_easy14;
if (L <= 15) return relp_set_easy15;
if (L <= 16) return relp_set_easy16;
if (L <= 17) return relp_set_easy17;
if (L <= 18) return relp_set_easy18;
if (L <= 19) return relp_set_easy19;
return relp_set_easy20;
}
/* Return the maximum relp_set from the array of relp_sets */
int16_t get_max_relp_set (
int16_t *relp_sets)
{
int16_t num_permanent_sets = *relp_sets;
int16_t max_set;
for (int16_t i = 0; i < num_permanent_sets; i++) {
if (i == 0 || max_set < relp_sets[3 + i]) max_set = relp_sets[3 + i];
}
return (max_set);
}
// Data stored for each relative prime set to compute. These are stored in a map sorted by relp set number
class relp_set_data {
public:
// Constructor
relp_set_data (int16_t b, bool c, bool d)
{ last_full_used_by = last_partial_used_by = b; nQx_store = base = Dmultiple = diff = 0; permanent = c; partial = d; }
// Determine if this is the last use of a value in calculating relp data
bool is_last_use (int16_t relp_set, int i, int num_partials, int numrels, bool inverted_access_pattern) {
// If the relp_set being calculated matches the last-partial-usage of this set, then this is the last use
if (relp_set == last_partial_used_by) return (TRUE);
// If the relp_set being calculated matches neither the last-partial-usage or last-full-usage of this set, then this is not the last use
if (relp_set != last_full_used_by) return (FALSE);
// See if inverted access patterns cancel out
inverted_access_pattern ^= last_partial_use_inverted;
// Return TRUE if not accessing one of the relps that are needed in the last-partial-usage
return ((!inverted_access_pattern && i >= num_partials) || (inverted_access_pattern && i < numrels - num_partials));
}
// Data
int16_t nQx_store; // Where this relp_set is stored in the nQx array
int16_t last_full_used_by; // The largest full relp_set that used this relp_set as base or diff
int16_t last_partial_used_by; // The largest partial relp_set that used this relp_set as base or diff
int16_t base; // Set will be computed by base + Dmultiple and diff
int16_t Dmultiple; // Set will be computed by base + Dmultiple and diff
int16_t diff; // Set will be computed by base + Dmultiple and diff
bool permanent; // TRUE if this relp_set is used in creating prime pairings
bool partial; // TRUE if only part of the relp_set must be calculated
bool last_partial_use_inverted; // TRUE if the last partial usage is in reverse order
};
// Custom sort order so that relp_sets are ordered 0,-1,1,-2,... This mimics the order in which the sets need to be computed.
struct relpclasscomp {
bool operator() (const int16_t &lhs, const int16_t &rhs) const {
return (abs(lhs) < abs(rhs) || (abs(lhs) == abs(rhs) && lhs < rhs));
}
};
using relp_set_data_map = std::map<int16_t,relp_set_data,relpclasscomp>;
// Data stored for each multiple-of-D used in calculating relp_sets. These are stored in a sorted map.
class Dmultiple_data {
public:
// Constructor
Dmultiple_data (int16_t rlub, uint64_t dlub) {val.x = NULL; val.z = NULL; relp_last_used_by = rlub; Dmult_last_used_by = dlub; base = addin = diff = 0; }
// Destructor
~Dmultiple_data () { if (!do_not_free_val) { gwfree (gwdata, val.x); gwfree (gwdata, val.z); } }
// Set val buffers
void set_val_buffers (gwhandle *gw, gwnum x, bool do_not_free_flag) { gwdata = gw; val.x = x; val.z = NULL; do_not_free_val = do_not_free_flag; }
void set_val_buffers (gwhandle *gw, gwnum x, gwnum z, bool do_not_free_flag) { gwdata = gw; val.x = x; val.z = z; do_not_free_val = do_not_free_flag; }
// Update Dmult_last_used_by
void set_Dmult_last_used_by (uint64_t x) { if (Dmult_last_used_by < x) Dmult_last_used_by = x; }
// Free value if Dmult_last_used_by satisfied
void free_if_Dmult_last_used_by (uint64_t x) {
if (do_not_free_val) return;
if (relp_last_used_by == 0 && Dmult_last_used_by == x) {
gwfree (gwdata, val.x);
gwfree (gwdata, val.z);
val.x = val.z = NULL;
}
}
// Return TRUE if relp_last_used_by is satisfied
bool will_free_due_to_relp_last_used_by (int16_t x) {
if (do_not_free_val) return (FALSE);
return (relp_last_used_by == x);
}
// Free value if relp_last_used_by satisfied
void free_if_relp_last_used_by (int16_t x) {
if (will_free_due_to_relp_last_used_by (x)) {
gwfree (gwdata, val.x);
gwfree (gwdata, val.z);
val.x = val.z = NULL;
}
}
// Data
int16_t relp_last_used_by; // The largest relp_set that needs this Dmultiple. Zero if not needed by any relp_sets.
uint64_t Dmult_last_used_by; // The largest Dmultiple calculation that needs this Dmultiple
uint64_t base; // Dmultiple will be computed by base + addin and diff
uint64_t addin; // Dmultiple will be computed by base + Dmultiple and diff
uint64_t diff; // Dmultiple will be computed by base + Dmultiple and diff
struct xz val; // Ptr to multiple-of-D value stored as a struct xz for ECM (P-1/P+1 stores the value in val.x)
gwhandle *gwdata; // Handle used to free val's gwnums
bool do_not_free_val; // Flag to prohibit freeing val's gwnums
};
using Dmultiple_data_map = std::map<uint64_t,Dmultiple_data>;
// Populate the relp_set_data and Dmultiple_data structures based on the relp_set that has been selected
void process_relp_sets (
int totrels,
int numrels,
int16_t *relp_sets,
relp_set_data_map &relp_set_map,
Dmultiple_data_map &Dmultiple_map)
{
int16_t num_permanent_sets = *relp_sets++;
int16_t num_intermediate_full_sets = *relp_sets++;
int16_t num_intermediate_parital_sets = *relp_sets++;
int16_t num_sets = num_permanent_sets + num_intermediate_full_sets + num_intermediate_parital_sets;
// Create an entry for each relp_set plus the special -1 relp_set which is always calculated.
for (int16_t i = 0; i < num_sets + 1; i++) {
bool permanent = (i < num_permanent_sets);
bool partial = (i == num_permanent_sets - 1) || (i >= num_sets - num_intermediate_parital_sets && i < num_sets);
int16_t set;
// Get the relp_set #, the last iteration through this loop processes the special -1 relp_set
if (i == num_sets) { if (relp_set_map.find(-1) != relp_set_map.end()) continue; set = -1; }
else set = relp_sets[i];
// Add relp_set to the map
relp_set_map.insert ({set, {set, permanent, partial}});
}
// Loop through relp_sets to decide where each set is stored in the nQx array. The nQx array indexes must match the ordering used by
// fill_pairmap in pair.cpp. The pairing routine sorts the relative primes.
int relps_in_partial_set = one_based_modulo (totrels, numrels);
int permanent_nQx_index = 0;
int temporary_nQx_index = (num_permanent_sets - 1) * numrels + relps_in_partial_set;
for (auto this_set = relp_set_map.begin(); this_set != relp_set_map.end(); ++this_set) {
if (this_set->second.permanent) {
this_set->second.nQx_store = permanent_nQx_index;
permanent_nQx_index += this_set->second.partial ? relps_in_partial_set : numrels;
} else {
this_set->second.nQx_store = temporary_nQx_index;
temporary_nQx_index += this_set->second.partial ? relps_in_partial_set : numrels;
}
}
// Now loop through relp_sets calculating how each will be constructed
for (auto this_set = relp_set_map.begin(); this_set != relp_set_map.end(); ++this_set) {
// Relp sets 0 and -1 are precomputed
if (this_set->first == 0 || this_set->first == -1) continue;
// Scan through smaller sets looking for a base set and diff set that lets us create this set using a Dmultiple and Lucas additions
for (auto base_set = local::make_reverse_iterator(this_set); ; ++base_set) { // Can use std::make_reverse_iterator in C++14
// This relp_set and base relp_set must have the same sign
if (((this_set->first < 0) ^ (base_set->first < 0)) == 1) continue;
// Check if a diff_set exists such that base_set can be used to calculate this_set
int16_t Dmultiple = abs(this_set->first) - abs(base_set->first);
int16_t diff = (this_set->first > 0) ? base_set->first - Dmultiple : base_set->first + Dmultiple;
auto diff_set = relp_set_map.find(diff);
if (diff_set == relp_set_map.end()) continue;
// Don't build a full relp set from a partial relp set
if (!this_set->second.partial && (base_set->second.partial || diff_set->second.partial)) continue;
// Don't build a negative partial relp set from a partial diff relp set. Reverse indexing into the partial diff set will not work.
if (this_set->first < 0 && this_set->second.partial && diff_set->second.partial) continue;
// We've found our Lucas addition, remember it
this_set->second.base = base_set->first;
this_set->second.Dmultiple = Dmultiple;
this_set->second.diff = diff;
// Base set is never accessed in inverted order
base_set->second.last_partial_used_by = this_set->first;
base_set->second.last_partial_use_inverted = FALSE;
if (!this_set->second.partial) base_set->second.last_full_used_by = this_set->first;
// Diff set might be accessed in inverted order
diff_set->second.last_partial_used_by = this_set->first;
diff_set->second.last_partial_use_inverted = (this_set->first < 0) ^ (diff_set->first < 0);
if (!this_set->second.partial) diff_set->second.last_full_used_by = this_set->first;
// Update or create Dmultiple data
auto Dfind = Dmultiple_map.find(Dmultiple);
if (Dfind != Dmultiple_map.end()) Dfind->second.relp_last_used_by = this_set->first;
else Dmultiple_map.insert ({(uint64_t) Dmultiple, {this_set->first, (uint64_t) Dmultiple}});
break;
}
}
}
// Process the Dmultiple_data_map, finding best way to calculate the needed D-multiples
void process_Dmult_map (
Dmultiple_data_map &Dmultiple_map)
{
// Determine how all the multiples-of-D will be calculated
for (auto this_Dmult = Dmultiple_map.rbegin(); this_Dmult != Dmultiple_map.rend(); ++this_Dmult) {
if (this_Dmult->first == 1) break;
// Create a default Lucas addition solution in case no better plan is found
uint64_t proposed_base, proposed_addin, proposed_diff, create_1, create_2;
if ((this_Dmult->first & 1) == 0) { // Even multiple of D
proposed_base = this_Dmult->first / 2;
proposed_addin = this_Dmult->first / 2;
proposed_diff = 0;
create_1 = proposed_base;
create_2 = 0;
} else { // Odd multiple of D
proposed_base = this_Dmult->first / 2 + 1;
proposed_addin = this_Dmult->first / 2;
proposed_diff = 1;
create_1 = proposed_base;
create_2 = proposed_addin;
}
// Scan through smaller D-multiples looking for a base, addin, and diff that lets us compute this D-multiple using Lucas addition
// We scan in the forward direction to find a doubling solution first (ell_dbl is cheaper than ell_add).
for (auto base_Dmult = Dmultiple_map.lower_bound((this_Dmult->first+1)/2); base_Dmult->first < this_Dmult->first; base_Dmult++) {
// Computed the needed addin and diff
uint64_t addin = this_Dmult->first - base_Dmult->first;
uint64_t diff = base_Dmult->first - addin;
bool addin_exists = (Dmultiple_map.find(addin) != Dmultiple_map.end());
bool diff_exists = (diff == 0 || Dmultiple_map.find(diff) != Dmultiple_map.end());
// If we've found a suitable Lucas addition, remember it
if (addin_exists && diff_exists) {
proposed_base = base_Dmult->first;
proposed_addin = addin;
proposed_diff = diff;
create_1 = create_2 = 0;
break;
}
// If we have addin, remember diff as a possible route to computing this_Dmult
if (addin_exists && diff < create_1) {
proposed_base = base_Dmult->first;
proposed_addin = addin;
proposed_diff = diff;
create_1 = diff;
create_2 = 0;
}
// If we have diff, remember addin as a possible route to computing this_Dmult
if (diff_exists && addin < create_1) {
proposed_base = base_Dmult->first;
proposed_addin = addin;
proposed_diff = diff;
create_1 = addin;
create_2 = 0;
}
}
// Record the best proposed solution
this_Dmult->second.base = proposed_base;
this_Dmult->second.addin = proposed_addin;
this_Dmult->second.diff = proposed_diff;
if (create_1) Dmultiple_map.insert ({create_1, {0, create_1}}); // Create new D-multiple not used by any relp set
if (create_2) Dmultiple_map.insert ({create_2, {0, create_2}}); // Create new D-multiple not used by any relp set
// Maintain Dmult_last_used_by
Dmultiple_map.find (proposed_base)->second.set_Dmult_last_used_by (this_Dmult->first);
if (proposed_diff != 0) {
Dmultiple_map.find (proposed_addin)->second.set_Dmult_last_used_by (this_Dmult->first);
Dmultiple_map.find (proposed_diff)->second.set_Dmult_last_used_by (this_Dmult->first);
}
}
}
/**********************************************************************************************************************/
/* ECM, P-1, and P+1 best stage 2 implementation routines */
/**********************************************************************************************************************/
/* Various D values that we will consider in creating an ECM, P-1 or P+1 plan */
/* Selecting the best D value is tricky. There are (at least) 5 different factors at play) */
/* 1) The more primes in D, the higher the density of primes among the remaining relative primes, which helps pairing */
/* 2) The higher D is the less cost there is moving from D-section to D-section */
/* 3) For a given amount of memory, a higher D means lower multiplier (totrels/numrels) which hurts pairing */
/* 4) A lower first missing prime, means a higher B2 start which translates to fewer D sections (great for ECM) and more relocatables for better pairing */
/* 5) A lower second missing prime means more relocatables with multiple relocations, better for pairing */
struct D_data {
int D;
int numrels; // Number of values less than D/2 that are relatively prime to D
int first_missing_prime; // First prime that does not divide D
int second_missing_prime; // Second prime that does not divide D
} D_data[] = {
{ 2 * 3 * 5 , 2 * 4 / 2, 7, 11 }, // 30, 4
{ 2 * 3 * 7 , 2 * 6 / 2, 5, 11 }, // 42, 6
{ 2 * 3 * 5 * 2, 2 * 4 / 2 * 2, 7, 11 }, // 60, 8
{ 2 * 3 * 5 * 3, 2 * 4 / 2 * 3, 7, 11 }, // 90, 12
{ 2 * 3 * 5 * 4, 2 * 4 / 2 * 4, 7, 11 }, // 120, 16
{ 2 * 3 * 7 * 3, 2 * 6 / 2 * 3, 5, 11 }, // 126, 18
{ 2 * 3 * 5 * 5, 2 * 4 / 2 * 5, 7, 11 }, // 150, 20
{ 2 * 3 * 5 * 7 , 2 * 4 * 6 / 2, 11, 13 }, // 210, 24
{ 2 * 3 * 5 * 11 , 2 * 4 * 10 / 2, 7, 13 }, // 330, 40
{ 2 * 3 * 7 * 7, 2 * 6 / 2 * 7, 5, 11 }, // 294, 42
{ 2 * 3 * 5 * 7 * 2, 2 * 4 * 6 / 2 * 2, 11, 13 }, // 420, 48
{ 2 * 3 * 7 * 11 , 2 * 6 * 10 / 2, 5, 13 }, // 462, 60
{ 2 * 3 * 5 * 7 * 3, 2 * 4 * 6 / 2 * 3, 11, 13 }, // 630, 72
{ 2 * 3 * 7 *14, 2 * 6 / 2 * 14, 5, 11 }, // 588, 84
{ 2 * 3 * 5 * 7 * 4, 2 * 4 * 6 / 2 * 4, 11, 13 }, // 840, 96
{ 2 * 3 * 7 *16, 2 * 6 / 2 * 16, 5, 11 }, // 672, 96
{ 2 * 3 * 7 *18, 2 * 6 / 2 * 18, 5, 11 }, // 756, 108
{ 2 * 3 * 5 * 7 * 5, 2 * 4 * 6 / 2 * 5, 11, 13 }, // 1050, 120
{ 2 * 3 * 7 * 11 * 2, 2 * 6 * 10 / 2 * 2, 5, 13 }, // 924, 120
{ 2 * 3 * 7 *21, 2 * 6 / 2 * 21, 5, 11 }, // 882, 126
{ 2 * 3 * 5 * 7 * 6, 2 * 4 * 6 / 2 * 6, 11, 13 }, // 1260, 144
{ 2 * 3 * 5 * 7 * 7, 2 * 4 * 6 / 2 * 7, 11, 13 }, // 1470, 168
{ 2 * 3 * 7 * 11 * 3, 2 * 6 * 10 / 2 * 3, 5, 13 }, // 1386, 180
{ 2 * 3 * 5 * 7 * 8, 2 * 4 * 6 / 2 * 8, 11, 13 }, // 1680, 192
{ 2 * 3 * 5 * 7 * 9, 2 * 4 * 6 / 2 * 9, 11, 13 }, // 1890, 216
{ 2 * 3 * 7 * 11 * 4, 2 * 6 * 10 / 2 * 4, 5, 13 }, // 1848, 240
{ 2 * 3 * 5 * 7 * 11 , 2 * 4 * 6 * 10 / 2, 13, 17 }, // 2310, 240
{ 2 * 3 * 5 * 7 * 13 , 2 * 4 * 6 * 12 / 2, 11, 17 }, // 2730, 288
{ 2 * 3 * 7 * 11 * 6, 2 * 6 * 10 / 2 * 6, 5, 13 }, // 2772, 360
{ 2 * 3 * 5 * 7 * 11 * 2, 2 * 4 * 6 * 10 / 2 * 2, 13, 17 }, // 4620, 480
{ 2 * 3 * 5 * 11 * 13 , 2 * 4 * 10 * 12 / 2, 7, 17 }, // 4290, 480
{ 2 * 3 * 5 * 7 * 11 * 3, 2 * 4 * 6 * 10 / 2 * 3, 13, 17 }, // 6930, 720
{ 2 * 3 * 7 * 11 * 13 , 2 * 6 * 10 * 12 / 2, 5, 17 }, // 6006, 720
{ 2 * 3 * 5 * 7 * 11 * 4, 2 * 4 * 6 * 10 / 2 * 4, 13, 17 } // 9240, 960
};
#define NUM_D (sizeof(D_data)/sizeof(struct D_data))
#define MAX_D 9240
#define MAX_RELPRIMES 960
#ifdef ALTERNATE_D_DATA_FOR_POLYS_BUILT_FROM_TABLE_BELOW
struct D_data poly_D_data[] = {
{ 2 * 3 * 9, 1 *9, 5, 7 }, // 54, 9
{ 2 * 3 *11, 10 , 5, 7 }, // 66, 10
{ 2 * 3 *13, 12 , 5, 7 }, // 78, 12
{ 2 * 3 *17, 16 , 5, 7 }, // 102, 16
{ 2 * 3 *19, 18 , 5, 7 }, // 114, 18
{ 2 * 3 * 5 , 4 , 7, 11 }, // 30, 4
{ 2 * 3 * 5 * 3, 4 *3, 7, 11 }, // 90, 12
{ 2 * 3 * 5 * 5, 4 *5, 7, 11 }, // 150, 20
{ 2 * 3 * 5 * 9, 4 *9, 7, 11 }, // 270, 36
{ 2 * 3 * 5 *13, 4 * 12 , 7, 11 }, // 390, 48
{ 2 * 3 * 5 *15, 4 *15, 7, 11 }, // 450, 60
{ 2 * 3 * 5 *17, 4 * 16 , 7, 11 }, // 510, 64
{ 2 * 3 * 5 *19, 4 * 18 , 7, 11 }, // 570, 72
{ 2 * 3 * 7 , 6 , 5, 11 }, // 42, 6
{ 2 * 3 * 7 * 3, 6 *3, 5, 11 }, // 126, 18
{ 2 * 3 * 7 * 7, 6 *7, 5, 11 }, // 294, 42
{ 2 * 3 * 7 * 9, 6 *9, 5, 11 }, // 378, 54
{ 2 * 3 * 7 *13, 6 * 12 , 5, 11 }, // 546, 72
{ 2 * 3 * 7 *17, 6 * 16 , 5, 11 }, // 714, 96
{ 2 * 3 * 7 *19, 6 * 18 , 5, 11 }, // 798, 108
{ 2 * 3 * 5 * 7 , 4 * 6 , 11, 13 }, // 210, 24
{ 2 * 3 * 5 * 7 * 3, 4 * 6 *3, 11, 13 }, // 630, 72
{ 2 * 3 * 5 * 7 * 5, 4 * 6 *5, 11, 13 }, // 1050, 120
{ 2 * 3 * 5 * 7 * 7, 4 * 6 *7, 11, 13 }, // 1470, 168
{ 2 * 3 * 5 * 7 * 9, 4 * 6 *9, 11, 13 }, // 1890, 216
{ 2 * 3 * 5 * 7 *15, 4 * 6 *15, 11, 13 }, // 3150, 360
{ 2 * 3 * 5 * 7 *17, 4 * 6 * 16 , 11, 13 }, // 3570, 384
{ 2 * 3 * 5 * 7 *19, 4 * 6 * 18 , 11, 13 }, // 3990, 432
{ 2 * 3 * 7 * 11 , 6 * 10 , 5, 13 }, // 462, 60
{ 2 * 3 * 7 * 11 *3, 6 * 10 *3, 5, 13 }, // 1386, 180
{ 2 * 3 * 7 * 11 *7, 6 * 10 *7, 5, 13 }, // 3234, 420
{ 2 * 3 * 7 * 11 *9, 6 * 10 *9, 5, 13 }, // 4158, 540
{ 2 * 3 * 7 * 11 *11, 6 * 10 *11, 5, 13 }, // 5082, 660
{ 2 * 3 * 7 * 11 *17, 6 * 10 * 16 , 5, 13 }, // 7854, 960
{ 2 * 3 * 7 * 11 *19, 6 * 10 * 18 , 5, 13 }, // 8778, 1080
{ 2 * 3 * 5 * 11 , 4 * 10 , 7, 13 }, // 330, 40
{ 2 * 3 * 5 * 11 *3, 4 * 10 *3, 7, 13 }, // 990, 120
{ 2 * 3 * 5 * 11 *5, 4 * 10 *5, 7, 13 }, // 1650, 200
{ 2 * 3 * 5 * 11 *9, 4 * 10 *9, 7, 13 }, // 2970, 360
{ 2 * 3 * 5 * 11 *11, 4 * 10 *11, 7, 13 }, // 3630, 440
{ 2 * 3 * 5 * 11 *15, 4 * 10 *15, 7, 13 }, // 4950, 600
{ 2 * 3 * 5 * 11 *17, 4 * 10 * 16 , 7, 13 }, // 5610, 640
{ 2 * 3 * 5 * 11 *19, 4 * 10 * 18 , 7, 13 }, // 6270, 720
{ 2 * 3 * 5 * 7 * 13 , 4 * 6 * 12 , 11, 17 }, // 2730, 288
{ 2 * 3 * 5 * 7 * 13 *3, 4 * 6 * 12 *3, 11, 17 }, // 8190, 864
{ 2 * 3 * 5 * 7 * 13 *5, 4 * 6 * 12 *5, 11, 17 }, // 13650, 1440
{ 2 * 3 * 5 * 7 * 13 *7, 4 * 6 * 12 *7, 11, 17 }, // 19110, 2016
{ 2 * 3 * 5 * 7 * 13 *9, 4 * 6 * 12 *9, 11, 17 }, // 24570, 2592
{ 2 * 3 * 5 * 7 * 13 *13, 4 * 6 * 12 *13, 11, 17 }, // 35490, 3744
{ 2 * 3 * 5 * 7 * 13 *19, 4 * 6 * 12 * 18 , 11, 17 }, // 51870, 5184
{ 2 * 3 * 5 * 11 * 13 , 4 * 10 * 12 , 7, 17 }, // 4290, 480
{ 2 * 3 * 5 * 11 * 13 *3, 4 * 10 * 12 *3, 7, 17 }, // 12870, 1440
{ 2 * 3 * 5 * 11 * 13 *5, 4 * 10 * 12 *5, 7, 17 }, // 21450, 2400
{ 2 * 3 * 5 * 11 * 13 *9, 4 * 10 * 12 *9, 7, 17 }, // 38610, 4320
{ 2 * 3 * 5 * 11 * 13 *11, 4 * 10 * 12 *11, 7, 17 }, // 47190, 5280
{ 2 * 3 * 5 * 11 * 13 *13, 4 * 10 * 12 *13, 7, 17 }, // 55770, 6240
{ 2 * 3 * 5 * 11 * 13 *15, 4 * 10 * 12 *15, 7, 17 }, // 64350, 7200
{ 2 * 3 * 5 * 11 * 13 *19, 4 * 10 * 12 * 18 , 7, 17 }, // 81510, 8640
{ 2 * 3 * 5 * 7 * 11 , 4 * 6 * 10 , 13, 17 }, // 2310, 240
{ 2 * 3 * 5 * 7 * 11 *3, 4 * 6 * 10 *3, 13, 17 }, // 6930, 720
{ 2 * 3 * 5 * 7 * 11 *5, 4 * 6 * 10 *5, 13, 17 }, // 11550, 1200
{ 2 * 3 * 5 * 7 * 11 *7, 4 * 6 * 10 *7, 13, 17 }, // 16170, 1680
{ 2 * 3 * 5 * 7 * 11 *9, 4 * 6 * 10 *9, 13, 17 }, // 20790, 2160
{ 2 * 3 * 5 * 7 * 11 *11, 4 * 6 * 10 *11, 13, 17 }, // 25410, 2640
{ 2 * 3 * 5 * 7 * 11 *15, 4 * 6 * 10 *15, 13, 17 }, // 34650, 3600
{ 2 * 3 * 5 * 7 * 11 *19, 4 * 6 * 10 * 18 , 13, 17 }, // 43890, 4320
{ 2 * 3 * 5 * 7 * 11 * 13 , 4 * 6 * 10 * 12 , 17, 19 }, // 30030, 2880
{ 2 * 3 * 5 * 7 * 11 * 13 *3, 4 * 6 * 10 * 12 *3, 17, 19 }, // 90090, 8640
{ 2 * 3 * 5 * 7 * 11 * 13 *5, 4 * 6 * 10 * 12 *5, 17, 19 }, // 150150, 14400
{ 2 * 3 * 5 * 7 * 11 * 13 *7, 4 * 6 * 10 * 12 *7, 17, 19 }, // 210210, 20160
{ 2 * 3 * 5 * 7 * 11 * 13 *9, 4 * 6 * 10 * 12 *9, 17, 19 }, // 270270, 25920
{ 2 * 3 * 5 * 7 * 11 * 13 *11, 4 * 6 * 10 * 12*11, 17, 19 }, // 330330, 31680
{ 2 * 3 * 5 * 7 * 11 * 13 *13, 4 * 6 * 10 * 12*13, 17, 19 }, // 390390, 37440
{ 2 * 3 * 5 * 7 * 11 * 13 *15, 4 * 6 * 10 * 12*15, 17, 19 }, // 450450, 43200
{ 2 * 3 * 5 * 7 * 11 * 13 *19, 4 * 6 * 10 * 12*18, 17, 23 }, // 570570, 51840
{ 2 * 3 * 5 * 7 * 11 * 13 *21, 4 * 6 * 10 * 12*21, 17, 19 }, // 630630, 60480
{ 2 * 3 * 5 * 7 * 11 * 13 *23, 4 * 6 * 10 * 12*22, 17, 19 }, // 690690, 63360
{ 2 * 3 * 5 * 7 * 11 * 13 *25, 4 * 6 * 10 * 12*25, 17, 19 }, // 750750, 72000
{ 2 * 3 * 5 * 7 * 11 * 13 *27, 4 * 6 * 10 * 12*27, 17, 19 }, // 810810, 77760
{ 2 * 3 * 5 * 7 * 11 * 13 *29, 4 * 6 * 10 * 12*28, 17, 19 }, // 870870, 80640
{ 2 * 3 * 5 * 7 * 11 * 13 *31, 4 * 6 * 10 * 12*30, 17, 19 }, // 930930, 86400
{ 2 * 3 * 5 * 7 * 11 * 13 *37, 4 * 6 * 10 * 12*36, 17, 19 }, // 1111110, 103680
{ 2 * 3 * 5 * 7 * 11 * 13 *41, 4 * 6 * 10 * 12*40, 17, 19 }, // 1231230, 115200
{ 2 * 3 * 5 * 7 * 11 * 13 *43, 4 * 6 * 10 * 12*42, 17, 19 }, // 1291290, 120960
{ 2 * 3 * 5 * 7 * 11 * 13 *47, 4 * 6 * 10 * 12*46, 17, 19 }, // 1411410, 132480
{ 2 * 3 * 5 * 7 * 11 * 13 *53, 4 * 6 * 10 * 12*52, 17, 19 }, // 1591590, 149760
{ 2 * 3 * 5 * 7 * 11 * 13 *59, 4 * 6 * 10 * 12*58, 17, 19 }, // 1771770, 167040
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 , 4 * 6 * 10 * 12 * 16 , 19, 23 }, // 510510, 46080
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *3, 4 * 6 * 10 * 12 * 16 *3, 19, 23 }, // 1531530, 138240
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *5, 4 * 6 * 10 * 12 * 16 *5, 19, 23 }, // 2552550, 230400
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *7, 4 * 6 * 10 * 12 * 16 *7, 19, 23 }, // 3573570, 322560
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *9, 4 * 6 * 10 * 12 * 16 *9, 19, 23 }, // 4594590, 414720
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *11, 4 * 6 * 10 * 12 * 16*11, 19, 23 }, // 5615610, 506880
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *13, 4 * 6 * 10 * 12 * 16*13, 19, 23 }, // 6636630, 599040
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *15, 4 * 6 * 10 * 12 * 16*15, 19, 23 }, // 7657650, 691200
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *17, 4 * 6 * 10 * 12 * 16*17, 19, 23 }, // 8678670, 783360
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *25, 4 * 6 * 10 * 12 * 16*25, 19, 23 }, // 12762750, 1152000
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *35, 4 * 6 * 10 * 12 * 16*35, 19, 23 }, // 17867850, 1612800
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *45, 4 * 6 * 10 * 12 * 16*45, 19, 23 }, // 22972950, 2073600
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 , 4 * 6 * 10 * 12 * 16 * 18 , 23, 29 }, // 9699690, 829440
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *3, 4 * 6 * 10 * 12 * 16 * 18 *3, 23, 29 }, // 29099070, 2488320
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *5, 4 * 6 * 10 * 12 * 16 * 18 *5, 23, 29 }, // 48498450, 4147200
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *7, 4 * 6 * 10 * 12 * 16 * 18 *7, 23, 29 }, // 67897830, 5806080
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *9, 4 * 6 * 10 * 12 * 16 * 18 *9, 23, 29 }, // 87297210, 7464960
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *11, 4 * 6 * 10 * 12 * 16 * 18*11, 23, 29 }, // 106696590, 9123840
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *13, 4 * 6 * 10 * 12 * 16 * 18*13, 23, 29 }, // 126095970, 10782720
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *15, 4 * 6 * 10 * 12 * 16 * 18*15, 23, 29 }, // 145495350, 12441600
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *17, 4 * 6 * 10 * 12 * 16 * 18*17, 23, 29 }, // 164894730, 14100480
};
#endif
struct D_data poly_D_data[] = {
{ 2 * 3 * 5 , 4 , 7, 11 }, // 30, 4
{ 2 * 3 * 7 , 6 , 5, 11 }, // 42, 6
{ 2 * 3 * 9, 1 *9, 5, 7 }, // 54, 9
{ 2 * 3 *11, 10 , 5, 7 }, // 66, 10
{ 2 * 3 * 5 * 3, 4 *3, 7, 11 }, // 90, 12
{ 2 * 3 *17, 16 , 5, 7 }, // 102, 16
{ 2 * 3 * 7 * 3, 6 *3, 5, 11 }, // 126, 18
{ 2 * 3 * 5 * 5, 4 *5, 7, 11 }, // 150, 20
{ 2 * 3 * 5 * 7 , 4 * 6 , 11, 13 }, // 210, 24
{ 2 * 3 * 5 * 9, 4 *9, 7, 11 }, // 270, 36
{ 2 * 3 * 5 * 11 , 4 * 10 , 7, 13 }, // 330, 40
{ 2 * 3 * 5 *13, 4 * 12 , 7, 11 }, // 390, 48
{ 2 * 3 * 7 * 11 , 6 * 10 , 5, 13 }, // 462, 60
{ 2 * 3 * 5 *17, 4 * 16 , 7, 11 }, // 510, 64
{ 2 * 3 * 5 * 7 * 3, 4 * 6 *3, 11, 13 }, // 630, 72
{ 2 * 3 * 7 *17, 6 * 16 , 5, 11 }, // 714, 96
{ 2 * 3 * 7 *19, 6 * 18 , 5, 11 }, // 798, 108
{ 2 * 3 * 5 * 7 * 5, 4 * 6 *5, 11, 13 }, // 1050, 120
{ 2 * 3 * 5 * 7 * 7, 4 * 6 *7, 11, 13 }, // 1470, 168
{ 2 * 3 * 5 * 11 *5, 4 * 10 *5, 7, 13 }, // 1650, 200
{ 2 * 3 * 5 * 7 * 9, 4 * 6 *9, 11, 13 }, // 1890, 216
{ 2 * 3 * 5 * 7 * 11 , 4 * 6 * 10 , 13, 17 }, // 2310, 240
{ 2 * 3 * 5 * 7 * 13 , 4 * 6 * 12 , 11, 17 }, // 2730, 288
{ 2 * 3 * 5 * 7 *15, 4 * 6 *15, 11, 13 }, // 3150, 360
{ 2 * 3 * 5 * 7 *17, 4 * 6 * 16 , 11, 13 }, // 3570, 384
{ 2 * 3 * 5 * 7 *19, 4 * 6 * 18 , 11, 13 }, // 3990, 432
{ 2 * 3 * 5 * 11 * 13 , 4 * 10 * 12 , 7, 17 }, // 4290, 480
{ 2 * 3 * 5 * 11 *15, 4 * 10 *15, 7, 13 }, // 4950, 600
{ 2 * 3 * 5 * 11 *17, 4 * 10 * 16 , 7, 13 }, // 5610, 640
{ 2 * 3 * 5 * 7 * 11 *3, 4 * 6 * 10 *3, 13, 17 }, // 6930, 720
{ 2 * 3 * 5 * 7 * 13 *3, 4 * 6 * 12 *3, 11, 17 }, // 8190, 864
{ 2 * 3 * 7 * 11 *19, 6 * 10 * 18 , 5, 13 }, // 8778, 1080
{ 2 * 3 * 5 * 7 * 11 *5, 4 * 6 * 10 *5, 13, 17 }, // 11550, 1200
{ 2 * 3 * 5 * 7 * 13 *5, 4 * 6 * 12 *5, 11, 17 }, // 13650, 1440
{ 2 * 3 * 5 * 7 * 11 *7, 4 * 6 * 10 *7, 13, 17 }, // 16170, 1680
{ 2 * 3 * 5 * 7 * 13 *7, 4 * 6 * 12 *7, 11, 17 }, // 19110, 2016
{ 2 * 3 * 5 * 7 * 11 *9, 4 * 6 * 10 *9, 13, 17 }, // 20790, 2160
{ 2 * 3 * 5 * 11 * 13 *5, 4 * 10 * 12 *5, 7, 17 }, // 21450, 2400
{ 2 * 3 * 5 * 7 * 13 *9, 4 * 6 * 12 *9, 11, 17 }, // 24570, 2592
{ 2 * 3 * 5 * 7 * 11 *11, 4 * 6 * 10 *11, 13, 17 }, // 25410, 2640
{ 2 * 3 * 5 * 7 * 11 * 13 , 4 * 6 * 10 * 12 , 17, 19 }, // 30030, 2880
{ 2 * 3 * 5 * 7 * 11 *15, 4 * 6 * 10 *15, 13, 17 }, // 34650, 3600
{ 2 * 3 * 5 * 7 * 13 *13, 4 * 6 * 12 *13, 11, 17 }, // 35490, 3744
{ 2 * 3 * 5 * 7 * 11 *19, 4 * 6 * 10 * 18 , 13, 17 }, // 43890, 4320
{ 2 * 3 * 5 * 7 * 13 *19, 4 * 6 * 12 * 18 , 11, 17 }, // 51870, 5184
{ 2 * 3 * 5 * 11 * 13 *13, 4 * 10 * 12 *13, 7, 17 }, // 55770, 6240
{ 2 * 3 * 5 * 11 * 13 *15, 4 * 10 * 12 *15, 7, 17 }, // 64350, 7200
{ 2 * 3 * 5 * 7 * 11 * 13 *3, 4 * 6 * 10 * 12 *3, 17, 19 }, // 90090, 8640
{ 2 * 3 * 5 * 7 * 11 * 13 *5, 4 * 6 * 10 * 12 *5, 17, 19 }, // 150150, 14400
{ 2 * 3 * 5 * 7 * 11 * 13 *7, 4 * 6 * 10 * 12 *7, 17, 19 }, // 210210, 20160
{ 2 * 3 * 5 * 7 * 11 * 13 *9, 4 * 6 * 10 * 12 *9, 17, 19 }, // 270270, 25920
{ 2 * 3 * 5 * 7 * 11 * 13 *11, 4 * 6 * 10 * 12*11, 17, 19 }, // 330330, 31680
{ 2 * 3 * 5 * 7 * 11 * 13 *13, 4 * 6 * 10 * 12*13, 17, 19 }, // 390390, 37440
{ 2 * 3 * 5 * 7 * 11 * 13 *15, 4 * 6 * 10 * 12*15, 17, 19 }, // 450450, 43200
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 , 4 * 6 * 10 * 12 * 16 , 19, 23 }, // 510510, 46080
{ 2 * 3 * 5 * 7 * 11 * 13 *19, 4 * 6 * 10 * 12*18, 17, 23 }, // 570570, 51840
{ 2 * 3 * 5 * 7 * 11 * 13 *21, 4 * 6 * 10 * 12*21, 17, 19 }, // 630630, 60480
{ 2 * 3 * 5 * 7 * 11 * 13 *23, 4 * 6 * 10 * 12*22, 17, 19 }, // 690690, 63360
{ 2 * 3 * 5 * 7 * 11 * 13 *25, 4 * 6 * 10 * 12*25, 17, 19 }, // 750750, 72000
{ 2 * 3 * 5 * 7 * 11 * 13 *27, 4 * 6 * 10 * 12*27, 17, 19 }, // 810810, 77760
{ 2 * 3 * 5 * 7 * 11 * 13 *29, 4 * 6 * 10 * 12*28, 17, 19 }, // 870870, 80640
{ 2 * 3 * 5 * 7 * 11 * 13 *31, 4 * 6 * 10 * 12*30, 17, 19 }, // 930930, 86400
{ 2 * 3 * 5 * 7 * 11 * 13 *37, 4 * 6 * 10 * 12*36, 17, 19 }, // 1111110, 103680
{ 2 * 3 * 5 * 7 * 11 * 13 *41, 4 * 6 * 10 * 12*40, 17, 19 }, // 1231230, 115200
{ 2 * 3 * 5 * 7 * 11 * 13 *43, 4 * 6 * 10 * 12*42, 17, 19 }, // 1291290, 120960
{ 2 * 3 * 5 * 7 * 11 * 13 *47, 4 * 6 * 10 * 12*46, 17, 19 }, // 1411410, 132480
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *3, 4 * 6 * 10 * 12 * 16 *3, 19, 23 }, // 1531530, 138240
{ 2 * 3 * 5 * 7 * 11 * 13 *53, 4 * 6 * 10 * 12*52, 17, 19 }, // 1591590, 149760
{ 2 * 3 * 5 * 7 * 11 * 13 *59, 4 * 6 * 10 * 12*58, 17, 19 }, // 1771770, 167040
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *5, 4 * 6 * 10 * 12 * 16 *5, 19, 23 }, // 2552550, 230400
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *7, 4 * 6 * 10 * 12 * 16 *7, 19, 23 }, // 3573570, 322560
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *9, 4 * 6 * 10 * 12 * 16 *9, 19, 23 }, // 4594590, 414720
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *11, 4 * 6 * 10 * 12 * 16*11, 19, 23 }, // 5615610, 506880
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *13, 4 * 6 * 10 * 12 * 16*13, 19, 23 }, // 6636630, 599040
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *15, 4 * 6 * 10 * 12 * 16*15, 19, 23 }, // 7657650, 691200
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *17, 4 * 6 * 10 * 12 * 16*17, 19, 23 }, // 8678670, 783360
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 , 4 * 6 * 10 * 12 * 16 * 18 , 23, 29 }, // 9699690, 829440
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *25, 4 * 6 * 10 * 12 * 16*25, 19, 23 }, // 12762750, 1152000
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *35, 4 * 6 * 10 * 12 * 16*35, 19, 23 }, // 17867850, 1612800
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 *45, 4 * 6 * 10 * 12 * 16*45, 19, 23 }, // 22972950, 2073600
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *3, 4 * 6 * 10 * 12 * 16 * 18 *3, 23, 29 }, // 29099070, 2488320
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *5, 4 * 6 * 10 * 12 * 16 * 18 *5, 23, 29 }, // 48498450, 4147200
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *7, 4 * 6 * 10 * 12 * 16 * 18 *7, 23, 29 }, // 67897830, 5806080
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *9, 4 * 6 * 10 * 12 * 16 * 18 *9, 23, 29 }, // 87297210, 7464960
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *11, 4 * 6 * 10 * 12 * 16 * 18*11, 23, 29 }, // 106696590, 9123840
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *13, 4 * 6 * 10 * 12 * 16 * 18*13, 23, 29 }, // 126095970, 10782720
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *15, 4 * 6 * 10 * 12 * 16 * 18*15, 23, 29 }, // 145495350, 12441600
{ 2 * 3 * 5 * 7 * 11 * 13 * 17 * 19 *17, 4 * 6 * 10 * 12 * 16 * 18*17, 23, 29 }, // 164894730, 14100480
};
#define POLY_NUM_D (sizeof(poly_D_data)/sizeof(struct D_data))
#define POLY_MAX_D 164894730
#define POLY_MAX_RELPRIMES 14100480
// Map a D value to the D_data index
int map_D_to_index (int D)
{
for (int i = 0; i < NUM_D; i++) if (D_data[i].D == D) return (i);
ASSERTG (0);
return (0);
}
// Map a poly D value to the poly_D_data index
int map_polyD_to_index (int D)
{
for (int i = 0; i < POLY_NUM_D; i++) if (poly_D_data[i].D == D) return (i);
ASSERTG (0);
return (0);
}
// Return minimum number of relprimes for a D (same as number of relative primes less than D/2)
#define map_D_to_numrels(D) D_data[map_D_to_index(D)].numrels
// Return first_missing_prime for a poly D
#define map_polyD_to_first_missing_prime(D) poly_D_data[map_polyD_to_index(D)].first_missing_prime
/* Structure used to pass data to and return data from ECM, P-1, P+1 stage 2 costing routines */
/* Originally this was only used for the prime pairing algorithm. Now it also used to analyze polymult-based algorithms. */
struct common_cost_data {
/* Data sent to cost function follows */
gwhandle *gwdata; /* Gwnum handle (not required) */
pmhandle *polydata; /* Polymult handle. If costing a polymult must be minimally initialized for polymult_mem_required. */
long stage1_fftlen; /* FFT length being used in stage 2 */
long stage2_fftlen; /* FFT length being used in stage 2 */
double stage1_cost; /* Cost (in stage 1 FFTs) of stage 1 */
double gcd_cost; /* Cost (in stage 1 FFTs) of a GCD */
double modinv_cost; /* Cost (in stage 1 FFTs) of a modular inverse */
double poly_compression; /* Expected compression from polymult_preprocess for polynomials that can be compressed. */
int stage1_threads; /* Number of threads used during stage 1 */
int stage2_threads; /* Number of threads used during stage 2 */
uint64_t numvals; /* Maximum number of gwnum temporaries that can be allocated for relative prime data, pairmaps, */
/* ECM pooling. Additional gwnums needed by ECM/P-1/P+1 are not included in this maximum number. */
bool use_poly_D_data; /* Set to TRUE if selecting from the alternate D_data designed for polymult */
bool centers_on_Dmultiple; /* Set to TRUE if pairing is centered on a multiple of D (as opposed to odd multiple of D/2) */
int required_missing; /* An interrupted polymult stage 2 relocated primes using this small prime. New D must not be a multiple of this value. */
uint64_t gap_start; /* last_relocatable or zero (when pairmaps are split, c_start to gap_start are the remaining relocatables to pair) */
uint64_t gap_end; /* a.k.a C_done (when pairmaps are split, gap_end to C are the remaining primes to pair) */
/* Data returned from cost function follows */
double max_pairmap_size; /* Maximum size of the pairing map */
int D; /* D value for big steps */
int totrels; /* Total number of relative primes used for pairing */
int numrels; /* Number of relative primes less than D / 2 */
double multiplier; /* Totrels / numrels */
int first_missing_prime; /* First missing prime - used to calculate B2_start */
int16_t *relp_sets; /* The relp_sets to use in stage 2 */
uint64_t B1; /* Stage 1 end */
uint64_t B2; /* Stage 2 end */
uint64_t B2_start; /* Stage 2 start */
uint64_t numDsections; /* Number of D sections to process */
uint64_t max_pairmap_Dsections; /* Number of D sections that can fit in a pairing map */
int numvals_consumed_by_pairmap; /* Number of gwnums no longer available due to large pairing map */
double est_pair_pct; /* Estimated pairing percentage */
double est_numprimes; /* Expected number of primes in stage 2 */
double est_numpairs; /* Expected number of pairs in stage 2 */
double est_numsingles; /* Expected number of singles in stage 2 */
double est_pairing_runtime; /* Estimated stage 2 pairing cost (expressed as equivalent number of transforms) */
double est_init_transforms; /* Estimated stage 2 init cost (expressed as number of transforms) */
double est_stage2_transforms; /* Estimated stage 2 main loop cost (expressed as number of transforms) */
double est_init_polymult; /* Estimated stage 2 init polymult cost (expressed as number of equivalent gwnum transforms) */
double est_stage2_polymult; /* Estimated stage 2 main loop polymult cost (expressed as number of equivalent gwnum transforms) */
double est_stage2_stage1_ratio;/* Estimated stage 2 runtime / stage 1 runtime ratio */
uint64_t stage2_numvals; /* Returned total number of gwnum temporaries that will be used in stage 2 */
double stage2_cost; /* Cost (in stage 1 FFTs) of stage 2 */
double total_cost; /* Cost for stage 1, stage 2, and GCD */
double efficiency; /* "Value" of B1/B2 combo vs. total_cost */
};
/* Calculate the GCD and modular inverse cost in terms of number of transforms. */
/* The costs come from the timing code running ECM on M604 and spreadsheeting. */
/* Since GCDs are single-threaded we increase for the costs for multi-threaded runs. */
void init_gcd_costs (
double bit_length, // Bit length of number we are GCDing
void *cost_func_data) // Cost_data structure to initialize
{
struct common_cost_data *c = (struct common_cost_data *) cost_func_data;
c->gcd_cost = 320.53 * log (bit_length) - 3302.0;
if (c->gcd_cost < 100.0) c->gcd_cost = 100.0;
c->modinv_cost = 570.16 * log (bit_length) - 6188.4;
if (c->modinv_cost < 1.25 * c->gcd_cost) c->modinv_cost = 1.25 * c->gcd_cost;
if (c->stage2_threads == 2) c->gcd_cost *= 1.9, c->modinv_cost *= 1.9;
else if (c->stage2_threads == 3) c->gcd_cost *= 2.7, c->modinv_cost *= 2.7;
else if (c->stage2_threads >= 4) c->gcd_cost *= 3.2, c->modinv_cost *= 3.2;
}
/* Select the best D value for the given the number of temporary gwnums that can be allocated. We trade off more D steps vs. better */
/* prime pairing vs. different B2 start points using the ECM, P-1, or P+1 costing function. */
/* Returns the cost. Cost function can return more information, such as best ECM pooling type. */
double best_stage2_impl_internal (
uint64_t B, /* Bound #1 */
uint64_t C_start, /* Starting point for bound #2 (usually B1, but can be different for save files and required multiple pairmaps) */
uint64_t C, /* Bound #2 */
uint64_t max_numvals, /* Number of gwnum temporaries that can be used. Different than c->numvals when binary searching on numvals. */
double (*cost_func)(void *), /* ECM, P-1 or P+1 costing function */
void *cost_func_data) /* User-supplied data to pass to the costing function */
{
struct common_cost_data *c = (struct common_cost_data *) cost_func_data;
uint64_t B2_end;
float pairing_percentage, pairing_runtime;
double efficiency, best_efficiency;
int D, i, best_i, j;
/* Select which D_data to use */
struct D_data *d_data; /* Prime pairing or polymult D_data */
int num_d; /* Number of D_data entries */
if (c->use_poly_D_data) {
d_data = poly_D_data;
num_d = POLY_NUM_D;
} else {
d_data = D_data;
num_d = NUM_D;
}
/* Kludge to make one pass finding the best D value and a second pass to re-call the cost function using the best D. */
/* Re-calling the cost function allows us to pass back any data that P-1, P+1, or ECM may need to save. */
efficiency = best_efficiency = -1.0;
for (j = 0; j < 2; j++) {
/* Try various values of D until we find the best one. */
for (i = 0; i < num_d; i++) {
/* On second pass only cost the best D from the first pass */
if (j == 1) {
if (best_efficiency < 0.0) break;
i = best_i;
}
/* Check if this D value would require using too many gwnum temporaries */
if (d_data[i].numrels > max_numvals) break;
/* We move the smaller primes to a composite value higher up in the B1 to B2 range to improve our pairing chances and */
/* reduce the number of D sections to process. Calculate B2_start - the first prime that cannot be relocated higher. */
D = d_data[i].D;
// Compute values needed by the costing functions
c->B1 = B;
c->B2 = C;
c->D = D;
c->numrels = d_data[i].numrels;
c->first_missing_prime = d_data[i].first_missing_prime;
// Prime pairing "centers" on a multiple of D, thus start and end points are an odd multiple of D/2
if (c->centers_on_Dmultiple) {
B2_end = round_up_to_multiple_of (C - D / 2, D) + D / 2;
c->B2_start = (uint64_t) floor ((double) (B2_end / (double) d_data[i].first_missing_prime));
if (c->B2_start < C_start) c->B2_start = C_start;
if (c->gap_end && c->B2_start < c->gap_end) c->B2_start = c->gap_end;
if (c->B2_start < D / 2) break; // Protects against next line returning negative result
c->B2_start = round_down_to_multiple_of (c->B2_start + D / 2, D) - D / 2;
}
// P-1 polymult pairing "centers" on odd multiples of D/2, thus start and end points are multiples of D
else {
B2_end = round_up_to_multiple_of (C, D);
c->B2_start = (uint64_t) floor ((double) (B2_end / (double) d_data[i].first_missing_prime));
if (c->B2_start < C_start) c->B2_start = C_start;
if (c->gap_end && c->B2_start < c->gap_end) c->B2_start = c->gap_end;
c->B2_start = round_down_to_multiple_of (c->B2_start, D);
}
c->numDsections = (B2_end - c->B2_start) / D;
/* Perform pairing-specific and polymult-specific initializations */
if (!c->use_poly_D_data) {
/* When pairmaps are split, the last_relocatable set by the first pairmap restricts which D values can now be used */
if (c->gap_start && c->gap_start * d_data[i].first_missing_prime > B2_end) continue;
/* Estimate our prime pairing percentage and number of D sections that can fit in a pairing map */
/* We only support relp_sets up to 20 entries */
c->totrels = (max_numvals > MAX_RELP_SETS * d_data[i].numrels) ? MAX_RELP_SETS * d_data[i].numrels : (int) max_numvals;
estimate_pairing (d_data[i].second_missing_prime, C_start, C, c->totrels, d_data[i].numrels, &pairing_percentage, &pairing_runtime);
c->est_pair_pct = pairing_percentage;
c->est_numpairs = (pairing_percentage * c->est_numprimes) / 2.0;
c->est_numsingles = c->est_numprimes - c->est_numpairs * 2.0;
double est_totpairs = c->est_numpairs + c->est_numsingles; // Also acts as estimated size of the pairing map
c->max_pairmap_Dsections = estimate_max_pairmap_Dsections (c->max_pairmap_size, c->numDsections, est_totpairs);
c->numvals_consumed_by_pairmap = (int)
((est_totpairs > c->max_pairmap_size ? c->max_pairmap_size : est_totpairs) / (c->stage2_fftlen * sizeof (double)));
// Catch cases where not enough memory for totrels and the pairing map
if (c->totrels + c->numvals_consumed_by_pairmap > max_numvals) continue;
/* Estimate our cost of finding all the prime pairs. These formulas came from running single threaded timings on 12K, 512K, 5760K FFTs on the */
/* same machine that did the pairing runtimes. Thus, this converts (roughly) pairing runtimes into an equivalent cost in number-of-transforms. */
/* Squaring timings up to 512K: 1.69ms * 2^(log2(FFTsize/512K)*1.0407) */
/* Squaring timings above 512K: 1.69ms * 2^(log2(FFTsize/512K)*1.093) */
if (c->stage2_fftlen < 512*1024)
c->est_pairing_runtime = pairing_runtime / (0.00169 * pow(2.0,log2((double)c->stage2_fftlen/(512.0*1024.0))*1.0407)) * 2.0;
else
c->est_pairing_runtime = pairing_runtime / (0.00169 * pow(2.0,log2((double)c->stage2_fftlen/(512.0*1024.0))*1.093)) * 2.0;
// Make some adjustments for multi-threaded FFTs vs. single-threaded pairing
if (c->stage2_threads == 2) c->est_pairing_runtime *= 1.9;
else if (c->stage2_threads == 3) c->est_pairing_runtime *= 2.7;
else if (c->stage2_threads >= 4) c->est_pairing_runtime *= 3.2;
/* Pick the relp_sets to use. Someday we should expand the options for which relp_sets are tried (don't limit to just the easy sets). */
c->multiplier = (double) c->totrels / (double) c->numrels;
c->relp_sets = relp_set_selection ((int) ceil (c->multiplier));
}
/* When a polymult stage 2 starts, small primes are "moved" to a multiple of first_missing_prime. When polymult stage 2 resumes, we cannot select a */
/* D value that includes the original first_missing_prime as that would miss some of these moved small primes. */
else {
if (c->required_missing && D % c->required_missing == 0) continue;
}
/* Calculate the cost of this stage 2 plan */
efficiency = (*cost_func) (cost_func_data);
//{char buf[200];sprintf (buf, "D: %d, totrels/numrels: %.2f, efficiency: %.2f (%.5f%% %.2f %.2f %.2f)\n", D, c->multiplier, efficiency,
// c->est_pair_pct*100.0,c->est_pairing_runtime,c->est_init_transforms,c->est_stage2_transforms); OutputStr(MAIN_THREAD_NUM, buf);}
/* On second pass, break out of loop after costing the best D from the first pass */
if (j == 1) break;
/* Remember best efficiency and best D */
if (efficiency > best_efficiency) {
best_efficiency = efficiency;
best_i = i;
}
/* Break out of loop after a couple of failed attempts at improving our best (we're going in the wrong direction) */
else {
if (best_efficiency >= 0.0 && i > best_i + 2) break;
}
}
}
/* Return best efficiency */
return (efficiency);
}
/* Binary search for the best number of gwnums to allocate and the best D value to use. Caller specifies the maximum number of gwnums */
/* that can be allocated. We trade off more D steps vs. better prime pairing vs. different B2 start points using the ECM, P-1, or P+1 costing function. */
/* Returns the cost. Cost function can return more information, such as best D value, B2_start, B2_end. */
double best_stage2_impl (
uint64_t B, /* Bound #1 */
uint64_t C_start, /* Starting point for bound #2 -- usually bound #1 */
uint64_t gap_start, /* last_relocatable or zero (when pairmaps are split, c_start to gap_start are the remaining relocatables to pair) */
uint64_t gap_end, /* a.k.a C_done (when pairmaps are split, gap_end to C are the remaining primes to pair) */
uint64_t C, /* Bound #2 */
uint64_t numvals, /* Number of gwnum temporaries that can be used for storing relative prime data (or other uses such as pooling) */
double (*cost_func)(void *), /* ECM, P-1, or P+1 costing function */
void *cost_func_data) /* User-supplied data to pass to the costing function */
{
struct common_cost_data *c = (struct common_cost_data *) cost_func_data;
struct best_stage2_cost {
uint64_t numvals;
double efficiency;
} best[3], midpoint;
/* Sanity check and save the gap parameters */
if (gap_start > gap_end) gap_start = gap_end = 0;
ASSERTG (gap_start == 0 || gap_start >= C_start);
ASSERTG (gap_start == 0 || gap_end < C);
c->gap_start = (gap_start > C_start ? gap_start : 0);
c->gap_end = (gap_end > C_start ? gap_end : 0);
/* Return negative efficiency if numvals is less than D=30 needs */
c->numvals = numvals;
if (c->numvals < 4) return (-1.0);
/* Determine the maximum pairmap size. The costing functions may increase the stage 2 setup costs if the pairmap must be split in chunks. */
if (!c->use_poly_D_data) {
int max_pairmap_size = IniGetInt (INI_FILE, "MaximumBitArraySize", 250);
if (max_pairmap_size > 2000) max_pairmap_size = 2000;
if (max_pairmap_size < 1) max_pairmap_size = 1;
c->max_pairmap_size = (double) max_pairmap_size * 1000000.0;
// Don't let the pairmap consume more than half the numvals (this may not be optimal). Not worth investigating as polymult is the optimal choice.
if (c->max_pairmap_size > (double) numvals / 2 * c->stage2_fftlen * sizeof (double))
c->max_pairmap_size = (double) numvals / 2 * c->stage2_fftlen * sizeof (double);
}
/* Estimate the number of primes between B1 and B2 */
c->est_numprimes = primes_less_than (C) - primes_less_than (C_start);
if (gap_start) c->est_numprimes -= primes_less_than (gap_end) - primes_less_than (gap_start);
/* When binary searching on numvals is not needed (with poly stage 2 maximum numvals is always best), then just do one call to best_stage2_impl_internal */
if (c->use_poly_D_data) return (best_stage2_impl_internal (B, C_start, C, c->numvals, cost_func, cost_func_data));
/* Prepare for a binary search looking for the most efficienct stage 2 implementation varying the number of gwnums available for relative primes data. */
best[0].numvals = 4;
best[0].efficiency = best_stage2_impl_internal (B, C_start, C, best[0].numvals, cost_func, cost_func_data);
if (numvals == 4) return (best[0].efficiency);
best[2].numvals = c->numvals;
best[2].efficiency = best_stage2_impl_internal (B, C_start, C, best[2].numvals, cost_func, cost_func_data);
if (numvals == 5) return (best[2].efficiency);
best[1].numvals = (c->numvals + 4) / 2;
best[1].efficiency = best_stage2_impl_internal (B, C_start, C, best[1].numvals, cost_func, cost_func_data);
//for (int i = 200; i < 1685; i++) {
//char buf[200];