-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCPU jobs; standard.cpp
121 lines (112 loc) · 8.13 KB
/
CPU jobs; standard.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/************************************************************
* File: CPU jobs; standard.cpp Created: 2025/01/21 *
* Last mod.: 2025/02/22 *
* *
* Desc: *
* *
* MIT license Copyright (c) David William Bull *
************************************************************/
#include <cmath>
#include <typedefs.h>
// Fallback definition, used only when nothing included above has already defined UNLOOPx4.
// Expands to four back-to-back copies of `code` (a manual 4x unroll of the statement list).
// The argument is pasted verbatim, so it must not contain a top-level comma.
#ifndef UNLOOPx4
#define UNLOOPx4(code) code code code code
#endif
// Non-simultaneous ALU operations only
// Updates `x` in place with a serial chain of integer steps: each statement reads the value
// written by the one before it. One loop pass executes the four-line step list four times
// (UNLOOPx4), and the loop runs 16 passes.
// The multiplier expressions are integer constants: 789/13+501 = 561, 791/14+502 = 558,
// 787/11+500 = 571. Because they are `ull`, the multiply is carried out in unsigned arithmetic.
void JobALU(si64& x) {
	// Wrapping left shift by one bit. Performed on the unsigned representation because `<<` on a
	// negative signed operand is undefined behaviour before C++20.
	// Assumes si64 is a 64-bit signed integer - TODO confirm against typedefs.h.
	const auto shl1 = [](const si64 v) -> si64 { return static_cast<si64>(static_cast<unsigned long long>(v) << 1); };

	for(ui8 i = 0; i < 16; ++i) {
		// NOTE(review): `i < 32` is always true while the loop bound is 16, so only the
		// left-shift arm ever executes. `^ -1` inverts every bit of the shifted value.
		UNLOOPx4(
			x *= 789ull / 13 + 501; x = ((i < 32 ? shl1(x) : x >> 1) ^ -1) / 7 - 294939;
			x *= 791ull / 14 + 502; x = ((i < 32 ? shl1(x) : x >> 1) ^ -1) / 9 - 294941;
			x *= 789ull / 13 + 501; x = ((i < 32 ? shl1(x) : x >> 1) ^ -1) / 7 - 294939;
			x *= 787ull / 11 + 500; x = ((i < 32 ? shl1(x) : x >> 1) ^ -1) / 5 - 294937;
		)
	}
}
// Non-simultaneous FPU operations only
// Updates `x` in place with a serial chain of floating-point steps (divide, two nested square
// roots, subtract, absolute value, add, divide); each statement consumes the previous result.
// One loop pass executes the four-line step list four times (UNLOOPx4) followed by one
// multiply-add rescale, and the loop runs 16 passes.
// A negative `x` on entry makes the first square root produce NaN, which then propagates.
void JobFPU(fl64& x) {
	for(ui8 i = 0; i < 16; ++i) {
		// std::fabs / std::sqrt are spelled out so the floating-point overloads from <cmath> are
		// always selected; an unqualified `abs` may bind to `int abs(int)` on some toolchains and
		// silently truncate its operand.
		UNLOOPx4(
			x = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.01))) + 0.0001);
			x = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.011))) + 0.001);
			x = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.01))) + 0.01);
			x = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.009))) + 0.1);
		)
		// Rescale once per pass: x = x * (x * 1.01010101010101 + 0.00021).
		x *= x * 1.01010101010101 + 0.00021;
	}
}
// ALU + FPU operations only
// Interleaves two independent serial chains: a floating-point chain on `x` and an integer chain
// on `y`, both updated in place. Statements alternate between the two chains; neither chain
// reads the other's value. One loop pass executes the eight-line step list four times
// (UNLOOPx4) followed by one multiply-add rescale of `x`, and the loop runs 16 passes.
void JobALU_FPU(fl64& x, si64& y) {
	// Wrapping left shift by one bit. Performed on the unsigned representation because `<<` on a
	// negative signed operand is undefined behaviour before C++20.
	// Assumes si64 is a 64-bit signed integer - TODO confirm against typedefs.h.
	const auto shl1 = [](const si64 v) -> si64 { return static_cast<si64>(static_cast<unsigned long long>(v) << 1); };

	for(ui8 i = 0; i < 16; ++i) {
		// std::fabs / std::sqrt are spelled out so the floating-point overloads from <cmath> are
		// always selected; an unqualified `abs` may bind to `int abs(int)` on some toolchains.
		// NOTE(review): `i < 32` is always true while the loop bound is 16, so only the
		// left-shift arm ever executes.
		UNLOOPx4(
			x = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.01))) + 0.0001);
			y *= 789ull / 13 + 501; y = ((i < 32 ? shl1(y) : y >> 1) ^ -1) / 7 - 294939;
			x = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.011))) + 0.001);
			y *= 791ull / 14 + 502; y = ((i < 32 ? shl1(y) : y >> 1) ^ -1) / 9 - 294941;
			x = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.01))) + 0.01);
			y *= 789ull / 13 + 501; y = ((i < 32 ? shl1(y) : y >> 1) ^ -1) / 7 - 294939;
			x = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x / 2.009))) + 0.1);
			y *= 787ull / 11 + 500; y = ((i < 32 ? shl1(y) : y >> 1) ^ -1) / 5 - 294937;
		)
		// Rescale once per pass: x = x * (x * 1.01010101010101 + 0.00021).
		x *= x * 1.01010101010101 + 0.00021;
	}
}
// Memory-loaded non-simultaneous ALU operations only
// Applies the integer step chain to the four elements x[0]..x[3] in place, reading and writing
// them through the pointer. Within each step the elements are visited in the order 0, 2, 1, 3.
// One loop pass executes the step list four times (UNLOOPx4), and the loop runs 16 passes.
// `x` must point to at least four writable elements.
void JobMemALU(si64ptrc x) {
	// Wrapping left shift by one bit. Performed on the unsigned representation because `<<` on a
	// negative signed operand is undefined behaviour before C++20.
	// Assumes si64 is a 64-bit signed integer - TODO confirm against typedefs.h.
	const auto shl1 = [](const si64 v) -> si64 { return static_cast<si64>(static_cast<unsigned long long>(v) << 1); };

	for(ui8 i = 0; i < 16; ++i) {
		// NOTE(review): `i < 32` is always true while the loop bound is 16, so only the
		// left-shift arm ever executes. `^ -1` inverts every bit of the shifted value.
		UNLOOPx4(
			x[0] *= 789ull / 13 + 501; x[2] *= 789ull / 13 + 501; x[1] *= 789ull / 13 + 501; x[3] *= 789ull / 13 + 501;
			x[0] = ((i < 32 ? shl1(x[0]) : x[0] >> 1) ^ -1) / 7 - 294939; x[2] = ((i < 32 ? shl1(x[2]) : x[2] >> 1) ^ -1) / 7 - 294939;
			x[1] = ((i < 32 ? shl1(x[1]) : x[1] >> 1) ^ -1) / 7 - 294939; x[3] = ((i < 32 ? shl1(x[3]) : x[3] >> 1) ^ -1) / 7 - 294939;
			x[0] *= 791ull / 14 + 502; x[2] *= 791ull / 14 + 502; x[1] *= 791ull / 14 + 502; x[3] *= 791ull / 14 + 502;
			x[0] = ((i < 32 ? shl1(x[0]) : x[0] >> 1) ^ -1) / 9 - 294941; x[2] = ((i < 32 ? shl1(x[2]) : x[2] >> 1) ^ -1) / 9 - 294941;
			x[1] = ((i < 32 ? shl1(x[1]) : x[1] >> 1) ^ -1) / 9 - 294941; x[3] = ((i < 32 ? shl1(x[3]) : x[3] >> 1) ^ -1) / 9 - 294941;
			x[0] *= 789ull / 13 + 501; x[2] *= 789ull / 13 + 501; x[1] *= 789ull / 13 + 501; x[3] *= 789ull / 13 + 501;
			x[0] = ((i < 32 ? shl1(x[0]) : x[0] >> 1) ^ -1) / 7 - 294939; x[2] = ((i < 32 ? shl1(x[2]) : x[2] >> 1) ^ -1) / 7 - 294939;
			x[1] = ((i < 32 ? shl1(x[1]) : x[1] >> 1) ^ -1) / 7 - 294939; x[3] = ((i < 32 ? shl1(x[3]) : x[3] >> 1) ^ -1) / 7 - 294939;
			x[0] *= 787ull / 11 + 500; x[2] *= 787ull / 11 + 500; x[1] *= 787ull / 11 + 500; x[3] *= 787ull / 11 + 500;
			x[0] = ((i < 32 ? shl1(x[0]) : x[0] >> 1) ^ -1) / 5 - 294937; x[2] = ((i < 32 ? shl1(x[2]) : x[2] >> 1) ^ -1) / 5 - 294937;
			x[1] = ((i < 32 ? shl1(x[1]) : x[1] >> 1) ^ -1) / 5 - 294937; x[3] = ((i < 32 ? shl1(x[3]) : x[3] >> 1) ^ -1) / 5 - 294937;
		)
	}
}
// Memory-loaded non-simultaneous FPU operations only
// Applies the floating-point step chain to the four elements x[0]..x[3] in place, reading and
// writing them through the pointer. Within each step the elements are visited in the order
// 0, 2, 1, 3. One loop pass executes the step list four times (UNLOOPx4) followed by one
// multiply-add rescale of every element, and the loop runs 16 passes.
// `x` must point to at least four writable elements.
void JobMemFPU(fl64ptrc x) {
	for(ui8 i = 0; i < 16; ++i) {
		// std::fabs / std::sqrt are spelled out so the floating-point overloads from <cmath> are
		// always selected; an unqualified `abs` may bind to `int abs(int)` on some toolchains and
		// silently truncate its operand.
		UNLOOPx4(
			x[0] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.01))) + 0.0001); x[2] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.01))) + 0.0001);
			x[1] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.01))) + 0.0001); x[3] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.01))) + 0.0001);
			x[0] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.011))) + 0.001); x[2] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.011))) + 0.001);
			x[1] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.011))) + 0.001); x[3] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.011))) + 0.001);
			x[0] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.01))) + 0.01); x[2] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.01))) + 0.01);
			x[1] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.01))) + 0.01); x[3] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.01))) + 0.01);
			x[0] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.009))) + 0.1); x[2] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.009))) + 0.1);
			x[1] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.009))) + 0.1); x[3] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.009))) + 0.1);
		)
		// Rescale once per pass: x[k] = x[k] * (x[k] * 1.01010101010101 + 0.00021).
		x[0] *= x[0] * 1.01010101010101 + 0.00021; x[2] *= x[2] * 1.01010101010101 + 0.00021;
		x[1] *= x[1] * 1.01010101010101 + 0.00021; x[3] *= x[3] * 1.01010101010101 + 0.00021;
	}
}
// Memory-loaded ALU + FPU operations only
// Interleaves the floating-point step chain on x[0]..x[3] with the integer step chain on
// y[0]..y[3], all updated in place through the pointers. Each line advances one x element and
// the y element with the same index; within each step the indices are visited in the order
// 0, 2, 1, 3. One loop pass executes the step list four times (UNLOOPx4) followed by one
// multiply-add rescale of every x element, and the loop runs 16 passes.
// `x` and `y` must each point to at least four writable elements.
void JobMemALU_FPU(fl64ptrc x, si64ptrc y) {
	// Wrapping left shift by one bit. Performed on the unsigned representation because `<<` on a
	// negative signed operand is undefined behaviour before C++20.
	// Assumes si64 is a 64-bit signed integer - TODO confirm against typedefs.h.
	const auto shl1 = [](const si64 v) -> si64 { return static_cast<si64>(static_cast<unsigned long long>(v) << 1); };

	for(ui8 i = 0; i < 16; ++i) {
		// std::fabs / std::sqrt are spelled out so the floating-point overloads from <cmath> are
		// always selected; an unqualified `abs` may bind to `int abs(int)` on some toolchains.
		// NOTE(review): `i < 32` is always true while the loop bound is 16, so only the
		// left-shift arm ever executes.
		UNLOOPx4(
			x[0] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.01))) + 0.0001); y[0] *= 789ull / 13 + 501; y[0] = ((i < 32 ? shl1(y[0]) : y[0] >> 1) ^ -1) / 7 - 294939;
			x[2] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.01))) + 0.0001); y[2] *= 789ull / 13 + 501; y[2] = ((i < 32 ? shl1(y[2]) : y[2] >> 1) ^ -1) / 7 - 294939;
			x[1] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.01))) + 0.0001); y[1] *= 789ull / 13 + 501; y[1] = ((i < 32 ? shl1(y[1]) : y[1] >> 1) ^ -1) / 7 - 294939;
			x[3] = std::sqrt(1.12) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.01))) + 0.0001); y[3] *= 789ull / 13 + 501; y[3] = ((i < 32 ? shl1(y[3]) : y[3] >> 1) ^ -1) / 7 - 294939;
			x[0] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.011))) + 0.001); y[0] *= 791ull / 14 + 502; y[0] = ((i < 32 ? shl1(y[0]) : y[0] >> 1) ^ -1) / 9 - 294941;
			x[2] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.011))) + 0.001); y[2] *= 791ull / 14 + 502; y[2] = ((i < 32 ? shl1(y[2]) : y[2] >> 1) ^ -1) / 9 - 294941;
			x[1] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.011))) + 0.001); y[1] *= 791ull / 14 + 502; y[1] = ((i < 32 ? shl1(y[1]) : y[1] >> 1) ^ -1) / 9 - 294941;
			x[3] = std::sqrt(0.91) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.011))) + 0.001); y[3] *= 791ull / 14 + 502; y[3] = ((i < 32 ? shl1(y[3]) : y[3] >> 1) ^ -1) / 9 - 294941;
			x[0] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.01))) + 0.01); y[0] *= 789ull / 13 + 501; y[0] = ((i < 32 ? shl1(y[0]) : y[0] >> 1) ^ -1) / 7 - 294939;
			x[2] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.01))) + 0.01); y[2] *= 789ull / 13 + 501; y[2] = ((i < 32 ? shl1(y[2]) : y[2] >> 1) ^ -1) / 7 - 294939;
			x[1] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.01))) + 0.01); y[1] *= 789ull / 13 + 501; y[1] = ((i < 32 ? shl1(y[1]) : y[1] >> 1) ^ -1) / 7 - 294939;
			x[3] = std::sqrt(1.15) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.01))) + 0.01); y[3] *= 789ull / 13 + 501; y[3] = ((i < 32 ? shl1(y[3]) : y[3] >> 1) ^ -1) / 7 - 294939;
			x[0] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[0] / 2.009))) + 0.1); y[0] *= 787ull / 11 + 500; y[0] = ((i < 32 ? shl1(y[0]) : y[0] >> 1) ^ -1) / 5 - 294937;
			x[2] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[2] / 2.009))) + 0.1); y[2] *= 787ull / 11 + 500; y[2] = ((i < 32 ? shl1(y[2]) : y[2] >> 1) ^ -1) / 5 - 294937;
			x[1] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[1] / 2.009))) + 0.1); y[1] *= 787ull / 11 + 500; y[1] = ((i < 32 ? shl1(y[1]) : y[1] >> 1) ^ -1) / 5 - 294937;
			x[3] = std::sqrt(0.85) / (std::fabs(1.0 - std::sqrt(std::sqrt(x[3] / 2.009))) + 0.1); y[3] *= 787ull / 11 + 500; y[3] = ((i < 32 ? shl1(y[3]) : y[3] >> 1) ^ -1) / 5 - 294937;
		)
		// Rescale once per pass: x[k] = x[k] * (x[k] * 1.01010101010101 + 0.00021).
		x[0] *= x[0] * 1.01010101010101 + 0.00021; x[2] *= x[2] * 1.01010101010101 + 0.00021;
		x[1] *= x[1] * 1.01010101010101 + 0.00021; x[3] *= x[3] * 1.01010101010101 + 0.00021;
	}
}