From 53700dd27e18ca77eb4f7b43506c515fd0ddd9a6 Mon Sep 17 00:00:00 2001 From: Vishal Pankaj Chandratreya <19171016+tfpf@users.noreply.github.com> Date: Wed, 1 May 2024 00:31:12 +0530 Subject: [PATCH] Multithreaded sieve of Atkin. (#158) * Made algorithms 3.1, 3.2, 3.3, 4.1, 4.2 and 4.3 static. * Because `self` cannot be passed to worker threads. * Producer and consumer proof-of-concept using transmitter and receiver channels. * Is slower, probably because of the constant message passing. * On GitHub Actions, the test workflow gets cancelled because of excessive memory usage. * Could be because the message buffer gets filled faster than it can be consumed. * Updated to copy sieves between threads and combine them afterwards. * This increases the memory usage by a precise amount (the number of threads) rather than indefinitely. * Serial test runs to allow use of 4 cores on macOS 13. * Undid this change, because tests were running faster on Ubuntu. * But kept a macOS 13 runner for sanity, because it runs on an M1 CPU. * Tried combining the copies of the sieves while updating the main sieve. * Slower than combining first and updating in another loop, so reverted. --- .github/workflows/sanity.yml | 2 +- src/utils/objects/sieve_of_atkin.rs | 84 +++++++++++++++++------------ 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/.github/workflows/sanity.yml b/.github/workflows/sanity.yml index 2380a34..de1356c 100644 --- a/.github/workflows/sanity.yml +++ b/.github/workflows/sanity.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-12, ubuntu-22.04, windows-2022] + os: [macos-13, macos-14, ubuntu-22.04, windows-2022] steps: - uses: actions/checkout@v4 - run: cargo -vv run -vv --release diff --git a/src/utils/objects/sieve_of_atkin.rs b/src/utils/objects/sieve_of_atkin.rs index a337a1b..a482f6d 100644 --- a/src/utils/objects/sieve_of_atkin.rs +++ b/src/utils/objects/sieve_of_atkin.rs @@ -46,22 +46,40 @@ impl SieveOfAtkin { sieve_of_atkin } fn init(&mut self) { - self.algorithm_3_1(1); - self.algorithm_3_1(13); - self.algorithm_3_1(17); - self.algorithm_3_1(29); - self.algorithm_3_1(37); - self.algorithm_3_1(41); - self.algorithm_3_1(49); - self.algorithm_3_1(53); - self.algorithm_3_2(7); - self.algorithm_3_2(19); - self.algorithm_3_2(31); - self.algorithm_3_2(43); - self.algorithm_3_3(11); - self.algorithm_3_3(23); - self.algorithm_3_3(47); - self.algorithm_3_3(59); + let mut sieve3 = self.sieve.clone(); + let mut sieve2 = self.sieve.clone(); + let mut sieve1 = self.sieve.clone(); + let sieve0 = self.sieve.as_mut_slice(); + std::thread::scope(|s| { + s.spawn(|| { + for delta in [1, 13, 17, 29] { + SieveOfAtkin::algorithm_3_1(sieve0, delta); + } + }); + s.spawn(|| { + for delta in [37, 41, 49, 53] { + SieveOfAtkin::algorithm_3_1(&mut sieve1, delta); + } + }); + s.spawn(|| { + for delta in [7, 19, 31, 43] { + SieveOfAtkin::algorithm_3_2(&mut sieve2, delta); + } + }); + s.spawn(|| { + for delta in [11, 23, 47, 59] { + SieveOfAtkin::algorithm_3_3(&mut sieve3, delta); + } + }); + }); + + // Combine the results. Since no two threads operated on bits at the + // same position, the bitfields can simply be ORed. Zipping them + // instead of their iterators ensures that they get dropped + // automatically. + for (((s0, s1), s2), s3) in sieve0.iter_mut().zip(sieve1).zip(sieve2).zip(sieve3) { + *s0 |= s1 | s2 | s3; + } // Mark composite all numbers divisible by the squares of primes. let mut num: usize = 1; @@ -84,40 +102,40 @@ impl SieveOfAtkin { } } } - fn algorithm_3_1(&mut self, delta: i32) { + fn algorithm_3_1(sieve: &mut [u16], delta: i32) { for f in 1..=15 { for g in (1..=30).step_by(2) { let quadratic = 4 * f * f + g * g; if delta == quadratic % 60 { - self.algorithm_4_1(delta, f, g, quadratic / 60); + SieveOfAtkin::algorithm_4_1(sieve, delta, f, g, quadratic / 60); } } } } - fn algorithm_3_2(&mut self, delta: i32) { + fn algorithm_3_2(sieve: &mut [u16], delta: i32) { for f in (1..=10).step_by(2) { for g in [2, 4, 8, 10, 14, 16, 20, 22, 26, 28] { let quadratic = 3 * f * f + g * g; if delta == quadratic % 60 { - self.algorithm_4_2(delta, f, g, quadratic / 60); + SieveOfAtkin::algorithm_4_2(sieve, delta, f, g, quadratic / 60); } } } } - fn algorithm_3_3(&mut self, delta: i32) { + fn algorithm_3_3(sieve: &mut [u16], delta: i32) { for (f, gstart) in (1..=10).zip([2, 1].into_iter().cycle()) { for g in (gstart..=30).step_by(2) { let quadratic = 3i32 * f * f - g * g; // Remainder can be negative, so perform modulo operation. if delta == quadratic.rem_euclid(60) { - self.algorithm_4_3(delta, f, g, quadratic.div_euclid(60)); + SieveOfAtkin::algorithm_4_3(sieve, delta, f, g, quadratic.div_euclid(60)); } } } } - fn algorithm_4_1(&mut self, delta: i32, f: i32, g: i32, h: i32) { + fn algorithm_4_1(sieve: &mut [u16], delta: i32, f: i32, g: i32, h: i32) { let (mut x, mut y0, mut k0) = (f as i64, g as i64, h as i64); - while k0 < self.sieve.len() as i64 { + while k0 < sieve.len() as i64 { (k0, x) = (k0 + 2 * x + 15, x + 15); } loop { @@ -129,15 +147,15 @@ impl SieveOfAtkin { (k0, y0) = (k0 + y0 + 15, y0 + 30); } let (mut k, mut y) = (k0, y0); - while k < self.sieve.len() as i64 { - self.sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; + while k < sieve.len() as i64 { + sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; (k, y) = (k + y + 15, y + 30); } } } - fn algorithm_4_2(&mut self, delta: i32, f: i32, g: i32, h: i32) { + fn algorithm_4_2(sieve: &mut [u16], delta: i32, f: i32, g: i32, h: i32) { let (mut x, mut y0, mut k0) = (f as i64, g as i64, h as i64); - while k0 < self.sieve.len() as i64 { + while k0 < sieve.len() as i64 { (k0, x) = (k0 + x + 5, x + 10); } loop { @@ -149,16 +167,16 @@ impl SieveOfAtkin { (k0, y0) = (k0 + y0 + 15, y0 + 30); } let (mut k, mut y) = (k0, y0); - while k < self.sieve.len() as i64 { - self.sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; + while k < sieve.len() as i64 { + sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; (k, y) = (k + y + 15, y + 30); } } } - fn algorithm_4_3(&mut self, delta: i32, f: i32, g: i32, h: i32) { + fn algorithm_4_3(sieve: &mut [u16], delta: i32, f: i32, g: i32, h: i32) { let (mut x, mut y0, mut k0) = (f as i64, g as i64, h as i64); loop { - while k0 >= self.sieve.len() as i64 { + while k0 >= sieve.len() as i64 { if x <= y0 { return; } @@ -166,7 +184,7 @@ impl SieveOfAtkin { } let (mut k, mut y) = (k0, y0); while k >= 0 && y < x { - self.sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; + sieve[k as usize] ^= 1u16 << SieveOfAtkin::SHIFTS[delta as usize]; (k, y) = (k - y - 15, y + 30); } (k0, x) = (k0 + x + 5, x + 10);