From a499dd2629719836647341bab51f5bf7189b35b0 Mon Sep 17 00:00:00 2001 From: jbb01 <32650546+jbb01@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:26:43 +0200 Subject: [PATCH] [07_weird_assembly_machine] add a faster implementation of func0 --- .cargo/config.toml | 2 ++ src/bin/07_weird_assembly_machine.rs | 41 ++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..b736a9f --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[target.x86_64-unknown-linux-gnu] +rustflags = ["-Ctarget-cpu=native"] \ No newline at end of file diff --git a/src/bin/07_weird_assembly_machine.rs b/src/bin/07_weird_assembly_machine.rs index 8a7cf2e..d6a9ea7 100644 --- a/src/bin/07_weird_assembly_machine.rs +++ b/src/bin/07_weird_assembly_machine.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] use lib::compute_async; // https://button.qedaka.de/07_weird_assembly_machine.html?name=Jonah&n=10000000000000070&x=4410539207075887818 @@ -68,8 +69,8 @@ const DATA: &[u64] = &[ 11796026101712826135, 2668163399826034890, 1114227485540495868, 12046048025420064663, ]; -const N: u64 = 10000000047000070; -const X: u64 = 2925478411027973783; +const N: u64 = 10000045930000000; +const X: u64 = 16195396392266746892; fn main() { compute_async(|tx| { @@ -86,16 +87,6 @@ fn main() { }); } -fn func0(a: u64, mut b: u64) -> u64 { - let mut x: u64 = 0; - while b != 0 { - let c = b.trailing_zeros(); - b = b ^ 1u64.rotate_left(c); - x = x ^ a.rotate_left(c); - } - x -} - fn func1(x: u64, mut n: u64) -> u64 { let x = func2(x, &DATA[0..128]); if x <= n { @@ -112,6 +103,32 @@ fn func2(x: u64, data: &[u64]) -> u64 { out } +#[cfg(not(all(target_arch = "x86_64", target_feature = "pclmulqdq")))] +fn func0(a: u64, mut b: u64) -> u64 { + let mut x: u64 = 0; + while b != 0 { + let c = b.trailing_zeros(); + b = b ^ 1u64.rotate_left(c); + x = x ^ a.rotate_left(c); + } + x +} + 
/// Carry-less (GF(2) polynomial) product of `a` and `b`, reduced modulo `x^64 + 1`.
///
/// The result equals the XOR of `a.rotate_left(i)` over every set bit `i` of `b`,
/// but is computed with a single `PCLMULQDQ` instead of one loop iteration per set bit.
///
/// This variant is only compiled when the build statically enables `pclmulqdq` on
/// x86_64 (for example via `-Ctarget-cpu=native` on a CPU that has it).
#[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq"))]
fn func0(a: u64, b: u64) -> u64 {
    use std::arch::x86_64::{
        _mm_clmulepi64_si128, _mm_cvtsi128_si64, _mm_set_epi64x, _mm_unpackhi_epi64,
        _mm_xor_si128,
    };

    // SAFETY: the enclosing `cfg` guarantees that PCLMULQDQ is enabled for this build,
    // and every other intrinsic used here is SSE2, which is part of the x86_64 baseline.
    // The intrinsics operate on register values only; no memory is dereferenced.
    unsafe {
        // `as i64` merely reinterprets the 64 bits; the vector lanes are untyped.
        let a_vec = _mm_set_epi64x(0, a as i64);
        let b_vec = _mm_set_epi64x(0, b as i64);

        // imm8 = 0x00 selects the low 64-bit lane of both operands.
        let product = _mm_clmulepi64_si128(a_vec, b_vec, 0x00);

        // Bit 64 + k of the product is exactly what a rotation would wrap around to
        // bit k, so XOR-ing the high half onto the low half performs the reduction.
        // `_mm_unpackhi_epi64` and `_mm_cvtsi128_si64` are SSE2, unlike the SSE4.1-only
        // `_mm_extract_epi64`, which this `cfg` does not guarantee to be available.
        let high = _mm_unpackhi_epi64(product, product);
        let folded = _mm_xor_si128(product, high);
        _mm_cvtsi128_si64(folded) as u64
    }
}