commit 085b8076fa45aaae161e91dcdb8eef452b1bb049
parent ff97362647c8e90803c544cbe4fb904c83a4ed7d
Author: Robert Russell <robert@rr3.xyz>
Date: Sat, 19 Oct 2024 18:57:50 -0700
Remove p- prefix on AVX256 OR and XOR
Not sure why they were there in the first place...
Diffstat:
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/inc/rand.h b/inc/rand.h
@@ -28,7 +28,9 @@ void r_make_hash_key(u64 (*key)[4], u64 seed);
/* Generate len (truly) random bytes using the /dev/urandom interface and put
* the result in buf. Return 0 on success; on error, return -1 and set errno.
 * r_trand is slow; its intended use is to seed userspace PRNGs (like
- * r_prand64) on program initialization. */
+ * r_prand64) on program initialization.
+ * TODO: Use getrandom instead? getrandom blocks when /dev/urandom doesn't have
+ * enough entropy yet. */
int r_trand(u8 *buf, usize len);
/* Generate a pseudo-random u64 seeded from the given u64*, or from r_seed if
diff --git a/inc/simd.h b/inc/simd.h
@@ -348,23 +348,23 @@ static inline v8u32 v8u32_andnot(v8u32 x, v8u32 y) { return (v8u32) __builti
static inline v4i64 v4i64_andnot(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_andnotsi256((v4di)x, (v4di)y); }
static inline v4u64 v4u64_andnot(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_andnotsi256((v4di)x, (v4di)y); }
-static inline v32i8 v32i8_por(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v32u8 v32u8_por(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v16i16 v16i16_por(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v16u16 v16u16_por(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v8i32 v8i32_por(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v8u32 v8u32_por(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v4i64 v4i64_por(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_por256((v4di)x, (v4di)y); }
-static inline v4u64 v4u64_por(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_por256((v4di)x, (v4di)y); }
-
-static inline v32i8 v32i8_pxor(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v32u8 v32u8_pxor(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v16i16 v16i16_pxor(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v16u16 v16u16_pxor(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v8i32 v8i32_pxor(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v8u32 v8u32_pxor(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v4i64 v4i64_pxor(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
-static inline v4u64 v4u64_pxor(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v32i8 v32i8_or(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v32u8 v32u8_or(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v16i16 v16i16_or(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v16u16 v16u16_or(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v8i32 v8i32_or(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v8u32 v8u32_or(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v4i64 v4i64_or(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_por256((v4di)x, (v4di)y); }
+static inline v4u64 v4u64_or(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_por256((v4di)x, (v4di)y); }
+
+static inline v32i8 v32i8_xor(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v32u8 v32u8_xor(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v16i16 v16i16_xor(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v16u16 v16u16_xor(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v8i32 v8i32_xor(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v8u32 v8u32_xor(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v4i64 v4i64_xor(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
+static inline v4u64 v4u64_xor(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_pxor256((v4di)x, (v4di)y); }
static inline v16u16 v16u16_sl(v16u16 x, v8u16 y) { return (v16u16)__builtin_ia32_psllw256((v16hi)x, (v8hi)y); }
static inline v8u32 v8u32_sl(v8u32 x, v4u32 y) { return (v8u32)__builtin_ia32_pslld256((v8si)x, (v4si)y); }