rcx

miscellaneous C library
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 085b8076fa45aaae161e91dcdb8eef452b1bb049
parent ff97362647c8e90803c544cbe4fb904c83a4ed7d
Author: Robert Russell <robert@rr3.xyz>
Date:   Sat, 19 Oct 2024 18:57:50 -0700

Remove p- prefix on AVX256 OR and XOR

Not sure why they were there in the first place...

Diffstat:
M inc/rand.h |  4 +++-
M inc/simd.h | 34 +++++++++++++++++-----------------
2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/inc/rand.h b/inc/rand.h @@ -28,7 +28,9 @@ void r_make_hash_key(u64 (*key)[4], u64 seed); /* Generate len (truly) random bytes using the /dev/urandom interface and put * the result in buf. Return 0 on success; on error, return -1 and set errno. * r_trand is slow; it's intented use is to seed userspace PRNG's (like - * r_prand64) on program initialization. */ + * r_prand64) on program initialization. + * TODO: Use getrandom instead? getrandom blocks when /dev/urandom doesn't have + * enough entropy yet. */ int r_trand(u8 *buf, usize len); /* Generate a pseudo-random u64 seeded from the given u64*, or from r_seed if diff --git a/inc/simd.h b/inc/simd.h @@ -348,23 +348,23 @@ static inline v8u32 v8u32_andnot(v8u32 x, v8u32 y) { return (v8u32) __builti static inline v4i64 v4i64_andnot(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_andnotsi256((v4di)x, (v4di)y); } static inline v4u64 v4u64_andnot(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_andnotsi256((v4di)x, (v4di)y); } -static inline v32i8 v32i8_por(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v32u8 v32u8_por(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v16i16 v16i16_por(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v16u16 v16u16_por(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v8i32 v8i32_por(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v8u32 v8u32_por(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v4i64 v4i64_por(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_por256((v4di)x, (v4di)y); } -static inline v4u64 v4u64_por(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_por256((v4di)x, (v4di)y); } - -static inline v32i8 v32i8_pxor(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_pxor256((v4di)x, (v4di)y); } 
-static inline v32u8 v32u8_pxor(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v16i16 v16i16_pxor(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v16u16 v16u16_pxor(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v8i32 v8i32_pxor(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v8u32 v8u32_pxor(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v4i64 v4i64_pxor(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_pxor256((v4di)x, (v4di)y); } -static inline v4u64 v4u64_pxor(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_pxor256((v4di)x, (v4di)y); } +static inline v32i8 v32i8_or(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v32u8 v32u8_or(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v16i16 v16i16_or(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v16u16 v16u16_or(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v8i32 v8i32_or(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v8u32 v8u32_or(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v4i64 v4i64_or(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_or256((v4di)x, (v4di)y); } +static inline v4u64 v4u64_or(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_or256((v4di)x, (v4di)y); } + +static inline v32i8 v32i8_xor(v32i8 x, v32i8 y) { return (v32i8) __builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v32u8 v32u8_xor(v32u8 x, v32u8 y) { return (v32u8) __builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v16i16 v16i16_xor(v16i16 x, v16i16 y) { return (v16i16)__builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline 
v16u16 v16u16_xor(v16u16 x, v16u16 y) { return (v16u16)__builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v8i32 v8i32_xor(v8i32 x, v8i32 y) { return (v8i32) __builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v8u32 v8u32_xor(v8u32 x, v8u32 y) { return (v8u32) __builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v4i64 v4i64_xor(v4i64 x, v4i64 y) { return (v4i64) __builtin_ia32_xor256((v4di)x, (v4di)y); } +static inline v4u64 v4u64_xor(v4u64 x, v4u64 y) { return (v4u64) __builtin_ia32_xor256((v4di)x, (v4di)y); } static inline v16u16 v16u16_sl(v16u16 x, v8u16 y) { return (v16u16)__builtin_ia32_psllw256((v16hi)x, (v8hi)y); } static inline v8u32 v8u32_sl(v8u32 x, v4u32 y) { return (v8u32)__builtin_ia32_pslld256((v8si)x, (v4si)y); }