simd-scan

SIMD scan implementation and benchmark
git clone git://git.rr3.xyz/simd-scan
Log | Files | Refs

main.c (1216B)


      1 #include <rcx/all.h>
      2 #include <rcx/bench.h>
      3 #include <stdio.h>
      4 
      5 #include "func.h"
      6 
      7 extern u64 scan_avx1024_whoa(u64 *y, u64 *x);
      8 
      9 u64 in[32], out[32];
     10 volatile u64 out_total;
     11 
     12 #define BENCHMARK(f) \
     13 	void \
     14 	benchmark_##f(u64 N) { \
     15 		r_bench_start(); \
     16 		for (u64 n = 0; n < N; n++) \
     17 			out_total = f(out, in); \
     18 		r_bench_stop(); \
     19 	}
     20 
     21 BENCHMARK(scan_scalar1024)
     22 BENCHMARK(scan_scalar2048)
     23 BENCHMARK(scan_scalar2048_unrolled)
     24 BENCHMARK(scan_avx1024_serial)
     25 BENCHMARK(scan_avx1024)
     26 BENCHMARK(scan_avx1024_whoa)
     27 
     28 int
     29 main(void) {
     30 	printf("%lu\n", scan_avx1024_whoa(out, (u64[16]){
     31 		0x0, 0x1, 0x2, 0x3,
     32 		0x4, 0x5, 0x6, 0x7,
     33 		0x8, 0x9, 0xa, 0xb,
     34 		0xc, 0xd, 0xe, 0xf,
     35 	}));
     36 	printf("%lu %lu %lu %lu\n", out[0], out[1], out[2], out[3]);
     37 	printf("%lu %lu %lu %lu\n", out[4], out[5], out[6], out[7]);
     38 	printf("%lu %lu %lu %lu\n", out[8], out[9], out[10], out[11]);
     39 	printf("%lu %lu %lu %lu\n", out[12], out[13], out[14], out[15]);
     40 	// return 0;
     41 	r_bench(benchmark_scan_scalar1024, 3000);
     42 	r_bench(benchmark_scan_scalar2048, 3000);
     43 	r_bench(benchmark_scan_scalar2048_unrolled, 3000);
     44 	r_bench(benchmark_scan_avx1024_serial, 3000);
     45 	r_bench(benchmark_scan_avx1024, 3000);
     46 	r_bench(benchmark_scan_avx1024_whoa, 3000);
     47 }