#include <stdint.h>

/* Number of 32-bit words processed by both routines below. */
enum { XOR_PREFIX_WORDS = 1024 };

/**
 * In-place inclusive XOR prefix scan over XOR_PREFIX_WORDS (1024) words.
 *
 * On return, W[i] holds W[0] ^ W[1] ^ ... ^ W[i] of the original contents.
 * XOR acts on each bit independently, so every one of the 32 bit positions
 * receives its own prefix-XOR across the array.
 *
 * @param W  Array of at least XOR_PREFIX_WORDS elements; read and
 *           overwritten in place. Must not be NULL.
 */
void xor_prefix_sum_bitslice(uint32_t *W)
{
    for (int i = 1; i < XOR_PREFIX_WORDS; i++) {
        /* W[i-1] already holds the prefix up to i-1; fold it into W[i]. */
        W[i] ^= W[i - 1];
    }
}

/**
 * Same computation as xor_prefix_sum_bitslice(), with the loop body
 * manually unrolled by four.
 *
 * @param W  Array of at least XOR_PREFIX_WORDS elements; read and
 *           overwritten in place. Must not be NULL.
 */
void xor_prefix_sum_bitslice_unrolled(uint32_t *W)
{
    /* Peel indices 1..3 so the main loop starts on a multiple of four. */
    W[1] ^= W[0];
    W[2] ^= W[1];
    W[3] ^= W[2];

    /* Each iteration handles i..i+3; the last one (i = 1020) ends at 1023. */
    for (int i = 4; i < XOR_PREFIX_WORDS; i += 4) {
        W[i]     ^= W[i - 1];
        W[i + 1] ^= W[i];
        W[i + 2] ^= W[i + 1];
        W[i + 3] ^= W[i + 2];
    }
}