/* Hello-world with SSE 4.2 intrinsics.
 *
 * Adds and multiplies two vectors of eight signed 16-bit integers and
 * prints the per-lane results.  Every intrinsic used below is available
 * at the SSE2 level, so <emmintrin.h> is sufficient.
 */
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

#include <emmintrin.h>

/* Number of 16-bit lanes in one 128-bit vector. */
enum { LANES = 8 };

int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* __m128 is four floats, __m128d is two doubles, and __m128i is a
     * 128-bit integer vector.  The _epi16 suffix means packed signed
     * 16-bit integers.
     *
     * _mm_set_epi16 takes its arguments from the highest lane down to
     * the lowest, so the LAST argument ends up in element 0 once the
     * vector is stored to memory.
     */
    __m128i a = _mm_set_epi16(225, 210, 42, 99, -32767, -32768, -5, 0);
    __m128i b = _mm_set_epi16(0, 11, 2, -64, -1, 2, -32767, 1024);

    /* Lane-wise sum; wraps around on 16-bit overflow. */
    __m128i c = _mm_add_epi16(a, b);

    /* Each lane's full product is 32 bits wide: mullo keeps the low 16
     * bits and mulhi keeps the high 16 bits of the signed product.
     */
    __m128i d = _mm_mullo_epi16(a, b);
    __m128i e = _mm_mulhi_epi16(a, b);

    /* _mm_store_si128 requires a 16-byte-aligned destination, hence
     * alignas(16) on every buffer.
     * NOTE(review): the original comment here pointed to an external
     * reference whose link is missing from this copy; presumably it
     * justified casting an int16_t array to __m128i * -- confirm.
     */
    alignas(16) int16_t ai[LANES], bi[LANES], ci[LANES], di[LANES], ei[LANES];

    /* si128 is a whole 128-bit vector. */
    _mm_store_si128((__m128i *)ai, a);
    _mm_store_si128((__m128i *)bi, b);
    _mm_store_si128((__m128i *)ci, c);
    _mm_store_si128((__m128i *)di, d);
    _mm_store_si128((__m128i *)ei, e);

    for (int i = 0; i < LANES; i++) {
        printf("%d + %d = %d\n", (int)ai[i], (int)bi[i], (int)ci[i]);

        /* The low word is shown as a SIGNED value.  When its top bit is
         * set (e.g. 99 * -64), "low + high * 65536" as printed does not
         * equal the true product; read the low word as unsigned to
         * recombine the two halves correctly.
         */
        printf("%d · %d = %d (+ %d)\n",
               (int)ai[i], (int)bi[i], (int)di[i], (int)ei[i] * 65536);
    }

    return 0;
}