// Linear search for an int in an int array
// with NEON SIMD vector instructions,
// because GCC's vector_size extensions
// to C don't support any horizontal
// operations. Does appear to work in
// minimal smoke tests.
// Cleaned up o4-mini and GPT-5 mini code

#include <stddef.h>
#include <stdint.h>

// The across-vector add used below (vaddvq_u32) is only provided for
// AArch64, so the vector path is compiled only there. Every other
// target falls back to the plain scalar loop and returns the same result.
#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#define FIND_HAVE_NEON 1
#else
#define FIND_HAVE_NEON 0
#endif

/*
 * Find the first occurrence of k in a[0..n).
 *
 * a: pointer to the first of n int32_t elements (not dereferenced
 *    when n is 0)
 * n: number of elements in a
 * k: value to search for
 *
 * Returns the index of the first element equal to k, or n if no
 * element matches.
 */
size_t find(const int32_t *a, size_t n, int32_t k)
{
    size_t i = 0;

#if FIND_HAVE_NEON
    // Duplicate the needle into all four lanes of a NEON vector.
    const int32x4_t kv = vdupq_n_s32(k);

    for (; i + 4 <= n; i += 4) {
        // Load 4 integers from the array.
        // GPT-5 says, "vld1q_s32 accepts unaligned pointers on ARMv8;
        // for older ARM or strict alignment, use memcpy or aligned
        // loads."
        const int32x4_t v = vld1q_s32(&a[i]);

        // Lane-wise compare: a matching lane becomes 0xFFFFFFFF,
        // a non-matching lane becomes 0.
        const uint32x4_t eq = vceqq_s32(v, kv);

        // Sum the lanes and bail out if any matched. With m matching
        // lanes (1..4) the wrapped 32-bit sum is 2^32 - m, which is
        // never zero, so a non-zero sum means "at least one match".
        if (vaddvq_u32(eq)) {
            break;
        }
    }
#endif

    // If we are here, either we found a match (but don't yet know in
    // which element of the current group of four), or only the last
    // 0-3 elements remain (or the whole array, without NEON).
    // A scalar scan from i resolves all of these cases.
    while (i < n && a[i] != k) {
        i++;
    }
    return i; // n if not found
}