#include <x86intrin.h>

__m128i load_00123456(const unsigned short *data) {
  return _mm_setr_epi16(data[0], data[0], data[1], data[2],
                        data[3], data[4], data[5], data[6]);
}

Compiled with -O3 -march=btver2:

_Z13load_00123456PKt:                   # @_Z13load_00123456PKt
        movzwl  (%rdi), %eax
        vmovd   %eax, %xmm0
        vpinsrw $1, %eax, %xmm0, %xmm0
        vpinsrw $2, 2(%rdi), %xmm0, %xmm0
        vpinsrw $3, 4(%rdi), %xmm0, %xmm0
        vpinsrw $4, 6(%rdi), %xmm0, %xmm0
        vpinsrw $5, 8(%rdi), %xmm0, %xmm0
        vpinsrw $6, 10(%rdi), %xmm0, %xmm0
        vpinsrw $7, 12(%rdi), %xmm0, %xmm0
        retq

Many of the loads/insertions could be merged into something like:

_Z19load_00123456_mergePKt:             # @_Z19load_00123456_mergePKt
        movzwl  (%rdi), %eax
        vmovd   %eax, %xmm0
        vpshuflw $224, %xmm0, %xmm0     # xmm0 = xmm0[0,0,2,3,4,5,6,7]
        vpshufd $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
        vpinsrd $1, 2(%rdi), %xmm0, %xmm0
        vpinsrq $1, 6(%rdi), %xmm0, %xmm0
        retq

https://godbolt.org/z/-HLpsE