LLVM Bugzilla is read-only and represents the historical archive of all LLVM issues filed before November 26, 2021. Use GitHub to submit LLVM bugs.

Bug 39473 - Merge consecutive load insertions into a vector where possible
Summary: Merge consecutive load insertions into a vector where possible
Status: NEW
Alias: None
Product: libraries
Classification: Unclassified
Component: Common Code Generator Code (show other bugs)
Version: trunk
Hardware: PC Windows NT
Importance: P enhancement
Assignee: Unassigned LLVM Bugs
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2018-10-29 06:24 PDT by Simon Pilgrim
Modified: 2018-10-29 06:24 PDT (History)
4 users (show)

See Also:
Fixed By Commit(s):


Attachments

Note: You need to log in before you can comment on or make changes to this bug.
Description Simon Pilgrim 2018-10-29 06:24:56 PDT
#include <x86intrin.h>

__m128i load_00123456(const unsigned short *data) {
  return _mm_setr_epi16(data[0], data[0], data[1], data[2], data[3], data[4], data[5], data[6]);
}

-O3 -march=btver2

_Z13load_00123456PKt: # @_Z13load_00123456PKt
  movzwl (%rdi), %eax
  vmovd %eax, %xmm0
  vpinsrw $1, %eax, %xmm0, %xmm0
  vpinsrw $2, 2(%rdi), %xmm0, %xmm0
  vpinsrw $3, 4(%rdi), %xmm0, %xmm0
  vpinsrw $4, 6(%rdi), %xmm0, %xmm0
  vpinsrw $5, 8(%rdi), %xmm0, %xmm0
  vpinsrw $6, 10(%rdi), %xmm0, %xmm0
  vpinsrw $7, 12(%rdi), %xmm0, %xmm0
  retq

Many of the consecutive loads/insertions could be merged into wider insertions, giving something like:

_Z19load_00123456_mergePKt: # @_Z19load_00123456_mergePKt
  movzwl (%rdi), %eax
  vmovd %eax, %xmm0
  vpshuflw $224, %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,3,4,5,6,7]
  vpshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
  vpinsrd $1, 2(%rdi), %xmm0, %xmm0
  vpinsrq $1, 6(%rdi), %xmm0, %xmm0
  retq

https://godbolt.org/z/-HLpsE