Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86][SSE] Failure to merge 2 x float stores to <2 x float> store #41237

Open
RKSimon opened this issue May 15, 2019 · 1 comment
Open

[X86][SSE] Failure to merge 2 x float stores to <2 x float> store #41237

RKSimon opened this issue May 15, 2019 · 1 comment
Labels
backend:X86 bugzilla Issues migrated from bugzilla

Comments

@RKSimon
Copy link
Collaborator

RKSimon commented May 15, 2019

Bugzilla Link 41892
Version trunk
OS Windows NT
CC @anton-afanasyev,@topperc,@RKSimon,@rotateright

Extended Description

https://godbolt.org/z/InfZZK

#include <x86intrin.h>

void sum_pairs_128(__m128 f, float *p) {
  p[0] = f[0] + f[1];
  p[1] = f[2] + f[3];
}

clang -O3 -march=btver2

_Z13sum_pairs_128Dv4_fPf: # @_Z13sum_pairs_128Dv4_fPf
  vhaddps %xmm0, %xmm0, %xmm0
  vmovss %xmm0, (%rdi)
  vextractps $1, %xmm0, 4(%rdi)
  retq

It would be better if we managed to merge the two scalar stores into a single 64-bit store:

_Z13sum_pairs_128Dv4_fPf: # @_Z13sum_pairs_128Dv4_fPf
  vhaddps %xmm0, %xmm0, %xmm0
  vmovsd %xmm0, (%rdi)
  retq
@RKSimon
Copy link
Collaborator Author

RKSimon commented May 17, 2019

float3 cases would be useful as well: https://godbolt.org/z/DRs8Qf

#include <x86intrin.h>

void store_float3(__m128 f, float *p) {
  *p++ = f[0];
  *p++ = f[1];
  *p++ = f[2];
}

void store_float3_splat(__m128 f, float *p) {
  *p++ = f[0];
  *p++ = f[0];
  *p++ = f[0];
}
_Z12store_float3Dv4_fPf: # @_Z12store_float3Dv4_fPf
  vmovss %xmm0, (%rdi)
  vextractps $1, %xmm0, 4(%rdi)
  vextractps $2, %xmm0, 8(%rdi)
  retq
_Z18store_float3_splatDv4_fPf: # @_Z18store_float3_splatDv4_fPf
  vmovss %xmm0, (%rdi)
  vmovss %xmm0, 4(%rdi)
  vmovss %xmm0, 8(%rdi)
  retq

which could be:

_Z12store_float3Dv4_fPf: # @_Z12store_float3Dv4_fPf
  vmovsd %xmm0, (%rdi)
  vextractps $2, %xmm0, 8(%rdi)
  retq
_Z18store_float3_splatDv4_fPf: # @_Z18store_float3_splatDv4_fPf
  vpermilps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
  vmovsd %xmm0, (%rdi)
  vmovss %xmm0, 8(%rdi)
  retq

@llvmbot llvmbot transferred this issue from llvm/llvm-bugzilla-archive Dec 10, 2021
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 bugzilla Issues migrated from bugzilla
Projects
None yet
Development

No branches or pull requests

1 participant