Skip to content

Commit a02b449

Browse files
committed
[X86] Sync AESENC/DEC Key Locker builtins with gcc.
For the wide builtins, pass a single input pointer and a single output pointer to the builtins. Emit the GEPs and input loads from CGBuiltin.
1 parent 230c57b commit a02b449

File tree

5 files changed

+587
-153
lines changed

5 files changed

+587
-153
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1902,22 +1902,16 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd")
19021902

19031903
// KEY LOCKER
19041904
TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl")
1905-
TARGET_BUILTIN(__builtin_ia32_encodekey128_u32,
1906-
"UiUiV2Oiv*", "nV:128:", "kl")
1907-
TARGET_BUILTIN(__builtin_ia32_encodekey256_u32,
1908-
"UiUiV2OiV2Oiv*", "nV:128:", "kl")
1909-
TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1910-
TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1911-
TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1912-
TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1913-
TARGET_BUILTIN(__builtin_ia32_aesencwide128kl,
1914-
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
1915-
TARGET_BUILTIN(__builtin_ia32_aesencwide256kl,
1916-
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
1917-
TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl,
1918-
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
1919-
TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl,
1920-
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
1905+
TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl")
1906+
TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl")
1907+
TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1908+
TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1909+
TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1910+
TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
1911+
TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
1912+
TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
1913+
TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
1914+
TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
19211915

19221916
// SERIALIZE
19231917
TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 53 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14070,75 +14070,67 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1407014070

1407114071
return Builder.CreateExtractValue(Call, 0);
1407214072
}
14073-
case X86::BI__builtin_ia32_aesenc128kl:
14074-
case X86::BI__builtin_ia32_aesdec128kl:
14075-
case X86::BI__builtin_ia32_aesenc256kl:
14076-
case X86::BI__builtin_ia32_aesdec256kl:
14077-
case X86::BI__builtin_ia32_aesencwide128kl:
14078-
case X86::BI__builtin_ia32_aesdecwide128kl:
14079-
case X86::BI__builtin_ia32_aesencwide256kl:
14080-
case X86::BI__builtin_ia32_aesdecwide256kl: {
14081-
int FirstReturnOp;
14082-
int ResultCount;
14083-
SmallVector<Value*, 9> InOps;
14084-
unsigned ID;
14085-
14073+
case X86::BI__builtin_ia32_aesenc128kl_u8:
14074+
case X86::BI__builtin_ia32_aesdec128kl_u8:
14075+
case X86::BI__builtin_ia32_aesenc256kl_u8:
14076+
case X86::BI__builtin_ia32_aesdec256kl_u8: {
14077+
Intrinsic::ID IID;
1408614078
switch (BuiltinID) {
14087-
default: llvm_unreachable("Unsupported intrinsic!");
14088-
case X86::BI__builtin_ia32_aesenc128kl:
14089-
case X86::BI__builtin_ia32_aesdec128kl:
14090-
case X86::BI__builtin_ia32_aesenc256kl:
14091-
case X86::BI__builtin_ia32_aesdec256kl: {
14092-
InOps = {Ops[1], Ops[2]};
14093-
FirstReturnOp = 0;
14094-
ResultCount = 1;
14095-
switch (BuiltinID) {
14096-
case X86::BI__builtin_ia32_aesenc128kl:
14097-
ID = Intrinsic::x86_aesenc128kl;
14098-
break;
14099-
case X86::BI__builtin_ia32_aesdec128kl:
14100-
ID = Intrinsic::x86_aesdec128kl;
14101-
break;
14102-
case X86::BI__builtin_ia32_aesenc256kl:
14103-
ID = Intrinsic::x86_aesenc256kl;
14104-
break;
14105-
case X86::BI__builtin_ia32_aesdec256kl:
14106-
ID = Intrinsic::x86_aesdec256kl;
14107-
break;
14108-
}
14079+
default: llvm_unreachable("Unexpected builtin");
14080+
case X86::BI__builtin_ia32_aesenc128kl_u8:
14081+
IID = Intrinsic::x86_aesenc128kl;
14082+
break;
14083+
case X86::BI__builtin_ia32_aesdec128kl_u8:
14084+
IID = Intrinsic::x86_aesdec128kl;
14085+
break;
14086+
case X86::BI__builtin_ia32_aesenc256kl_u8:
14087+
IID = Intrinsic::x86_aesenc256kl;
14088+
break;
14089+
case X86::BI__builtin_ia32_aesdec256kl_u8:
14090+
IID = Intrinsic::x86_aesdec256kl;
1410914091
break;
1411014092
}
14111-
case X86::BI__builtin_ia32_aesencwide128kl:
14112-
case X86::BI__builtin_ia32_aesdecwide128kl:
14113-
case X86::BI__builtin_ia32_aesencwide256kl:
14114-
case X86::BI__builtin_ia32_aesdecwide256kl: {
14115-
InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13],
14116-
Ops[14], Ops[15], Ops[16]};
14117-
FirstReturnOp = 1;
14118-
ResultCount = 8;
14119-
switch (BuiltinID) {
14120-
case X86::BI__builtin_ia32_aesencwide128kl:
14121-
ID = Intrinsic::x86_aesencwide128kl;
14122-
break;
14123-
case X86::BI__builtin_ia32_aesdecwide128kl:
14124-
ID = Intrinsic::x86_aesdecwide128kl;
14125-
break;
14126-
case X86::BI__builtin_ia32_aesencwide256kl:
14127-
ID = Intrinsic::x86_aesencwide256kl;
14128-
break;
14129-
case X86::BI__builtin_ia32_aesdecwide256kl:
14130-
ID = Intrinsic::x86_aesdecwide256kl;
14131-
break;
14132-
}
14093+
14094+
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
14095+
14096+
Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14097+
Ops[0]);
14098+
14099+
return Builder.CreateExtractValue(Call, 0);
14100+
}
14101+
case X86::BI__builtin_ia32_aesencwide128kl_u8:
14102+
case X86::BI__builtin_ia32_aesdecwide128kl_u8:
14103+
case X86::BI__builtin_ia32_aesencwide256kl_u8:
14104+
case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
14105+
Intrinsic::ID IID;
14106+
switch (BuiltinID) {
14107+
case X86::BI__builtin_ia32_aesencwide128kl_u8:
14108+
IID = Intrinsic::x86_aesencwide128kl;
14109+
break;
14110+
case X86::BI__builtin_ia32_aesdecwide128kl_u8:
14111+
IID = Intrinsic::x86_aesdecwide128kl;
14112+
break;
14113+
case X86::BI__builtin_ia32_aesencwide256kl_u8:
14114+
IID = Intrinsic::x86_aesencwide256kl;
14115+
break;
14116+
case X86::BI__builtin_ia32_aesdecwide256kl_u8:
14117+
IID = Intrinsic::x86_aesdecwide256kl;
1413314118
break;
1413414119
}
14120+
14121+
Value *InOps[9];
14122+
InOps[0] = Ops[2];
14123+
for (int i = 0; i != 8; ++i) {
14124+
Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i);
14125+
InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16));
1413514126
}
1413614127

14137-
Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps);
14128+
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
1413814129

14139-
for (int i = 0; i < ResultCount; ++i) {
14140-
Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1),
14141-
Ops[FirstReturnOp + i]);
14130+
for (int i = 0; i != 8; ++i) {
14131+
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
14132+
Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i);
14133+
Builder.CreateAlignedStore(Extract, Ptr, Align(16));
1414214134
}
1414314135

1414414136
return Builder.CreateExtractValue(Call, 0);

clang/lib/Headers/keylockerintrin.h

Lines changed: 12 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi,
211211
/// \endoperation
212212
static __inline__ unsigned char __DEFAULT_FN_ATTRS
213213
_mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
214-
return __builtin_ia32_aesenc128kl(__odata, __idata, __h);
214+
return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
215215
}
216216

217217
/// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using
@@ -248,7 +248,7 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
248248
/// \endoperation
249249
static __inline__ unsigned char __DEFAULT_FN_ATTRS
250250
_mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
251-
return __builtin_ia32_aesenc256kl(__odata, __idata, __h);
251+
return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
252252
}
253253

254254
/// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using
@@ -285,7 +285,7 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
285285
/// \endoperation
286286
static __inline__ unsigned char __DEFAULT_FN_ATTRS
287287
_mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
288-
return __builtin_ia32_aesdec128kl(__odata, __idata, __h);
288+
return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
289289
}
290290

291291
/// The AESDEC256KL performs 14 rounds of AES to decrypt the __idata using
@@ -322,7 +322,7 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
322322
/// \endoperation
323323
static __inline__ unsigned char __DEFAULT_FN_ATTRS
324324
_mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
325-
return __builtin_ia32_aesdec256kl(__odata, __idata, __h);
325+
return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
326326
}
327327

328328
#undef __DEFAULT_FN_ATTRS
@@ -374,23 +374,8 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
374374
/// \endoperation
375375
static __inline__ unsigned char __DEFAULT_FN_ATTRS
376376
_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
377-
return __builtin_ia32_aesencwide128kl(__h,
378-
__odata,
379-
__odata + 1,
380-
__odata + 2,
381-
__odata + 3,
382-
__odata + 4,
383-
__odata + 5,
384-
__odata + 6,
385-
__odata + 7,
386-
__idata[0],
387-
__idata[1],
388-
__idata[2],
389-
__idata[3],
390-
__idata[4],
391-
__idata[5],
392-
__idata[6],
393-
__idata[7]);
377+
return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata,
378+
(const __v2di *)__idata, __h);
394379
}
395380

396381
/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
@@ -429,23 +414,8 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
429414
/// \endoperation
430415
static __inline__ unsigned char __DEFAULT_FN_ATTRS
431416
_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
432-
return __builtin_ia32_aesencwide256kl(__h,
433-
__odata,
434-
__odata + 1,
435-
__odata + 2,
436-
__odata + 3,
437-
__odata + 4,
438-
__odata + 5,
439-
__odata + 6,
440-
__odata + 7,
441-
__idata[0],
442-
__idata[1],
443-
__idata[2],
444-
__idata[3],
445-
__idata[4],
446-
__idata[5],
447-
__idata[6],
448-
__idata[7]);
417+
return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata,
418+
(const __v2di *)__idata, __h);
449419
}
450420

451421
/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle
@@ -484,23 +454,8 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
484454
/// \endoperation
485455
static __inline__ unsigned char __DEFAULT_FN_ATTRS
486456
_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
487-
return __builtin_ia32_aesdecwide128kl(__h,
488-
__odata,
489-
__odata + 1,
490-
__odata + 2,
491-
__odata + 3,
492-
__odata + 4,
493-
__odata + 5,
494-
__odata + 6,
495-
__odata + 7,
496-
__idata[0],
497-
__idata[1],
498-
__idata[2],
499-
__idata[3],
500-
__idata[4],
501-
__idata[5],
502-
__idata[6],
503-
__idata[7]);
457+
return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata,
458+
(const __v2di *)__idata, __h);
504459
}
505460

506461
/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
@@ -539,23 +494,8 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
539494
/// \endoperation
540495
static __inline__ unsigned char __DEFAULT_FN_ATTRS
541496
_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
542-
return __builtin_ia32_aesdecwide256kl(__h,
543-
__odata,
544-
__odata + 1,
545-
__odata + 2,
546-
__odata + 3,
547-
__odata + 4,
548-
__odata + 5,
549-
__odata + 6,
550-
__odata + 7,
551-
__idata[0],
552-
__idata[1],
553-
__idata[2],
554-
__idata[3],
555-
__idata[4],
556-
__idata[5],
557-
__idata[6],
558-
__idata[7]);
497+
return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata,
498+
(const __v2di *)__idata, __h);
559499
}
560500

561501
#undef __DEFAULT_FN_ATTRS

0 commit comments

Comments
 (0)