|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefix=SSE2 |
3 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefix=SSE41 |
4 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 | FileCheck %s --check-prefixes=AVX,AVX1 |
5 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 |
| 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefixes=CHECK,SSE2 |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefixes=CHECK,SSE41 |
| 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 |
| 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 |
6 | 6 |
|
7 | 7 | define i32 @veccond128(<4 x i32> %input) {
|
8 | 8 | ; SSE2-LABEL: veccond128:
|
@@ -388,3 +388,163 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
|
388 | 388 | %t2 = select i1 %t1, i32 %a, i32 %b
|
389 | 389 | ret i32 %t2
|
390 | 390 | }
|
| 391 | + |
; vecmp_load64x2: loads two i64 values (align 1) from %p0 and %p0+8, ORs them,
; and returns true iff the OR is zero. The zero test is deliberately spelled as
; icmp ne -> zext to i32 -> icmp eq 0 instead of a single icmp eq.
; The assertions use the shared CHECK prefix, so every RUN configuration is
; expected to emit the same scalar movq/orq + sete sequence.
| 392 | +define i1 @vecmp_load64x2(ptr %p0) { |
| 393 | +; CHECK-LABEL: vecmp_load64x2: |
| 394 | +; CHECK: # %bb.0: |
| 395 | +; CHECK-NEXT: movq (%rdi), %rax |
| 396 | +; CHECK-NEXT: orq 8(%rdi), %rax |
| 397 | +; CHECK-NEXT: sete %al |
| 398 | +; CHECK-NEXT: retq |
| 399 | + %p1 = getelementptr i8, ptr %p0, i64 8 |
| 400 | + %i0 = load i64, ptr %p0, align 1 |
| 401 | + %i1 = load i64, ptr %p1, align 1 |
| 402 | + %or = or i64 %i0, %i1 |
| 403 | + %ne = icmp ne i64 %or, 0 |
| 404 | + %zx = zext i1 %ne to i32 |
| 405 | + %eq = icmp eq i32 %zx, 0 |
| 406 | + ret i1 %eq |
| 407 | +} |
| 408 | + |
; vecmp_load64x4: loads four i64 values (align 1) at byte offsets 0, 8, 16 and
; 24 from %p0, ORs them as (i0|i2) | (i1|i3), and returns true iff the combined
; value is zero (icmp ne -> zext to i32 -> icmp eq 0).
; Expected code under the shared CHECK prefix: two movq loads, two orq with
; memory operands, one register-register orq, then sete.
| 409 | +define i1 @vecmp_load64x4(ptr %p0) { |
| 410 | +; CHECK-LABEL: vecmp_load64x4: |
| 411 | +; CHECK: # %bb.0: |
| 412 | +; CHECK-NEXT: movq (%rdi), %rax |
| 413 | +; CHECK-NEXT: movq 8(%rdi), %rcx |
| 414 | +; CHECK-NEXT: orq 16(%rdi), %rax |
| 415 | +; CHECK-NEXT: orq 24(%rdi), %rcx |
| 416 | +; CHECK-NEXT: orq %rax, %rcx |
| 417 | +; CHECK-NEXT: sete %al |
| 418 | +; CHECK-NEXT: retq |
| 419 | + %p1 = getelementptr i8, ptr %p0, i64 8 |
| 420 | + %p2 = getelementptr i8, ptr %p0, i64 16 |
| 421 | + %p3 = getelementptr i8, ptr %p0, i64 24 |
| 422 | + %i0 = load i64, ptr %p0, align 1 |
| 423 | + %i1 = load i64, ptr %p1, align 1 |
| 424 | + %i2 = load i64, ptr %p2, align 1 |
| 425 | + %i3 = load i64, ptr %p3, align 1 |
| 426 | + %or02 = or i64 %i0, %i2 |
| 427 | + %or13 = or i64 %i1, %i3 |
| 428 | + %or = or i64 %or02, %or13 |
| 429 | + %ne = icmp ne i64 %or, 0 |
| 430 | + %zx = zext i1 %ne to i32 |
| 431 | + %eq = icmp eq i32 %zx, 0 |
| 432 | + ret i1 %eq |
| 433 | +} |
| 434 | + |
; vecmp_load128x2: loads two i128 values (align 1) from %p0 and %p0+16, ORs
; them, and returns true iff the OR is zero (icmp ne -> zext to i32 ->
; icmp eq 0).
; Expected code under the shared CHECK prefix treats each i128 as two 64-bit
; halves: movq/orq on offsets 0, 8, 16 and 24, one register-register orq, then
; sete.
| 435 | +define i1 @vecmp_load128x2(ptr %p0) { |
| 436 | +; CHECK-LABEL: vecmp_load128x2: |
| 437 | +; CHECK: # %bb.0: |
| 438 | +; CHECK-NEXT: movq (%rdi), %rax |
| 439 | +; CHECK-NEXT: movq 8(%rdi), %rcx |
| 440 | +; CHECK-NEXT: orq 24(%rdi), %rcx |
| 441 | +; CHECK-NEXT: orq 16(%rdi), %rax |
| 442 | +; CHECK-NEXT: orq %rcx, %rax |
| 443 | +; CHECK-NEXT: sete %al |
| 444 | +; CHECK-NEXT: retq |
| 445 | + %p1 = getelementptr i8, ptr %p0, i64 16 |
| 446 | + %i0 = load i128, ptr %p0, align 1 |
| 447 | + %i1 = load i128, ptr %p1, align 1 |
| 448 | + %or = or i128 %i0, %i1 |
| 449 | + %ne = icmp ne i128 %or, 0 |
| 450 | + %zx = zext i1 %ne to i32 |
| 451 | + %eq = icmp eq i32 %zx, 0 |
| 452 | + ret i1 %eq |
| 453 | +} |
| 454 | + |
; vecmp_load128x4: loads four i128 values (align 1) at byte offsets 0, 16, 32
; and 48 from %p0, ORs them as (i0|i2) | (i1|i3), and returns true iff the
; combined value is zero (icmp ne -> zext to i32 -> icmp eq 0).
; Expected code under the shared CHECK prefix: four movq loads, four orq with
; memory operands (eight 64-bit accesses covering offsets 0..56), three
; register-register orq, then sete.
| 455 | +define i1 @vecmp_load128x4(ptr %p0) { |
| 456 | +; CHECK-LABEL: vecmp_load128x4: |
| 457 | +; CHECK: # %bb.0: |
| 458 | +; CHECK-NEXT: movq (%rdi), %rax |
| 459 | +; CHECK-NEXT: movq 8(%rdi), %rcx |
| 460 | +; CHECK-NEXT: movq 24(%rdi), %rdx |
| 461 | +; CHECK-NEXT: movq 16(%rdi), %rsi |
| 462 | +; CHECK-NEXT: orq 32(%rdi), %rax |
| 463 | +; CHECK-NEXT: orq 40(%rdi), %rcx |
| 464 | +; CHECK-NEXT: orq 48(%rdi), %rsi |
| 465 | +; CHECK-NEXT: orq %rax, %rsi |
| 466 | +; CHECK-NEXT: orq 56(%rdi), %rdx |
| 467 | +; CHECK-NEXT: orq %rcx, %rdx |
| 468 | +; CHECK-NEXT: orq %rsi, %rdx |
| 469 | +; CHECK-NEXT: sete %al |
| 470 | +; CHECK-NEXT: retq |
| 471 | + %p1 = getelementptr i8, ptr %p0, i64 16 |
| 472 | + %p2 = getelementptr i8, ptr %p0, i64 32 |
| 473 | + %p3 = getelementptr i8, ptr %p0, i64 48 |
| 474 | + %i0 = load i128, ptr %p0, align 1 |
| 475 | + %i1 = load i128, ptr %p1, align 1 |
| 476 | + %i2 = load i128, ptr %p2, align 1 |
| 477 | + %i3 = load i128, ptr %p3, align 1 |
| 478 | + %or02 = or i128 %i0, %i2 |
| 479 | + %or13 = or i128 %i1, %i3 |
| 480 | + %or = or i128 %or02, %or13 |
| 481 | + %ne = icmp ne i128 %or, 0 |
| 482 | + %zx = zext i1 %ne to i32 |
| 483 | + %eq = icmp eq i32 %zx, 0 |
| 484 | + ret i1 %eq |
| 485 | +} |
| 486 | + |
| 487 | +; PR144861 |
; vecmp_load256x2: loads two i256 values (align 1) from %p0 and %p0+32, ORs
; them, and returns true iff the OR is zero (icmp ne -> zext to i32 ->
; icmp eq 0).
; Expected code under the shared CHECK prefix is fully scalar: four movq
; loads, four orq with memory operands (64-bit pieces at offsets 0..56), three
; register-register orq, then sete.
| 488 | +define i1 @vecmp_load256x2(ptr %p0) { |
| 489 | +; CHECK-LABEL: vecmp_load256x2: |
| 490 | +; CHECK: # %bb.0: |
| 491 | +; CHECK-NEXT: movq 24(%rdi), %rax |
| 492 | +; CHECK-NEXT: movq (%rdi), %rcx |
| 493 | +; CHECK-NEXT: movq 8(%rdi), %rdx |
| 494 | +; CHECK-NEXT: movq 16(%rdi), %rsi |
| 495 | +; CHECK-NEXT: orq 48(%rdi), %rsi |
| 496 | +; CHECK-NEXT: orq 32(%rdi), %rcx |
| 497 | +; CHECK-NEXT: orq %rsi, %rcx |
| 498 | +; CHECK-NEXT: orq 56(%rdi), %rax |
| 499 | +; CHECK-NEXT: orq 40(%rdi), %rdx |
| 500 | +; CHECK-NEXT: orq %rax, %rdx |
| 501 | +; CHECK-NEXT: orq %rcx, %rdx |
| 502 | +; CHECK-NEXT: sete %al |
| 503 | +; CHECK-NEXT: retq |
| 504 | + %p1 = getelementptr i8, ptr %p0, i64 32 |
| 505 | + %i0 = load i256, ptr %p0, align 1 |
| 506 | + %i1 = load i256, ptr %p1, align 1 |
| 507 | + %or = or i256 %i0, %i1 |
| 508 | + %ne = icmp ne i256 %or, 0 |
| 509 | + %zx = zext i1 %ne to i32 |
| 510 | + %eq = icmp eq i32 %zx, 0 |
| 511 | + ret i1 %eq |
| 512 | +} |
| 513 | + |
; vecmp_load512x2: loads two i512 values (align 1) from %p0 and %p0+64, ORs
; them, and returns true iff the OR is zero (icmp ne -> zext to i32 ->
; icmp eq 0).
; Expected code under the shared CHECK prefix is fully scalar: eight movq
; loads, eight orq with memory operands (64-bit pieces at offsets 0..120),
; seven register-register orq, then sete.
| 514 | +define i1 @vecmp_load512x2(ptr %p0) { |
| 515 | +; CHECK-LABEL: vecmp_load512x2: |
| 516 | +; CHECK: # %bb.0: |
| 517 | +; CHECK-NEXT: movq 24(%rdi), %rax |
| 518 | +; CHECK-NEXT: movq 56(%rdi), %rdx |
| 519 | +; CHECK-NEXT: movq 40(%rdi), %rsi |
| 520 | +; CHECK-NEXT: movq 16(%rdi), %rcx |
| 521 | +; CHECK-NEXT: movq 48(%rdi), %r8 |
| 522 | +; CHECK-NEXT: movq (%rdi), %r9 |
| 523 | +; CHECK-NEXT: movq 8(%rdi), %r10 |
| 524 | +; CHECK-NEXT: movq 32(%rdi), %r11 |
| 525 | +; CHECK-NEXT: orq 96(%rdi), %r11 |
| 526 | +; CHECK-NEXT: orq 64(%rdi), %r9 |
| 527 | +; CHECK-NEXT: orq %r11, %r9 |
| 528 | +; CHECK-NEXT: orq 112(%rdi), %r8 |
| 529 | +; CHECK-NEXT: orq 80(%rdi), %rcx |
| 530 | +; CHECK-NEXT: orq %r8, %rcx |
| 531 | +; CHECK-NEXT: orq %r9, %rcx |
| 532 | +; CHECK-NEXT: orq 104(%rdi), %rsi |
| 533 | +; CHECK-NEXT: orq 72(%rdi), %r10 |
| 534 | +; CHECK-NEXT: orq %rsi, %r10 |
| 535 | +; CHECK-NEXT: orq 120(%rdi), %rdx |
| 536 | +; CHECK-NEXT: orq 88(%rdi), %rax |
| 537 | +; CHECK-NEXT: orq %rdx, %rax |
| 538 | +; CHECK-NEXT: orq %r10, %rax |
| 539 | +; CHECK-NEXT: orq %rcx, %rax |
| 540 | +; CHECK-NEXT: sete %al |
| 541 | +; CHECK-NEXT: retq |
| 542 | + %p1 = getelementptr i8, ptr %p0, i64 64 |
| 543 | + %i0 = load i512, ptr %p0, align 1 |
| 544 | + %i1 = load i512, ptr %p1, align 1 |
| 545 | + %or = or i512 %i0, %i1 |
| 546 | + %ne = icmp ne i512 %or, 0 |
| 547 | + %zx = zext i1 %ne to i32 |
| 548 | + %eq = icmp eq i32 %zx, 0 |
| 549 | + ret i1 %eq |
| 550 | +} |
0 commit comments