-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[AArch64][clang][llvm] Add structured sparsity outer product (TMOP) intrinsics #135145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…nstrinics Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-aarch64 Author: Jonathan Thackray (jthackray) ChangesImplement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files Patch is 43.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135145.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 1bfcf4c31d552..d425c2c1e38bb 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -907,6 +907,48 @@ let SMETargetGuard = "sme-f16f16" in {
}
+////////////////////////////////////////////////////////////////////////////////
+// SME2 - TMOP, SUTMOP, USTMOP
+
+multiclass USTMOP<string za, string t, list<ImmCheck> checks> {
+ def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]",
+ "vi2x[i", t, MergeNone, "aarch64_sme_ustmopa",
+ [IsStreaming, IsInOutZA],
+ checks>;
+}
+
+multiclass SUTMOP<string za, string t, list<ImmCheck> checks> {
+ def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]",
+ "vi2u[i", t, MergeNone, "aarch64_sme_sutmopa",
+ [IsStreaming, IsInOutZA],
+ checks>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop" in {
+ def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+
+ defm SVSUTMOPA_S : SUTMOP<"za32", "c", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ defm SVUSTMOPA_S : USTMOP<"za32", "Uc", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in {
+ def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in {
+ def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in {
+ def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in {
+ def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
let SMETargetGuard = "sme2p1" in {
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
new file mode 100644
index 0000000000000..a1456275dd810
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
@@ -0,0 +1,112 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme.h>
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3);
+}
+
+//void test_svtmopa_lane_za16_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
+// SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
+//}
+
+//void test_svtmopa_lane_za32_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
+// SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
+//}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
new file mode 100644
index 0000000000000..6ccd3acaf749f
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -target-feature +sme-f16f16 -target-feature +sme-b16b16 -verify -emit-llvm -o - %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+void test_features() __arm_streaming __arm_inout("za") {
+ svuint8x2_t zn_u8;
+ svint8x2_t zn_s8;
+ svuint8_t zm_u8;
+ svint8_t zm_s8;
+ svuint16x2_t zn_u16;
+ svint16x2_t zn_s16;
+ svuint16_t zm_u16;
+ svint16_t zm_s16;
+ svbfloat16x2_t zn_bf16;
+ svfloat16x2_t zn_f16;
+ svbfloat16_t zm_bf16;
+ svfloat16_t zm_f16;
+ svfloat32x2_t zn_f32;
+ svfloat32_t zm_f32;
+ fpm_t fpm = 0;
+ svuint8_t zk;
+
+// expected-error@+1 {{'svtmopa_lane_za32_s8_s8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u8_u8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_s8_u8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u8_s8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_s16_s16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u16_u16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_f16_f16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_bf16_bf16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}}
+ svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-f16f16}}
+ svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}}
+ svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm);
+// expected-error@+1 {{'svtmopa_lane_za32_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f32}}
+ svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm);
+}
+
+void test_imm() __arm_streaming __arm_inout("za") {
+ svuint8x2_t zn_u8;
+ svint8x2_t zn_s8;
+ svuint8_t zm_u8;
+ svint8_t zm_s8;
+ svuint16x2_t zn_u16;
+ svint16x2_t zn_s16;
+ svuint16_t zm_u16;
+ svint16_t zm_s16;
+ svbfloat16x2_t zn_bf16;
+ svfloat16x2_t zn_f16;
+ svbfloat16_t zm_bf16;
+ svfloat16_t zm_f16;
+ svfloat32x2_t zn_f32;
+ svfloat32_t zm_f32;
+ fpm_t fpm;
+ svuint8_t zk;
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(3, zn_s8, zm_s8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(-1, zn_s8, zm_s8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(3, zn_u8, zm_u8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(-1, zn_u8, zm_u8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(3, zn_s8, zm_u8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(-1, zn_s8, zm_u8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(3, zn_u8, zm_s8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(-1, zn_u8, zm_s8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(3, zn_s16, zm_s16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(-1, zn_s16, zm_s16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(3, zn_u16, zm_u16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(-1, zn_u16, zm_u16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(3, zn_f16, zm_f16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(-1, zn_f16, zm_f16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(3, zn_f16, zm_f16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(4, zn_f16, zm_f16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(-1, zn_f16, zm_f16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm);
+}
dif...
[truncated]
|
@llvm/pr-subscribers-llvm-globalisel Author: Jonathan Thackray (jthackray) ChangesImplement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files Patch is 43.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135145.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 1bfcf4c31d552..d425c2c1e38bb 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -907,6 +907,48 @@ let SMETargetGuard = "sme-f16f16" in {
}
+////////////////////////////////////////////////////////////////////////////////
+// SME2 - TMOP, SUTMOP, USTMOP
+
+multiclass USTMOP<string za, string t, list<ImmCheck> checks> {
+ def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]",
+ "vi2x[i", t, MergeNone, "aarch64_sme_ustmopa",
+ [IsStreaming, IsInOutZA],
+ checks>;
+}
+
+multiclass SUTMOP<string za, string t, list<ImmCheck> checks> {
+ def _ : SInst<"svtmopa_lane_" # za # "[_{d}_{3}]",
+ "vi2u[i", t, MergeNone, "aarch64_sme_sutmopa",
+ [IsStreaming, IsInOutZA],
+ checks>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop" in {
+ def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+
+ defm SVSUTMOPA_S : SUTMOP<"za32", "c", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+ defm SVUSTMOPA_S : USTMOP<"za32", "Uc", [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in {
+ def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "hb", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in {
+ def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "U", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in {
+ def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in {
+ def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.MM[i>", "m", MergeNone, "aarch64_sme_tmopa", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+}
+
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
let SMETargetGuard = "sme2p1" in {
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
new file mode 100644
index 0000000000000..a1456275dd810
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
@@ -0,0 +1,112 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme.h>
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3);
+}
+
+// CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.tmopa.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT: ret void
+//
+void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
+ SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3);
+}
+
+//void test_svtmopa_lane_za16_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
+// SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
+//}
+
+//void test_svtmopa_lane_za32_mf8_mf8_fpm(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
+// SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
+//}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
new file mode 100644
index 0000000000000..6ccd3acaf749f
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -target-feature +sme-f16f16 -target-feature +sme-b16b16 -verify -emit-llvm -o - %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+void test_features() __arm_streaming __arm_inout("za") {
+ svuint8x2_t zn_u8;
+ svint8x2_t zn_s8;
+ svuint8_t zm_u8;
+ svint8_t zm_s8;
+ svuint16x2_t zn_u16;
+ svint16x2_t zn_s16;
+ svuint16_t zm_u16;
+ svint16_t zm_s16;
+ svbfloat16x2_t zn_bf16;
+ svfloat16x2_t zn_f16;
+ svbfloat16_t zm_bf16;
+ svfloat16_t zm_f16;
+ svfloat32x2_t zn_f32;
+ svfloat32_t zm_f32;
+ fpm_t fpm = 0;
+ svuint8_t zk;
+
+// expected-error@+1 {{'svtmopa_lane_za32_s8_s8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u8_u8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_s8_u8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u8_s8' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_s16_s16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_u16_u16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_f16_f16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za32_bf16_bf16' needs target feature sme,sme2,sme-tmop}}
+ svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}}
+ svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-f16f16}}
+ svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
+// expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}}
+ svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm);
+// expected-error@+1 {{'svtmopa_lane_za32_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f32}}
+ svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, 0, fpm);
+}
+
+void test_imm() __arm_streaming __arm_inout("za") {
+ svuint8x2_t zn_u8;
+ svint8x2_t zn_s8;
+ svuint8_t zm_u8;
+ svint8_t zm_s8;
+ svuint16x2_t zn_u16;
+ svint16x2_t zn_s16;
+ svuint16_t zm_u16;
+ svint16_t zm_s16;
+ svbfloat16x2_t zn_bf16;
+ svfloat16x2_t zn_f16;
+ svbfloat16_t zm_bf16;
+ svfloat16_t zm_f16;
+ svfloat32x2_t zn_f32;
+ svfloat32_t zm_f32;
+ fpm_t fpm;
+ svuint8_t zk;
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(3, zn_s8, zm_s8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_s8(-1, zn_s8, zm_s8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(3, zn_u8, zm_u8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_u8(-1, zn_u8, zm_u8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(3, zn_s8, zm_u8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s8_u8(-1, zn_s8, zm_u8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(3, zn_u8, zm_s8, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u8_s8(-1, zn_u8, zm_s8, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(3, zn_s16, zm_s16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_s16_s16(-1, zn_s16, zm_s16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(3, zn_u16, zm_u16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_u16_u16(-1, zn_u16, zm_u16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(3, zn_f16, zm_f16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_f16_f16(-1, zn_f16, zm_f16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(3, zn_f16, zm_f16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(4, zn_f16, zm_f16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_f16_f16(-1, zn_f16, zm_f16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(3, zn_bf16, zm_bf16, zk, 4);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(4, zn_bf16, zm_bf16, zk, 3);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, -1);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_bf16_bf16(-1, zn_bf16, zm_bf16, zk, 0);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za16_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm);
+
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(3, zn_f32, zm_f32, zk, 4, fpm);
+// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f32, zm_f32, zk, 3, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f32, zm_f32, zk, -1, fpm);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svtmopa_lane_za32_mf8_mf8_fpm(-1, zn_f32, zm_f32, zk, 0, fpm);
+}
dif...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Jonathan,
I left some comments, let me know if something is not clear.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Jonathan,
I notice some things that needs to be fixesd
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Jonathan for the changes, we are almost there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Jonathan,
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/27/builds/8710 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/6373 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/14573 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16113 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/10523 Here is the relevant piece of the build log for the reference
|
…ntrinsics (llvm#135145) Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files
Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm following the ACLE in https://github.com/ARM-software/acle/pull/380/files