From 88c2e7896b991e503b889da25e5c6a948586b16b Mon Sep 17 00:00:00 2001
From: Afonso Bordado
Date: Sat, 4 Nov 2023 19:11:15 +0000
Subject: [PATCH] Implement aarch64 addp intrinsics

---
 example/neon.rs                | 56 ++++++++++++++++++++++++++++++++++
 src/intrinsics/llvm_aarch64.rs | 12 ++++++++
 2 files changed, 68 insertions(+)

diff --git a/example/neon.rs b/example/neon.rs
index 0e23e862df1fa..6ea053d0b0046 100644
--- a/example/neon.rs
+++ b/example/neon.rs
@@ -131,6 +131,55 @@ unsafe fn test_vpmax_f32() {
     assert_eq!(r, e);
 }
 
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_s16() {
+    let lhs = i16x4::from([1, 2, 3, 4]);
+    let rhs = i16x4::from([0, -1, -2, -3]);
+    let actual: i16x4 = transmute(vpadd_s16(transmute(lhs), transmute(rhs)));
+    let expected = i16x4::from([3, 7, -1, -5]);
+    assert_eq!(actual, expected);
+}
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_s32() {
+    let lhs = i32x2::from([1, 2]);
+    let rhs = i32x2::from([0, -1]);
+    let actual: i32x2 = transmute(vpadd_s32(transmute(lhs), transmute(rhs)));
+    let expected = i32x2::from([3, -1]);
+    assert_eq!(actual, expected);
+}
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_s8() {
+    let lhs = i8x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
+    let rhs = i8x8::from([0, -1, -2, -3, -4, -5, -6, -7]);
+    let actual: i8x8 = transmute(vpadd_s8(transmute(lhs), transmute(rhs)));
+    let expected = i8x8::from([3, 7, 11, 15, -1, -5, -9, -13]);
+    assert_eq!(actual, expected);
+}
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_u16() {
+    let lhs = u16x4::from([1, 2, 3, 4]);
+    let rhs = u16x4::from([30, 31, 32, 33]);
+    let actual: u16x4 = transmute(vpadd_u16(transmute(lhs), transmute(rhs)));
+    let expected = u16x4::from([3, 7, 61, 65]);
+    assert_eq!(actual, expected);
+}
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_u32() {
+    let lhs = u32x2::from([1, 2]);
+    let rhs = u32x2::from([30, 31]);
+    let actual: u32x2 = transmute(vpadd_u32(transmute(lhs), transmute(rhs)));
+    let expected = u32x2::from([3, 61]);
+    assert_eq!(actual, expected);
+}
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vpadd_u8() {
+    let lhs = u8x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
+    let rhs = u8x8::from([30, 31, 32, 33, 34, 35, 36, 37]);
+    let actual: u8x8 = transmute(vpadd_u8(transmute(lhs), transmute(rhs)));
+    let expected = u8x8::from([3, 7, 11, 15, 61, 65, 69, 73]);
+    assert_eq!(actual, expected);
+}
+
 #[cfg(target_arch = "aarch64")]
 fn main() {
     unsafe {
@@ -148,6 +197,13 @@ fn main() {
         test_vpmax_u16();
         test_vpmax_u32();
         test_vpmax_f32();
+
+        test_vpadd_s16();
+        test_vpadd_s32();
+        test_vpadd_s8();
+        test_vpadd_u16();
+        test_vpadd_u32();
+        test_vpadd_u8();
     }
 }
 
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index ed318a89fa05f..fdad5a474d6d1 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -228,6 +228,18 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        _ if intrinsic.starts_with("llvm.aarch64.neon.addp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, lhs, rhs| fx.bcx.ins().iadd(lhs, rhs),
+            );
+        }
+
         // FIXME generalize vector types
         "llvm.aarch64.neon.tbl1.v16i8" => {
             intrinsic_args!(fx, args => (t, idx); intrinsic);