Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ppc64le vector abi with tests #17205

Merged
merged 1 commit into from
Jul 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,9 @@ Void() = nothing

# Immutable wrapper holding a single field `value` of the parameter type `T`.
immutable VecElement{T}
value::T
# Inner constructor: only accepts an argument that is already a `T`.
VecElement(value::T) = new(value) # disable converting constructor in Core
end
# Outer constructor: takes `T` from the argument's own type, so that
# `VecElement(x)` can be written without spelling out the type parameter.
VecElement{T}(arg::T) = VecElement{T}(arg)

Expr(args::ANY...) = _expr(args...)

Expand Down
3 changes: 3 additions & 0 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ include("operators.jl")
include("pointer.jl")
include("refpointer.jl")
# Generic fallback: calling a type `T` on one argument converts the argument
# to `T` and asserts that the result really is a `T`.
(::Type{T}){T}(arg) = convert(T, arg)::T
# `VecElement{T}(arg)`: convert `arg` to the element type `T` first, then wrap it.
(::Type{VecElement{T}}){T}(arg) = VecElement{T}(convert(T, arg))
# Converting to a `VecElement` type goes through that type's constructor.
convert{T<:VecElement}(::Type{T}, arg) = T(arg)
# A value that already has the target `VecElement` type is returned unchanged.
convert{T<:VecElement}(::Type{T}, arg::T) = arg
include("checked.jl")
importall .Checked

Expand Down
88 changes: 72 additions & 16 deletions src/abi_ppc64le.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,50 @@ typedef bool AbiState;
AbiState default_abi_state = 0;

// count the homogeneous floating aggregate size (saturating at max count of 8)
static unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0)
static unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva)
{
size_t i, l = ty->nfields;
// handle homogeneous float aggregates
if (l == 0) {
if (*ty0 == NULL) {
if (ty == jl_float64_type || ty == jl_float32_type)
*ty0 = ty;
if (ty != jl_float64_type && ty != jl_float32_type)
return 9;
*hva = false;
if (*ty0 == NULL)
*ty0 = ty;
else if (*hva || ty->size != (*ty0)->size)
return 9;
return 1;
}

// handle homogeneous vector aggregates
jl_datatype_t *fld0 = (jl_datatype_t*)jl_field_type(ty, 0);
if (!jl_is_datatype(fld0) || ty->name == jl_vecelement_typename)
return 9;
if (fld0->name == jl_vecelement_typename) {
if (!jl_is_bitstype(jl_tparam0(fld0)) || jl_datatype_size(ty) > 16)
return 9;
if (l != 1 && l != 2 && l != 4 && l != 8 && l != 16)
return 9;
*hva = true;
if (*ty0 == NULL)
*ty0 = ty;
else if (!*hva || ty->size != (*ty0)->size)
return 9;
for (i = 1; i < l; i++) {
jl_datatype_t *fld = (jl_datatype_t*)jl_field_type(ty, i);
if (fld != fld0)
return 9;
}
return ty == *ty0 ? 1 : 9;
return 1;
}

// recurse through other struct types
int n = 0;
for (i = 0; i < l; i++) {
jl_value_t *fld = jl_field_type(ty, i);
jl_datatype_t *fld = (jl_datatype_t*)jl_field_type(ty, i);
if (!jl_is_datatype(fld))
return 9;
n += isHFA((jl_datatype_t*)fld, ty0);
n += isHFA((jl_datatype_t*)fld, ty0, hva);
if (n > 8)
return 9;
}
Expand All @@ -70,7 +98,8 @@ bool use_sret(AbiState *state, jl_value_t *ty)
// Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
jl_datatype_t *dt = (jl_datatype_t*)ty;
jl_datatype_t *ty0 = NULL;
if (dt->size > 16 && isHFA(dt, &ty0) > 8)
bool hva = false;
if (dt->size > 16 && isHFA(dt, &ty0, &hva) > 8)
return true;
return false;
}
Expand All @@ -79,8 +108,10 @@ void needPassByRef(AbiState *state, jl_value_t *ty, bool *byRef, bool *inReg)
{
if (!jl_is_datatype(ty) || jl_is_abstracttype(ty) || jl_is_cpointer_type(ty) || jl_is_array_type(ty))
return;
size_t size = jl_datatype_size(ty);
if (size > 64)
jl_datatype_t *dt = (jl_datatype_t*)ty;
jl_datatype_t *ty0 = NULL;
bool hva = false;
if (dt->size > 64 && isHFA(dt, &ty0, &hva) > 8)
*byRef = true;
}

Expand All @@ -96,12 +127,37 @@ Type *preferred_llvm_type(jl_value_t *ty, bool isret)
return NULL;
// legalize this into [n x f32/f64]
jl_datatype_t *ty0 = NULL;
int hfa = isHFA(dt, &ty0);
if (hfa <= 8)
return ArrayType::get(ty0 == jl_float32_type ? T_float32 : T_float64, hfa);
// rewrite integer-sized (non-HFA) struct to an array of i64
if (size > 8)
return ArrayType::get(T_int64, (size + 7) / 8);
bool hva = false;
int hfa = isHFA(dt, &ty0, &hva);
if (hfa <= 8) {
if (ty0 == jl_float32_type) {
return ArrayType::get(T_float32, hfa);
}
else if (ty0 == jl_float64_type) {
return ArrayType::get(T_float64, hfa);
}
else {
jl_datatype_t *vecty = (jl_datatype_t*)jl_field_type(ty0, 0);
assert(jl_is_datatype(vecty) && vecty->name == jl_vecelement_typename);
jl_value_t *elemty = jl_tparam0(vecty);
assert(jl_is_bitstype(elemty));

Type *ety = julia_type_to_llvm(elemty);
Type *vty = VectorType::get(ety, ty0->nfields);
return ArrayType::get(vty, hfa);
}
}
// rewrite integer-sized (non-HFA) struct to an array
// the bitsize of the integer gives the desired alignment
if (size > 8) {
if (dt->alignment <= 8) {
return ArrayType::get(T_int64, (size + 7) / 8);
}
else {
Type *T_int128 = Type::getIntNTy(jl_LLVMContext, 128);
return ArrayType::get(T_int128, (size + 15) / 16);
}
}
return Type::getIntNTy(jl_LLVMContext, size * 8);
}

Expand Down
113 changes: 111 additions & 2 deletions src/ccalltest.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,13 +595,13 @@ JL_DLLEXPORT int64x2_t test_aa64_vec_1(int32x2_t v1, float _v2, int32x2_t v3)
return vmovl_s32(v1 * v2 + v3);
}

// This is a homogenious short vector aggregate
// This is a homogeneous short vector aggregate
typedef struct {
int8x8_t v1;
float32x2_t v2;
} struct_aa64_3;

// This is NOT a homogenious short vector aggregate
// This is NOT a homogeneous short vector aggregate
typedef struct {
float32x2_t v2;
int16x8_t v1;
Expand All @@ -614,3 +614,112 @@ JL_DLLEXPORT struct_aa64_3 test_aa64_vec_2(struct_aa64_3 v1, struct_aa64_4 v2)
}

#endif

#if defined(_CPU_PPC64_)

// Vector types for the PPC64 ccall tests, declared with the GCC/Clang
// `vector_size` attribute. The attribute argument is the total size in bytes,
// so the lane count is that size divided by the element size.
typedef int32_t int32x2_t __attribute__ ((vector_size (8)));    // 2 x int32_t,  8 bytes
typedef float float32x2_t __attribute__ ((vector_size (8)));    // 2 x float,    8 bytes
typedef int32_t int32x4_t __attribute__ ((vector_size (16)));   // 4 x int32_t, 16 bytes
typedef float float32x4_t __attribute__ ((vector_size (16)));   // 4 x float,   16 bytes
typedef double float64x2_t __attribute__ ((vector_size (16)));  // 2 x double,  16 bytes

// Aggregate layouts used as by-value arguments/returns in the PPC64 tests.
// NOTE(review): the `_hva` suffix presumably marks the layouts that are
// expected to classify as homogeneous vector aggregates, and the unsuffixed
// ones the layouts that are not -- confirm against the ABI classifier.

// A 64-bit integer followed by one 16-byte float vector.
typedef struct {
int64_t m;
float32x4_t v;
} struct_huge1_ppc64;

// A 16-byte float vector followed by an 8-byte integer vector.
typedef struct {
float32x4_t v1;
int32x2_t v2;
} struct_huge2_ppc64;

// A 16-byte float vector followed by an anonymous struct of four scalar floats.
typedef struct {
float32x4_t v1;
struct {
float f1;
float f2;
float f3;
float f4;
};
} struct_huge3_ppc64;

// An 8-byte float vector followed by a 16-byte double vector.
typedef struct {
float32x2_t v1;
float64x2_t v2;
} struct_huge4_ppc64;

// Nine 16-byte float vectors in a single array.
typedef struct {
float32x4_t v1[9];
} struct_huge5_ppc64;

// Nine 16-byte float vectors: an array of eight plus one more member.
typedef struct {
float32x4_t v1[8];
float32x4_t v2;
} struct_huge6_ppc64;

// Exactly eight 16-byte float vectors.
typedef struct {
float32x4_t v1[8];
} struct_huge1_ppc64_hva;

// Four 16-byte float vectors, reached through two levels of nesting (2 x 2).
typedef struct {
struct {
float32x4_t vf[2];
} v[2];
} struct_huge2_ppc64_hva;

// Three 16-byte float vectors: one direct member plus two inside an anonymous struct.
typedef struct {
float32x4_t vf1;
struct {
float32x4_t vf2[2];
};
} struct_huge3_ppc64_hva;

// Two 16-byte vectors with different element types (int32_t and float).
typedef struct {
int32x4_t v1;
float32x4_t v2;
} struct_huge4_ppc64_hva;

// Two 16-byte vectors with different element types (float and double).
typedef struct {
float32x4_t v1;
float64x2_t v2;
} struct_huge5_ppc64_hva;

// One `test_huge` instantiation per struct declared above. The first argument
// is the suffix of the struct name (struct_huge<suffix>); the second is an
// expression selecting a member or a vector lane inside that struct.
// NOTE(review): `test_huge` is defined outside this excerpt; presumably it
// generates an exported test function that touches the selected field --
// confirm against the macro definition.
test_huge(1_ppc64, m);
test_huge(2_ppc64, v1[0]);
test_huge(3_ppc64, v1[0]);
test_huge(4_ppc64, v1[0]);
test_huge(5_ppc64, v1[0][0]);
test_huge(6_ppc64, v1[0][0]);
test_huge(1_ppc64_hva, v1[0][0]);
test_huge(2_ppc64_hva, v[0].vf[0][0]);
test_huge(3_ppc64_hva, vf1[0]);
test_huge(4_ppc64_hva, v1[0]);
test_huge(5_ppc64_hva, v1[0]);

// Sums nine 64-bit integer arguments, the integer member of `vs`, and the four
// float lanes of `vs.v`, and returns the total as an int64_t.
//
// The integer terms are added first in int64_t arithmetic. That partial sum is
// then converted to float, and the four lanes are added one at a time in
// single precision before the final conversion back to int64_t.
JL_DLLEXPORT int64_t test_ppc64_vec1long(
    int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
    int64_t d7, int64_t d8, int64_t d9, struct_huge1_ppc64 vs)
{
    const int64_t int_part = d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + vs.m;
    float total = (float)int_part;
    int lane;
    for (lane = 0; lane < 4; lane++)
        total = total + vs.v[lane];
    return (int64_t)total;
}

// Sums nine 64-bit integer arguments and the four float lanes of `vs`, and
// returns the total as an int64_t.
//
// The integers are added first in int64_t arithmetic. That partial sum is then
// converted to float, and the lanes are added one at a time in single
// precision before the final conversion back to int64_t.
JL_DLLEXPORT int64_t test_ppc64_vec1long_vec(
    int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
    int64_t d7, int64_t d8, int64_t d9, float32x4_t vs)
{
    const int64_t int_part = d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9;
    float total = (float)int_part;
    int lane;
    for (lane = 0; lane < 4; lane++)
        total = total + vs[lane];
    return (int64_t)total;
}

// Returns a 4-lane float vector whose lane k is d1 + a[k] + b[k] + ... + n[k].
//
// For each lane the accumulation starts from d1 converted to float and then
// adds the fourteen vector arguments in declaration order (a through n), all
// in single precision.
JL_DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
                                         float32x4_t e, float32x4_t f, float32x4_t g, float32x4_t h, float32x4_t i,
                                         float32x4_t j, float32x4_t k, float32x4_t l, float32x4_t m, float32x4_t n)
{
    // Gather the vector arguments so each lane can be reduced with one loop.
    const float32x4_t operands[] = {a, b, c, d, e, f, g, h, i, j, k, l, m, n};
    const int count = (int)(sizeof(operands) / sizeof(operands[0]));
    float32x4_t r;
    int lane, idx;
    for (lane = 0; lane < 4; lane++) {
        float acc = (float)d1;
        for (idx = 0; idx < count; idx++)
            acc = acc + operands[idx][lane];
        r[lane] = acc;
    }
    return r;
}

#endif
2 changes: 0 additions & 2 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3757,7 +3757,6 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t
jl_cgval_t retval;
if (lam == NULL) {
assert(theFptr);
assert(nargs >= 0);
#ifdef LLVM37
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If these assertions are correct, might as well keep them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the same thing but that var is unsigned?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right.

Value *ret = builder.CreateCall(prepare_call(theFptr), {myargs,
ConstantInt::get(T_int32, nargs + 1)});
Expand All @@ -3780,7 +3779,6 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t
}
else {
assert(theFptr);
assert(nargs >= 0);
// for jlcall, we need to pass the function object even if it is a ghost.
// here we reconstruct the function instance from its type (first elt of argt)
Value *theF = literal_pointer_val((jl_value_t*)ff);
Expand Down
1 change: 0 additions & 1 deletion src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,6 @@ jl_lambda_info_t *jl_compile_for_dispatch(jl_lambda_info_t *li)
jl_compile_linfo(li);
}
}
jl_generate_fptr(li);
return li;
}

Expand Down
5 changes: 4 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ jl_lambda_info_t *jl_get_unspecialized(jl_lambda_info_t *method);
STATIC_INLINE jl_value_t *jl_call_method_internal(jl_lambda_info_t *meth, jl_value_t **args, uint32_t nargs)
{
jl_lambda_info_t *mfptr = meth;
if (__unlikely(mfptr->fptr == NULL))
if (__unlikely(mfptr->fptr == NULL)) {
mfptr = jl_compile_for_dispatch(mfptr);
if (!mfptr->fptr)
jl_generate_fptr(mfptr);
}
if (mfptr->jlcall_api == 0)
return mfptr->fptr(args[0], &args[1], nargs-1);
else if (mfptr->jlcall_api == 1)
Expand Down
Loading