-
Notifications
You must be signed in to change notification settings - Fork 46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PUSCH receiver kernels #108
base: main
Are you sure you want to change the base?
Changes from 38 commits
10ba276
5f667a3
d0482e9
bc8952c
7337c60
ce412ab
8df39cc
52237dc
a2819be
d93ffd0
363fa4e
cab0290
a039b79
edc2c6c
b36b2f7
b7a0c84
a170b74
380729c
bb58026
962c313
6ec634e
d8d29b6
7c6c1b6
ac589b2
8bed4ad
0e6b37c
0e3894f
f0270f8
5f3c750
33701fa
f0570a5
5984c35
0596309
bbab0ca
3b5886b
3ea70e0
5bee548
0f1de6f
c53ec74
264879e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,4 @@ progressbar2 | |
tabulate | ||
sympy | ||
scipy | ||
pyflexfloat |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is better to add a "#define local_parallel" instead of commenting the others out. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, thanks. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright 2021 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Author: Marco Bertuletti, ETH Zurich | ||
|
||
#include <stdint.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#include "dma.h" | ||
#include "encoding.h" | ||
#include "printf.h" | ||
#include "runtime.h" | ||
#include "synchronization.h" | ||
|
||
#include "data_axpy_f16.h" | ||
#define NUM_BANKS (NUM_CORES * BANKING_FACTOR) | ||
|
||
// Vectors for kernel computation: both are passed to the AXPY kernel in | ||
// main(), and l1_Y is afterwards compared against the reference l2_Z. | ||
// Each holds array_N half-precision elements (array_N is presumably | ||
// defined in data_axpy_f16.h - confirm), is placed in the ".l1_prio" | ||
// section and is aligned to NUM_BANKS. | ||
__fp16 l1_X[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio"))); | ||
__fp16 l1_Y[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio"))); | ||
|
||
#include "baremetal/mempool_axpy_f16.h" | ||
#include "baremetal/mempool_checks.h" | ||
|
||
int main() { | ||
|
||
uint32_t core_id = mempool_get_core_id(); | ||
uint32_t num_cores = mempool_get_core_count(); | ||
uint32_t time_init, time_end; | ||
mempool_barrier_init(core_id); | ||
|
||
time_init = 0; | ||
time_end = 0; | ||
if (core_id == 0) { | ||
dma_memcpy_blocking(l1_X, l2_X, array_N * sizeof(int16_t)); | ||
dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(int16_t)); | ||
} | ||
uint32_t register volatile a = *(uint32_t *)&(l2_A)&0x0000FFFF; | ||
mempool_barrier(num_cores); | ||
|
||
// PARALLEL, LOCAL ACCESSES | ||
time_init = mempool_get_timer(); | ||
mempool_start_benchmark(); | ||
axpy_f16vecp_local_unrolled4(a, l1_X, l1_Y, array_N); | ||
mempool_stop_benchmark(); | ||
time_end = mempool_get_timer(); | ||
|
||
mempool_barrier(num_cores); | ||
// Check results | ||
if (core_id == 0) { | ||
uint32_t clock_cycles = (time_end - time_init); | ||
printf("\nKernel execution takes %d clock cycles\n", clock_cycles); | ||
} | ||
mempool_check_f16(l1_Y, l2_Z, 100, 0.1f, 0); | ||
mempool_barrier(num_cores); | ||
|
||
return 0; | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The same applies to this kernel: if we keep all of the kernels in this "main.c", we should add a #define to select between them. Otherwise, we should leave only one kernel instead of commenting the others out. What do you think? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Copyright 2021 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Author: Marco Bertuletti, ETH Zurich | ||
|
||
#include <stdint.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#include "dma.h" | ||
#include "encoding.h" | ||
#include "printf.h" | ||
#include "runtime.h" | ||
#include "synchronization.h" | ||
|
||
#include "data_axpy_f32.h" | ||
#define NUM_BANKS (NUM_CORES * BANKING_FACTOR) | ||
|
||
// Vectors for kernel computation: both are passed to the AXPY kernel in | ||
// main(), and l1_Y is afterwards compared against the reference l2_Z. | ||
// Each holds array_N single-precision elements (array_N is presumably | ||
// defined in data_axpy_f32.h - confirm), is placed in the ".l1_prio" | ||
// section and is aligned to NUM_BANKS. | ||
float l1_X[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio"))); | ||
float l1_Y[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio"))); | ||
|
||
#include "baremetal/mempool_axpy_f32.h" | ||
#include "baremetal/mempool_checks.h" | ||
|
||
int main() { | ||
|
||
uint32_t core_id = mempool_get_core_id(); | ||
uint32_t num_cores = mempool_get_core_count(); | ||
uint32_t time_init, time_end; | ||
mempool_barrier_init(core_id); | ||
|
||
time_init = 0; | ||
time_end = 0; | ||
if (core_id == 0) { | ||
dma_memcpy_blocking(l1_X, l2_X, array_N * sizeof(int32_t)); | ||
dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(int32_t)); | ||
} | ||
float register volatile a = l2_A; | ||
mempool_barrier(num_cores); | ||
|
||
// PARALLEL | ||
time_init = mempool_get_timer(); | ||
mempool_start_benchmark(); | ||
axpy_f32p_local_unrolled4(a, l1_X, l1_Y, array_N); | ||
mempool_stop_benchmark(); | ||
time_end = mempool_get_timer(); | ||
|
||
// Check results | ||
if (core_id == 0) { | ||
uint32_t clock_cycles = (time_end - time_init); | ||
printf("\nKernel execution takes %d clock cycles\n", clock_cycles); | ||
} | ||
mempool_check_f32(l1_Y, l2_Z, 100, 0.1f, 0); | ||
mempool_barrier(num_cores); | ||
|
||
return 0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we have the convention of adding the i32/f16/... suffix, we could easily find all floating-point (FP) and integer (I) apps automatically with a wildcard, right?