Skip to content

Commit

Permalink
[cheshire] Back-ref sw compilation flow for fmatmul
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Jul 10, 2024
1 parent e91f42c commit 1f0d14b
Show file tree
Hide file tree
Showing 12 changed files with 211 additions and 41 deletions.
10 changes: 5 additions & 5 deletions cheshire/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Support for FPGA synthesis was added to Ara by integrating it into Cheshire. Sin

1. **Navigate to the Root Directory**
Ensure you are in the root directory where the Makefile is located.

2. **Set up environment**
Set the `BACKREF_CHS_ROOT` variable to the root directory of the Cheshire repository where you want to build the bitstream.

Expand All @@ -26,17 +26,17 @@ This command will:
Here's how we use back-referencing in our setup:

1. **Generate Custom TCL File**:

- We generate a custom `add_sources.vcu128.tcl` file using the `bender script vivado` command with our specific targets (`-t fpga -t cv64a6_imafdcv_sv39 -t cva6 -t vcu128 --define ARA`).
- This custom TCL file includes all the necessary sources and configurations required for the FPGA synthesis with Cheshire + Ara.

2. **Copy Custom TCL File**:

- The generated custom TCL file is then copied into the Cheshire directory (`$(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl`).

3. **Invoke Cheshire Compile Flow**:

- With the custom TCL file in place, we invoke the Cheshire compile flow by running `make -C $(BACKREF_CHS_ROOT) chs-xilinx-all`.
- The Cheshire compile flow target depends on the `add_sources.vcu128.tcl` file, and since we have provided our custom version, it will use ours for the synthesis process.

This method ensures that we can extend and customize the compile flow for our specific needs without modifying the Cheshire repository directly.
This method ensures that we can extend and customize the compile flow for our specific needs without modifying the Cheshire repository directly.
32 changes: 22 additions & 10 deletions cheshire/sw/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,34 @@
#
# Copy and compile vector software on Cheshire

CHS_ROOT ?= $(realpath ../../../../../..)
ARA_SW := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../..
ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../..
CHS_SW := $(CHS_ROOT)/sw
SRC := $(wildcard $(ARA_SW)/*.c) $(wildcard $(ARA_SW)/*.h)
ARA_SW := $(ARA_ROOT)/cheshire/sw
ARA_APPS := $(ARA_ROOT)/apps

APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c"))
SW_C := $(wildcard $(ARA_SW)/src/*.c)
DEPS_H := $(wildcard $(ARA_SW)/include/*.h)

ARA_CONFIGURATION ?= 2_lanes
include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk

# Get the original compiler options and add the support for vector extension
CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/)
# Tweak the compilation to include Cheshire-related headers and files
CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen)

.PHONY: chs-sw-all copy_vector_sw
# Command-style targets (they do not produce files with these names).
# The names listed here must match the rule names exactly (hyphens, not underscores).
.PHONY: chs-sw-all copy-vector-sw copy-vector-deps

# Forward build command to the main Cheshire makefile and attach the correct -march
# Rename the .c vector files not to break the cheshire vanilla flow
chs-sw-all: copy-vector-sw
# Use $(MAKE) rather than a literal `make` so that -j/jobserver and -n are propagated to the sub-make
chs-sw-all: copy-vector-sw copy-vector-deps
	$(MAKE) -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)"
for f in $(filter %.c, $(SRC)); do mv $(CHS_SW)/tests/$f $(CHS_SW)/tests/$f.bkp; done

# Copy the vector programs to cheshire
copy-vector-sw:
cp $(SRC) $(CHS_SW)/tests
# Copy the header dependencies (every include/*.h collected in DEPS_H) into Cheshire's sw/tests directory
copy-vector-deps: $(DEPS_H)
cp $^ $(CHS_SW)/tests

# Copy the vector programs (every src/*.c collected in SW_C) into Cheshire's sw/tests directory
copy-vector-sw: $(SW_C)
cp $^ $(CHS_SW)/tests
6 changes: 4 additions & 2 deletions cheshire/sw/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Build software for Cheshire Ara

Compile the `.c` programs in this folder with:
## Compile the vector code for Cheshire

Compile the source files with vector extension support enabled:

```bash
make chs-sw-all
```

This command will copy the necessary source files into Cheshire's `sw/tests` directory and compile them with the support for vector extension.
This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time.
1 change: 0 additions & 1 deletion cheshire/sw/encoding.h

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

#include "printf.h"

inline void cheshire_start() {
void cheshire_start() {
// Initialize Cheshire's UART
uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET);
uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500);
uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE);
}

inline void cheshire_finish() {
void cheshire_end() {
// Flush the UART
uart_write_flush(&__base_uart);
}
Expand Down
1 change: 1 addition & 0 deletions cheshire/sw/include/encoding.h
1 change: 1 addition & 0 deletions cheshire/sw/include/fmatmul.c.h
1 change: 1 addition & 0 deletions cheshire/sw/include/fmatmul.h
57 changes: 57 additions & 0 deletions cheshire/sw/include/vector_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Matteo Perotti <[email protected]>
//
// Custom vector util

#ifndef __VECTOR_UTIL_H__
#define __VECTOR_UTIL_H__

// Compile with version(GCC) >= 13
#include <riscv_vector.h>
#include "encoding.h"

// Absolute value of x. The argument is fully parenthesized so that compound
// expressions such as FABS(a - b) expand correctly. Note that x is evaluated
// more than once, so do not pass expressions with side effects.
#define FABS(x) (((x) < 0) ? -(x) : (x))

// Cycle accumulator shared by start_timer()/stop_timer(). It is 64-bit wide so
// that the value returned by get_cycle_count() is stored without truncation.
int64_t timer;

// Return the current value of the cycle counter
int64_t get_cycle_count() {
  int64_t cycle_count;
  // The fence is needed to be sure that Ara is idle, and it is not performing
  // the last vector stores when we read the cycle CSR with stop_timer().
  // The "memory" clobber additionally keeps the compiler from moving memory
  // accesses across the point where the counter is sampled.
  asm volatile("fence; csrr %[cycle_count], cycle"
               : [cycle_count] "=r"(cycle_count)
               :
               : "memory");
  return cycle_count;
}

// Start and stop the counter: start_timer() stores the negated start count,
// stop_timer() adds the end count, leaving (end - start) in timer.
void start_timer() { timer = -get_cycle_count(); }
void stop_timer() { timer += get_cycle_count(); }

// Get the value of the timer (elapsed cycles after a start/stop pair)
int64_t get_timer() { return timer; }

// Set the MSTATUS_VS bits in the mstatus CSR (the vector-extension state field;
// the mask itself comes from encoding.h).
// Declared static inline so that every translation unit including this header
// has a usable definition even when the compiler decides not to inline.
static inline void enable_rvv() {
  // One asm statement with t0 listed as clobbered: the compiler can neither
  // place code that uses t0 between the two instructions nor assume that t0
  // survives them.
  asm volatile("li t0, %0\n\t"
               "csrs mstatus, t0"
               :
               : "i"(MSTATUS_VS)
               : "t0");
}

// Compare two doubles with an absolute tolerance.
// Returns 1 when a and b are equal or |a - b| <= threshold, 0 otherwise.
// A NaN operand (or NaN threshold) is reported as a mismatch (0) so that
// invalid results cannot silently pass verification.
// Declared static inline so that the header provides a usable definition even
// when the call is not inlined.
static inline int similarity_check(double a, double b, double threshold) {
  // Identical values (including equal infinities, whose difference is NaN)
  // always match
  if (a == b)
    return 1;
  double diff = a - b;
  double abs_diff = (diff < 0) ? -diff : diff;
  // "<=" is false whenever NaN is involved, which yields a mismatch
  return (abs_diff <= threshold) ? 1 : 0;
}

// Single-precision variant of the absolute-tolerance comparison.
// Returns 1 when a and b are equal or |a - b| <= threshold, 0 otherwise.
// A NaN operand (or NaN threshold) is reported as a mismatch (0) so that
// invalid results cannot silently pass verification.
// Declared static inline so that the header provides a usable definition even
// when the call is not inlined.
static inline int similarity_check_32b(float a, float b, float threshold) {
  // Identical values (including equal infinities, whose difference is NaN)
  // always match
  if (a == b)
    return 1;
  float diff = a - b;
  float abs_diff = (diff < 0) ? -diff : diff;
  // "<=" is false whenever NaN is involved, which yields a mismatch
  return (abs_diff <= threshold) ? 1 : 0;
}

#endif
118 changes: 118 additions & 0 deletions cheshire/sw/src/fmatmul.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Matteo Perotti <[email protected]>
//
// fmatmul wrapper for Cheshire

#include "regs/cheshire.h"
#include "dif/clint.h"
#include "dif/uart.h"
#include "params.h"
#include "util.h"

#include "cheshire_util.h"
#include "vector_util.h"

#include "fmatmul.c.h"

#ifndef _MM_SIZE_
#define _MM_SIZE_ 32
#endif

// Define Matrix dimensions:
// C = AB with A=[MxN], B=[NxP], C=[MxP]
// All three dimensions are set to _MM_SIZE_ (square matrices).
uint64_t M = _MM_SIZE_;
uint64_t N = _MM_SIZE_;
uint64_t P = _MM_SIZE_;

// Matrix storage: each buffer holds _MM_SIZE_ x _MM_SIZE_ doubles
// (32x32 unless _MM_SIZE_ is overridden at compile time).
// NOTE(review): the previous comment said "Max matrix size: 256x256", which
// does not match the array sizes declared here - confirm the intended limit.
double a[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
double b[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
double c[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
// Gold results (filled by the scalar reference computation in main)
double g[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));

#define THRESHOLD 0.001

// Verify the matrix
// Compare an R x C row-major matrix against its golden reference.
// Returns 0 when every element passes similarity_check() with the given
// threshold. Otherwise returns the flat index of the first mismatching
// element, except that a mismatch at element 0 is returned as -1 so it can be
// told apart from success.
int verify_matrix(double *result, double *gold, size_t R, size_t C,
                  double threshold) {
  for (uint64_t row = 0; row < R; ++row) {
    for (uint64_t col = 0; col < C; ++col) {
      int flat = row * C + col;
      if (similarity_check(result[flat], gold[flat], threshold))
        continue;
      // First mismatch found: encode position 0 as -1, anything else as-is
      if ((row + col) == 0)
        return -1;
      return flat;
    }
  }
  return 0;
}

// fmatmul test entry point.
// Initializes the platform and the vector unit, fills A and B, computes the
// golden result with a scalar triple loop, runs and times the vector fmatmul
// kernel, prints the metrics and verifies C against the golden matrix.
// Returns 0 on success, or the non-zero code reported by verify_matrix().
int main() {
  // Bring up the UART before the first printf
  cheshire_start();
  enable_rvv();

  printf("fmatmul kernel:\r\n");

  unsigned int s = M;

  // Initialize matrices
  for (unsigned int i = 0; i < s; ++i) {
    for (unsigned int k = 0; k < s; ++k) {
      a[k + i * s] = (double)(i + k);
    }
  }
  for (unsigned int k = 0; k < s; ++k) {
    for (unsigned int j = 0; j < s; ++j) {
      b[j + k * s] = (double)(k - j);
    }
  }

  // Run scalar check
  printf("Calculating fmatmul on scalar core...\r\n");
  for (unsigned int i = 0; i < s; ++i) {
    for (unsigned int j = 0; j < s; ++j) {
      double sum = 0;
      for (unsigned int k = 0; k < s; ++k) {
        sum += a[k + i * s] * b[j + k * s];
      }
      g[j + i * s] = sum;
    }
  }

  // Run vector kernel
  printf("Calculating fmatmul on vector core...\r\n");
  start_timer();
  fmatmul(c, a, b, s, s, s);
  stop_timer();

  // Metrics
  int64_t runtime = get_timer();
  float performance = 2.0 * s * s * s / runtime;
  float utilization = 100 * performance / (2.0 * NR_LANES);

  // runtime is 64-bit: print it with a matching length modifier
  printf("The execution took %ld cycles.\r\n", (long)runtime);
  printf("The performance is %f FLOP/cycle (%f%% utilization).\r\n",
         performance, utilization);

  // Verify the result only for s == M (to keep it simple)
  if (s == M) {
    printf("Verifying result...\r\n");
    int error = verify_matrix(c, g, s, s, THRESHOLD);
    if (error != 0) {
      // verify_matrix() reports a mismatch at element 0 as -1; map it back
      // to index 0 so the arrays are never read out of bounds
      int idx = (error < 0) ? 0 : error;
      printf("Error code %d\r\n", error);
      printf("c[%d]=%f != %f\r\n", idx, c[idx], g[idx]);
      cheshire_end();
      return error;
    } else {
      printf("Passed.\r\n");
    }
  }

  cheshire_end();

  return 0;
}
File renamed without changes.
21 changes: 0 additions & 21 deletions cheshire/sw/vector_util.h

This file was deleted.

0 comments on commit 1f0d14b

Please sign in to comment.