diff --git a/.gitmodules b/.gitmodules index 620a456ac..481a1cd46 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,3 +6,6 @@ path = sw/deps/cva6-sdk url = https://github.com/pulp-platform/cva6-sdk.git ignore = dirty +[submodule "sw/deps/litmus-tests"] + path = sw/deps/litmus-tests + url = https://github.com/pulp-platform/CHERI-Litmus.git diff --git a/cheshire.mk b/cheshire.mk index 433409222..b5f14be0a 100644 --- a/cheshire.mk +++ b/cheshire.mk @@ -10,11 +10,13 @@ BENDER ?= bender # Caution: Questasim requires this to point to the *actual* compiler install path CXX_PATH := $(shell which $(CXX)) +QUESTA ?= questa-2023.4 VLOG_ARGS ?= -suppress 2583 -suppress 13314 -timescale 1ns/1ps # Common Bender flags for Cheshire RTL CHS_BENDER_RTL_FLAGS ?= -t rtl -t cva6 -t cv64a6_imafdcsclic_sv39 +NUM_CORES ?= 1 # Define used paths (prefixed to avoid name conflicts) CHS_ROOT ?= $(shell $(BENDER) path cheshire) @@ -61,7 +63,7 @@ chs-clean-deps: ###################### CHS_NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/cheshire-nonfree.git -CHS_NONFREE_COMMIT ?= fd3526f +CHS_NONFREE_COMMIT ?= 1deb6804931b6ded1ec282b0766d0501ff8ce734 CHS_PHONY += chs-nonfree-init chs-nonfree-init: @@ -85,12 +87,15 @@ $(CHS_ROOT)/hw/regs/cheshire_reg_pkg.sv $(CHS_ROOT)/hw/regs/cheshire_reg_top.sv: $(REGTOOL) -r $< --outdir $(dir $@) # CLINT -CLINTCORES ?= 1 +CLINTCORES ?= $(NUM_CORES) include $(CLINTROOT)/clint.mk $(CLINTROOT)/.generated: flock -x $@ $(MAKE) clint && touch $@ # OpenTitan peripherals +$(CHS_ROOT)/hw/rv_plic.cfg.hjson: $(CHS_ROOT)/util/gen_pliccfg.py + $(CHS_ROOT)/util/gen_pliccfg.py --num-cores $(NUM_CORES) > $@ + include $(OTPROOT)/otp.mk $(OTPROOT)/.generated: $(CHS_ROOT)/hw/rv_plic.cfg.hjson flock -x $@ sh -c "cp $< $(dir $@)/src/rv_plic/; $(MAKE) -j1 otp" && touch $@ @@ -168,6 +173,66 @@ CHS_SIM_ALL += $(CHS_ROOT)/target/sim/models/s25fs512s.v CHS_SIM_ALL += $(CHS_ROOT)/target/sim/models/24FC1025.v CHS_SIM_ALL += $(CHS_ROOT)/target/sim/vsim/compile.cheshire_soc.tcl +################ +# Litmus tests # +################ +LITMUS_NCORES ?= 2 +LITMUS_DIR := $(CHS_SW_DIR)/deps/litmus-tests +LITMUS_BIN_DIR := $(LITMUS_DIR)/binaries +LITMUS_WORK_DIR := $(CHS_ROOT)/work-litmus +LITMUS_SIMLOG_DIR := $(LITMUS_WORK_DIR)/simlogs +LITMUS_TEST_LIST := $(LITMUS_WORK_DIR)/litmus-tests.list +LITMUS_RESULTS := $(LITMUS_WORK_DIR)/compare.log + +$(LITMUS_DIR)/.git: + cd $(CHS_ROOT) && git submodule update --init --recursive $(LITMUS_DIR) + +.PHONY: chs-build-litmus-tests +chs-build-litmus-tests: $(LITMUS_DIR)/.git + cd $(LITMUS_DIR)/frontend; ./make.sh + cd $(LITMUS_DIR)/binaries; ./make-riscv.sh ../tests/ cheshire $(LITMUS_NCORES) + +$(LITMUS_WORK_DIR): + mkdir -p $(LITMUS_WORK_DIR) + +$(LITMUS_SIMLOG_DIR): + mkdir -p $(LITMUS_SIMLOG_DIR) + +$(LITMUS_TEST_LIST): $(LITMUS_WORK_DIR) $(LITMUS_SIMLOG_DIR) + @echo Generating $@ ... + @LITMUS_ROOT=$(LITMUS_DIR) LITMUS_WORK=$(LITMUS_WORK_DIR) $(CHS_ROOT)/util/litmus create_list + +$(LITMUS_SIMLOG_DIR)/%.log: $(LITMUS_BIN_DIR)/%.elf $(CHS_SIM_ALL) + @echo "Running test $(notdir $<) (Log file: $@)" + @cd target/sim/vsim && $(QUESTA) vsim -c -do "set PRELMODE 1; set BOOTMODE 0; set BINARY $<; source start.cheshire_soc.tcl; run -all" > $@ 2>&1 + @echo "Finished test $<" + +.PHONY: chs-run-litmus-tests +chs-run-litmus-tests: $(LITMUS_TEST_LIST) + $(eval LITMUS_TESTS_ELF = $(shell xargs printf '\n%s' < $(LITMUS_TEST_LIST))) + @echo Running $(words $(LITMUS_TESTS_ELF)) tests + @$(MAKE) $(addprefix $(LITMUS_SIMLOG_DIR)/, $(LITMUS_TESTS_ELF:.elf=.log)) + @echo "Finished running litmus tests" + +.PHONY: chs-check-litmus-tests +chs-check-litmus-tests: + $(eval export LITMUS_ROOT=$(LITMUS_DIR)) + $(eval export LITMUS_WORK=$(LITMUS_WORK_DIR)) + @echo "Parsing UART output from simulation logs.." + @$(CHS_ROOT)/util/litmus parse_uart + @echo "Patching UART logs.." + @$(CHS_ROOT)/util/litmus patch_uart + @echo "Concatenating logs in a single file.." + @$(CHS_ROOT)/util/litmus combine_logs + @echo "Comparing logs with reference model.." + @$(CHS_ROOT)/util/litmus check > $(LITMUS_RESULTS) + @echo "Done! Check '$(LITMUS_RESULTS)' file" + +.PHONY: chs-clean-litmus-tests +chs-clean-litmus-tests: + rm -rf $(LITMUS_WORK_DIR) + cd $(LITMUS_DIR)/binaries; rm *.elf *.dump + ########### # DRAMSys # ########### diff --git a/hw/bootrom/cheshire_bootrom.S b/hw/bootrom/cheshire_bootrom.S index 22bbeb9f0..c04f938f8 100644 --- a/hw/bootrom/cheshire_bootrom.S +++ b/hw/bootrom/cheshire_bootrom.S @@ -5,10 +5,15 @@ // Nicole Narr // Christopher Reinwardt // Paul Scheffler +// Enrico Zelioli -// TODO: Avoid hardcoding in addresses and offsets +#include +#include -#include "smp.h" +// The hart that non-SMP tests should run on +#ifndef NONSMP_HART +#define NONSMP_HART 0 +#endif .section .text._start @@ -47,7 +52,11 @@ _start: li x31, 0 // Pause SMP harts - smp_pause(t0, t1) + li t1, 0x8 + csrw mie, t1 + li t0, NONSMP_HART + csrr t1, mhartid + bne t0, t1, _wait_for_ipi // Init stack and global pointer with safe, linked values la sp, __stack_pointer$ @@ -57,56 +66,101 @@ _start: .option pop // If LLC present: Wait for end of BIST, then extend stack and set to all SPM - la t0, __base_regs - lw t0, 80(t0) // regs.HW_FEATURES - andi t0, t0, 2 // regs.HW_FEATURES.llc + la t0, __base_regs + lw t0, CHESHIRE_HW_FEATURES_REG_OFFSET(t0) + andi t0, t0, 2 // HW_FEATURES.llc beqz t0, _prom_check_run - la t0, __base_llc + la t0, __base_llc _wait_llc_bist: - lw t1, 72(t0) // llc.BIST_STATUS_DONE_BIT + lw t1, AXI_LLC_BIST_STATUS_REG_OFFSET(t0) // Check BIST status done bit beqz t1, _wait_llc_bist - li t1, -1 - sw t1, 0(t0) // llc.CFG_SPM_LOW - sw t1, 4(t0) // llc.CFG_SPM_HIGH - li t1, 1 - sw t1, 16(t0) // llc.CFG_COMMIT + li t1, -1 + sw t1, AXI_LLC_CFG_SPM_LOW_REG_OFFSET(t0) + sw t1, AXI_LLC_CFG_SPM_HIGH_REG_OFFSET(t0) + li t1, 1 + sw t1, AXI_LLC_COMMIT_CFG_REG_OFFSET(t0) // Correct stack to start at end of SPM - la t0, __base_regs - la sp, __base_spm - lw t0, 84(t0) // regs.LLC_SIZE - add sp, sp, t0 + la t0, __base_regs + la sp, __base_spm + lw t0, CHESHIRE_LLC_SIZE_REG_OFFSET(t0) + add sp, sp, t0 addi sp, sp, -8 // Enter Platform ROM if present. _prom_check_run: // Note that we have internal access to SPM here *if and only if* there is an LLC. la t0, __base_regs - lw t0, 72(t0) // regs.PLATFORM_ROM + lw t0, CHESHIRE_PLATFORM_ROM_REG_OFFSET(t0) beqz t0, _boot jalr t0 +// Move to next stage of booting +// 1. Write the address of next stage boot loader in Cheshire's scratch registers +// 2. Resume execution of all other harts .global boot_next_stage boot_next_stage: - // Non-SMP hart: Write boot address into global scratch registers - la t0, __base_regs - sw a0, 16(t0) // regs.SCRATCH[4] + + // Non-SMP hart: write boot address into global scratch registers + la t0, __base_regs + sw a0, CHESHIRE_SCRATCH_4_REG_OFFSET(t0) srli a0, a0, 32 - sw a0, 20(t0) // regs.SCRATCH[5] + sw a0, CHESHIRE_SCRATCH_5_REG_OFFSET(t0) fence - // Resume SMP harts - smp_resume(t0, t1, t2) + + // Resume SMP harts: set CLINT IPI registers + // NOTE: this will cause CLINT to send IPIs to all cores, therefore also the + // non-smp hart will receive one. The following instructions make sure that + // all harts will wait until the IPI is received (WFI with global ie disabled), + // then clear the IPI in the CLINT and wait until all other harts are done with it. + la t0, __base_clint + la t2, __base_regs + lw t2, CHESHIRE_NUM_INT_HARTS_REG_OFFSET(t2) + slli t2, t2, 2 + add t2, t0, t2 // t2 = CLINT_BASE + (n_harts * 4) +1: + li t1, 1 + sw t1, 0(t0) + addi t0, t0, 4 + blt t0, t2, 1b + +// Stall hart until IPI is raised +_wait_for_ipi: + + // Wait until this hart receives IPI + wfi + csrr t1, mip + andi t1, t1, 0x8 + beqz t1, _wait_for_ipi + + // Clear CLINT IPI register for this hart + la t0, __base_clint + csrr t1, mhartid + slli t1, t1, 2 + add t1, t1, t0 + sw zero, 0(t1) // *(CLINT_BASE + hart_id * 4) = 0 + + la t2, __base_regs + lw t2, CHESHIRE_NUM_INT_HARTS_REG_OFFSET(t2) + slli t2, t2, 2 + add t2, t0, t2 // t2 = CLINT_BASE + (n_harts * 4) + + // Wait until *all* CLINT IPI registers are cleared +1: + lw t1, 0(t0) + bnez t1, 1b + addi t0, t0, 4 + blt t0, t2, 1b + + // Jump to next stage // Load boot address from global scratch registers - la t0, __base_regs - lwu t1, 20(t0) // regs.SCRATCH[5] + la t0, __base_regs + lwu t1, CHESHIRE_SCRATCH_5_REG_OFFSET(t0) slli t1, t1, 32 - lwu t0, 16(t0) // regs.SCRATCH[4] - or t0, t0, t1 - // Store hartid to a0 - csrr a0, mhartid - // Jump to boot address - jalr ra, 0(t0) - // We should never get here - ret + lwu t0, CHESHIRE_SCRATCH_4_REG_OFFSET(t0) + or t0, t0, t1 + csrr a0, mhartid // Store hartid to a0 + jalr ra, 0(t0) // Jump to boot address + ret // We should never get here // Reset regs, full fence, then jump to main _boot: @@ -120,9 +174,10 @@ _boot: .global _exit _exit: // Save the return value to scratch register 2 and wait forever + // Set bit 0 to signal that the execution is done. slli a0, a0, 1 ori a0, a0, 1 la t0, __base_regs - sw a0, 8(t0) // regs.SCRATCH[2] + sw a0, CHESHIRE_SCRATCH_2_REG_OFFSET(t0) 1: wfi j 1b diff --git a/hw/rv_plic.cfg.hjson b/hw/rv_plic.cfg.hjson index 865be3a9b..ad2f247d9 100644 --- a/hw/rv_plic.cfg.hjson +++ b/hw/rv_plic.cfg.hjson @@ -3,6 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // // Paul Scheffler +// Enrico Zelioli +// AUTOMATICALLY GENERATED by gen_pliccfg.py; edit the script instead. { instance_name: "rv_plic", diff --git a/sw/boot/zsl.c b/sw/boot/zsl.c index ab2edb655..ed682f7bb 100644 --- a/sw/boot/zsl.c +++ b/sw/boot/zsl.c @@ -14,6 +14,7 @@ #include "gpt.h" #include "dif/uart.h" #include "printf.h" +#include "smp.h" // Type for firmware payload typedef int (*payload_t)(uint64_t, uint64_t, uint64_t); @@ -51,40 +52,48 @@ static inline void load_part_or_spin(void *priv, const uint64_t *pguid, void *co } int main(void) { - // Get system parameters - uint32_t bootmode = *reg32(&__base_regs, CHESHIRE_BOOT_MODE_REG_OFFSET); - uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); - uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500); - rgp = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_3_REG_OFFSET); - uint32_t read = *reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET); - void *priv = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_1_REG_OFFSET); - // Initialize UART - uart_init(&__base_uart, core_freq, __BOOT_BAUDRATE); + uint64_t hart_id = get_mhartid(); - // Print boot-critical cat, and also parameters - printf(" /\\___/\\ Boot mode: %d\r\n" - "( o o ) Real-time clock: %d Hz\r\n" - "( =^= ) System clock: %d Hz\r\n" - "( ) Read global ptr: 0x%08x\r\n" - "( P ) Read pointer: 0x%08x\r\n" - "( U # L ) Read argument: 0x%08x\r\n" - "( P )\r\n" - "( ))))))))))\r\n\r\n", - bootmode, rtc_freq, core_freq, rgp, read, priv); + if (hart_id == 0) { + // Get system parameters + uint32_t bootmode = *reg32(&__base_regs, CHESHIRE_BOOT_MODE_REG_OFFSET); + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500); + rgp = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_3_REG_OFFSET); + uint32_t read = *reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET); + void *priv = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_1_REG_OFFSET); - // If this is a GPT disk boot, load payload and device tree - if (read & 1) { - rread = (gpt_read_t)(void *)(uintptr_t)(read & ~1); - load_part_or_spin(priv, __BOOT_DTB_TYPE_GUID, __BOOT_ZSL_DTB, "device tree", 64); - load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192); + // Initialize UART + uart_init(&__base_uart, core_freq, __BOOT_BAUDRATE); + + // Print boot-critical cat, and also parameters + printf(" /\\___/\\ Boot mode: %d\r\n" + "( o o ) Real-time clock: %d Hz\r\n" + "( =^= ) System clock: %d Hz\r\n" + "( ) Read global ptr: 0x%08x\r\n" + "( P ) Read pointer: 0x%08x\r\n" + "( U # L ) Read argument: 0x%08x\r\n" + "( P )\r\n" + "( ))))))))))\r\n\r\n", + bootmode, rtc_freq, core_freq, rgp, read, priv); + + // If this is a GPT disk boot, load payload and device tree + if (read & 1) { + rread = (gpt_read_t)(void *)(uintptr_t)(read & ~1); + load_part_or_spin(priv, __BOOT_DTB_TYPE_GUID, __BOOT_ZSL_DTB, "device tree", 64); + load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192); + } + + // Launch payload + printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", __BOOT_ZSL_FW, + __BOOT_ZSL_DTB); + smp_resume(); } - // Launch payload payload_t fw = __BOOT_ZSL_FW; - printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", fw, __BOOT_ZSL_DTB); fencei(); - return fw(0, (uintptr_t)__BOOT_ZSL_DTB, 0); + return fw(hart_id, (uintptr_t)__BOOT_ZSL_DTB, 0); } // On trap, report relevant CSRs and spin diff --git a/sw/deps/litmus-tests b/sw/deps/litmus-tests new file mode 160000 index 000000000..174253ec8 --- /dev/null +++ b/sw/deps/litmus-tests @@ -0,0 +1 @@ +Subproject commit 174253ec83e851cec2a7b6c10e6d40e8daf3ab07 diff --git a/sw/include/smp.h b/sw/include/smp.h index d13d87579..ea77d8c4e 100644 --- a/sw/include/smp.h +++ b/sw/include/smp.h @@ -1,49 +1,25 @@ -// Copyright 2023 ETH Zurich and University of Bologna. +// Copyright 2022 ETH Zurich and University of Bologna. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi +// Enrico Zelioli #pragma once -// The hart that non-SMP tests should run on -#ifndef NONSMP_HART -#define NONSMP_HART 0 -#endif +#include +#include -// Let non-SMP hart continue and all other harts jump (and loop) in smp_resume -#define smp_pause(reg1, reg2) \ - li reg2, 0x8; \ - csrw mie, reg2; \ - li reg1, NONSMP_HART; \ - csrr reg2, mhartid; \ - bne reg1, reg2, 2f +#include "util.h" +#include "regs/cheshire.h" +#include "params.h" -#define smp_resume(reg1, reg2, reg3) \ - la reg1, __base_clint; \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 1:; \ - li reg2, 1; \ - sw reg2, 0(reg1); \ - addi reg1, reg1, 4; \ - blt reg1, reg3, 1b; \ - 2:; \ - wfi; \ - csrr reg2, mip; \ - andi reg2, reg2, 0x8; \ - beqz reg2, 2b; \ - la reg1, __base_clint; \ - csrr reg2, mhartid; \ - slli reg2, reg2, 2; \ - add reg2, reg2, reg1; \ - sw zero, 0(reg2); \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 3:; \ - lw reg2, 0(reg1); \ - bnez reg2, 3b; \ - addi reg1, reg1, 4; \ - blt reg1, reg3, 3b +/* + * Resume execution in all harts. + * Send an IPI to all harts except for hart 0. + */ +void smp_resume(void); + +void smp_barrier_init(); +void smp_barrier_up(uint64_t n_processes); +void smp_barrier_down(); diff --git a/sw/include/util.h b/sw/include/util.h index 07159945d..621636281 100644 --- a/sw/include/util.h +++ b/sw/include/util.h @@ -33,6 +33,10 @@ static inline void wfi() { asm volatile("wfi" ::: "memory"); } +static inline void nop() { + asm volatile("nop" ::: "memory"); +} + // Enables or disables M-mode timer interrupts. static inline void set_mtie(int enable) { if (enable) @@ -41,6 +45,29 @@ static inline void set_mtie(int enable) { asm volatile("csrc mie, %0" ::"r"(128) : "memory"); } +// Enables or disables M-mode software interrupts. +static inline void set_msie(int enable) { + if (enable) + asm volatile("csrs mie, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mie, %0" ::"r"(8) : "memory"); +} + +// Enables or disables M-mode software interrupts pending bit. +static inline void set_msip(int enable) { + if (enable) + asm volatile("csrs mip, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mip, %0" ::"r"(8) : "memory"); +} + +// Get M-mode software interrupts pending bit. +static inline uint64_t get_msip() { + uint64_t msip; + asm volatile("csrr %0, mip" : "=r"(msip)::"memory"); + return (msip & 0x8) >> 3; +} + // Enables or disables M-mode global interrupts. static inline void set_mie(int enable) { if (enable) @@ -49,6 +76,13 @@ static inline void set_mie(int enable) { asm volatile("csrci mstatus, 8" ::: "memory"); } +// Get hart id +static inline uint64_t get_mhartid() { + uint64_t mhartid; + asm volatile("csrr %0, mhartid" : "=r"(mhartid)::"memory"); + return mhartid; +} + // Get cycle count since reset static inline uint64_t get_mcycle() { uint64_t mcycle; diff --git a/sw/lib/crt0.S b/sw/lib/crt0.S index ebf372ada..7bf8a870b 100644 --- a/sw/lib/crt0.S +++ b/sw/lib/crt0.S @@ -5,6 +5,8 @@ // Nicole Narr // Christopher Reinwardt // Paul Scheffler +// Emanuele Parisi +// Enrico Zelioli .section .text._start @@ -14,28 +16,29 @@ _start: // Globally disable Machine and Supervisor interrupts csrrc x0, mstatus, 10 - // Park SMP harts - csrr t0, mhartid - beqz t0, 2f -1: - wfi - j 1b -2: - // Init stack and global pointer iff linked as nonzero - mv t1, sp - la t0, __stack_pointer$ - beqz t0, 1f - mv sp, t0 -1: .option push +_init_gp: + // Init global pointer iff linked as nonzero + .option push .option norelax la t0, __global_pointer$ - beqz t0, 1f + beqz t0, _init_sp mv gp, t0 -1: .option pop - + .option pop + +_init_sp: + // Init stack pointer iff linked as nonzero + mv t0, sp + la t1, __stack_pointer$ + beqz t1, _init_context + la t2, __stack_size$ + csrr t3, mhartid + mul t3, t3, t2 + sub sp, t1, t3 + +_init_context: // Store existing stack, global, return pointers on new stack addi sp, sp, -24 - sd t1, 0(sp) + sd t0, 0(sp) sd gp, 8(sp) sd ra, 16(sp) @@ -43,31 +46,6 @@ _start: la t0, _trap_handler_wrap csrrw x0, mtvec, t0 - // Zero the .bss section - la t0, __bss_start // t0 = bss start address - la t1, __bss_end // t1 = bss end address - sub t2, t1, t0 // t2 = #bytes to zero - li a0, 0 - -_zero_bss_loop: - addi t4, t2, -32 - blez t2, _fp_init // t2 <= 0? => No bss to zero - blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left - sd a0, 0(t0) - sd a0, 8(t0) - sd a0, 16(t0) - sd a0, 24(t0) - addi t2, t2, -32 - addi t0, t0, 32 - bgt t2, x0, _zero_bss_loop // Still more to go - j _fp_init - -_zero_bss_rem: - sb a0, 0(t0) - addi t2, t2, -1 - addi t0, t0, 1 - bgt t2, x0, _zero_bss_rem - _fp_init: // Set FS state to "Initial", enabling FP instructions li t1, 1 @@ -111,6 +89,40 @@ _fp_init: // Set FS state to "Clean" csrrc x0, mstatus, t1 +// Pause all harts except for hart 0 until a IPI is received. +// On wake-up every core resumes execution from the beginning of main(). +_smp_pause: + // Pause harts with hart ID != 0 + csrr t0, mhartid + bnez t0, _wait_for_ipi + +_zero_bss_init: + // Zero the .bss section + la t0, __bss_start // t0 = bss start address + la t1, __bss_end // t1 = bss end address + sub t2, t1, t0 // t2 = #bytes to zero + li a0, 0 + +_zero_bss_loop: + addi t4, t2, -32 + blez t2, _entry // t2 <= 0? => No bss to zero + blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left + sd a0, 0(t0) + sd a0, 8(t0) + sd a0, 16(t0) + sd a0, 24(t0) + addi t2, t2, -32 + addi t0, t0, 32 + bgt t2, x0, _zero_bss_loop // Still more to go + j _entry + +_zero_bss_rem: + sb a0, 0(t0) + addi t2, t2, -1 + addi t0, t0, 1 + bgt t2, x0, _zero_bss_rem + +_entry: // Full fence, then jump to main fence call main @@ -130,6 +142,25 @@ _exit: // Hand over to whatever called us, passing return ret +_wait_for_ipi: + csrs mie, 0x8 // Enable M-mode software interrupts +1: + wfi + csrr t0, mip + andi t0, t0, 0x8 + beqz t0, 1b + + // Received IPI -> clear MIP and CLINT IPI register + csrc mip, 0x8 + la t0, __base_clint + csrr t1, mhartid + slli t1, t1, 2 + add t1, t1, t0 + sw zero, 0(t1) // *(CLINT_BASE + hart_id * 4) = 0 + + // Resume execution of non-smp harts at beginning of main + j _entry + // This wraps the C trap handler to save the (integer-only) caller-save // registers and perform a proper machine-mode exception return. .align 4 diff --git a/sw/lib/smp.c b/sw/lib/smp.c new file mode 100644 index 000000000..3474b6e14 --- /dev/null +++ b/sw/lib/smp.c @@ -0,0 +1,48 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi +// Enrico Zelioli + +#include "smp.h" + +void smp_resume(void) { + uint32_t num_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET); + // Flush cache and wake-up all sleeping cores + fence(); + for (uint32_t i = 1; i < num_harts; i++) { + *reg32(&__base_clint, i << 2) = 0x1; + while (*reg32(&__base_clint, i << 2)) + ; + } +} + +// Shared variable for barrier synchronization +static volatile uint64_t _barrier_target = 0; + +static void barrier_wait(volatile uint64_t *barrier, uint64_t incr, uint64_t reach) { + asm volatile("amoadd.d x6, %1, (%0) \n" + "2: \n" + "fence \n" + "ld x6, 0(%0) \n" + "bne x6, %2, 2b \n" + + : /* output operands */ + : /* input operands */ + "r"(barrier), "r"(incr), "r"(reach) + : /* clobbered registers */ + "x6"); +} + +void smp_barrier_init() { + _barrier_target = 0; +} + +void smp_barrier_up(uint64_t n_processes) { + barrier_wait(&_barrier_target, 1, n_processes); +} + +void smp_barrier_down() { + barrier_wait(&_barrier_target, -1, 0); +} diff --git a/sw/link/common.ldh b/sw/link/common.ldh index d55a52498..7eb308c3e 100644 --- a/sw/link/common.ldh +++ b/sw/link/common.ldh @@ -29,6 +29,10 @@ SECTIONS { __global_pointer$ = ADDR(.misc) + SIZEOF(.misc) / 2; __stack_pointer$ = 0; + /* Stack size */ + /* Use a default stack size of 4KiB */ + __stack_size$ = 0x1000; + /* Further addresses */ __base_dma = 0x01000000; __base_bootrom = 0x02000000; diff --git a/sw/link/rom.ld b/sw/link/rom.ld index 29550ae2c..f386645c5 100644 --- a/sw/link/rom.ld +++ b/sw/link/rom.ld @@ -9,6 +9,8 @@ INCLUDE common.ldh SECTIONS { + __stack_pointer$ = ORIGIN(spm) + LENGTH(spm) - 8; + /* Put all LOAD sections in one contiguous output section */ .misc : { *(.text._start) diff --git a/sw/link/spm.ld b/sw/link/spm.ld index bedd94734..ff96285cc 100644 --- a/sw/link/spm.ld +++ b/sw/link/spm.ld @@ -9,6 +9,8 @@ INCLUDE common.ldh SECTIONS { + __stack_pointer$ = ORIGIN(spm) + LENGTH(spm) - 8; + .text : { *(.text._start) *(.text) diff --git a/sw/tests/smp_hello.c b/sw/tests/smp_hello.c new file mode 100644 index 000000000..c68b6a745 --- /dev/null +++ b/sw/tests/smp_hello.c @@ -0,0 +1,58 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Nicole Narr +// Christopher Reinwardt +// Emanuele Parisi +// Enrico Zelioli +// +// Simple SMP Hello World. + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" +#include "smp.h" +#include "printf.h" + +uint32_t __attribute__((section(".data"))) semaphore = 0x0; + +void semaphore_wait() { + asm volatile(" li t0, 1 \n" + "1: \n" + " amoswap.w.aq t0, t0, (%0) \n" + " bnez t0, 1b \n" ::"r"(&semaphore)); +} + +void semaphore_post() { + asm volatile(" amoswap.w.rl zero, zero, (%0) \n" ::"r"(&semaphore)); +} + +int main(void) { + + uint64_t hart_id = get_mhartid(); + uint32_t num_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET); + + if (hart_id == 0) { + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); + uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE); + smp_barrier_init(); + smp_resume(); + } + + smp_barrier_up(num_harts); + + for (uint64_t i = 0; i < 1; i++) { + semaphore_wait(); + printf("Core %d/%d up\n", hart_id, num_harts); + uart_write_flush(&__base_uart); + semaphore_post(); + } + + smp_barrier_down(); + + return 0; +} diff --git a/target/sim/src/tb_cheshire_pkg.sv b/target/sim/src/tb_cheshire_pkg.sv index 8197b2dfa..7b5f882db 100644 --- a/target/sim/src/tb_cheshire_pkg.sv +++ b/target/sim/src/tb_cheshire_pkg.sv @@ -23,14 +23,22 @@ package tb_cheshire_pkg; return ret; endfunction + // A dedicated dual-core config + function automatic cheshire_cfg_t gen_cheshire_dualcore_cfg(); + cheshire_cfg_t ret = DefaultCfg; + ret.NumCores = 2; + return ret; + endfunction + // Number of Cheshire configurations - localparam int unsigned NumCheshireConfigs = 32'd3; + localparam int unsigned NumCheshireConfigs = 32'd4; // Assemble a configuration array indexed by a numeric parameter localparam cheshire_cfg_t [NumCheshireConfigs-1:0] TbCheshireConfigs = { - gen_cheshire_clic_cfg(), // 2: CLIC-enabled configuration - gen_cheshire_rt_cfg(), // 1: RT-enabled configuration - DefaultCfg // 0: Default configuration + gen_cheshire_dualcore_cfg(), // 3: Dual-core configuration + gen_cheshire_clic_cfg(), // 2: CLIC-enabled configuration + gen_cheshire_rt_cfg(), // 1: RT-enabled configuration + DefaultCfg // 0: Default configuration }; endpackage diff --git a/util/gen_pliccfg.py b/util/gen_pliccfg.py new file mode 100755 index 000000000..07d7e5705 --- /dev/null +++ b/util/gen_pliccfg.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Nicole Narr +# Christopher Reinwardt +# +# Fabian Schuiki +# Florian Zaruba +# Stefan Mach +# Thomas Benz +# Paul Scheffler +# Wolfgang Roenninger +# Gianna Paulin +# Tim Fischer +# Enrico Zelioli + +import os +import argparse + +# Parse arguments. +parser = argparse.ArgumentParser(description="Generate rv_plic.cfg.hjson") +parser.add_argument( + "--num-cores", + "-n", + dest="num_cores", + default=1, + type=int, + help= + "Number of cores attached to the PLIC. The number of PLIC targets is set accordingly (2 * num-cores)" +) +args = parser.parse_args() + +num_targets = args.num_cores * 2 + +# Emit the code. +print(""" +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Paul Scheffler +// Enrico Zelioli +// AUTOMATICALLY GENERATED by {script}; edit the script instead. + +{{ + instance_name: \"rv_plic\", + param_values: {{ + src: 58, + target: {targets}, // We need *two targets* per hart: M and S modes + prio: 7, + nonstd_regs: 0 // Do *not* include these: MSIPs are not used and we use a 64 MiB address space + }}, +}} + +""".strip().format( + script=os.path.basename(__file__), + targets=num_targets +)) diff --git a/util/litmus b/util/litmus new file mode 100755 index 000000000..dd2b2445c --- /dev/null +++ b/util/litmus @@ -0,0 +1,116 @@ +#!/bin/bash + +######################################################## +## Post-processing of Litmus tests simulation output. ## +## Call any of the defined utility functions. ## +######################################################## + +USAGE="USAGE: $0 \nFor a list of commands available, run: $0 --help\n" +COMMANDS="show_vars create_list parse_uart patch_uart combine_logs check cleanup_simlogs" + +CWD=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +CHS_ROOT=$(cd ${CWD}/..; pwd) + +## Set helper variables to default values if not specified +set_vars () { + [ -z ${LITMUS_ROOT} ] && LITMUS_ROOT=${CHS_ROOT}/sw/tests/riscv-litmus-tests + [ -z ${LITMUS_WORK} ] && LITMUS_WORK=${CHS_ROOT}/work-litmus + [ -z ${LITMUS_LIST} ] && LITMUS_LIST=${LITMUS_WORK}/litmus-tests.list + [ -z ${LITMUS_SIMLOGS} ] && LITMUS_SIMLOGS=${LITMUS_WORK}/simlogs + [ -z ${LITMUS_UART} ] && LITMUS_UART=${LITMUS_WORK}/uart + [ -z ${LITMUS_LOGS} ] && LITMUS_LOGS=${LITMUS_WORK}/logs + [ -z ${LITMUS_LOG} ] && LITMUS_LOG=${LITMUS_WORK}/litmus.log +} + +## Print all helper variables (for debug purposes) +litmus_show_vars () { + echo "CHS_ROOT = ${CHS_ROOT}" + echo "LITMUS_WORK = ${LITMUS_WORK}" + echo "LITMUS_SIMLOGS = ${LITMUS_SIMLOGS}" + echo "LITMUS_UART = ${LITMUS_UART}" + echo "LITMUS_LOGS = ${LITMUS_LOGS}" +} + +## Write all names of litmus tests binaries in `${LITMUS_LIST}` file +litmus_create_list () { + [ -f ${LITMUS_LIST} ] && rm ${LITMUS_LIST} + touch ${LITMUS_LIST} + for f in $(find ${LITMUS_ROOT}/binaries/ -name "*.elf"); do + # f=$(echo $f | sed 's/\[/\\\[/g') # add backslashes before brackets + f=$(basename $f) + echo $f >> ${LITMUS_LIST} + done +} + +## Extract UART log from simulation transcripts +litmus_parse_uart () { + mkdir -p ${LITMUS_UART} + for file in $(ls ${LITMUS_SIMLOGS}/*.log); do + # Extract test name from file path + IFS='/' read -ra filename <<< "${file}" + filename=${filename[-1]} # remove basename + testname=${filename::-4} # remove ".log" at the end + sed -n 's/^# \[UART\] \(.*\)/\1/p' < ${file} > ${LITMUS_UART}/${filename}.uart.log + done +} + +## Patch the UART output with header and trailer +litmus_patch_uart () { + mkdir -p ${LITMUS_LOGS} + for file in $(ls ${LITMUS_UART}/*.uart.log); do + # Extract test name from file path + IFS='/' read -ra filename <<< "${file}" + filename=${filename[-1]} # remove basename + testname=${filename::-9} # remove ".uart.log" at the end + outfile="${LITMUS_LOGS}/${testname}.litmus.log" + echo "Test $(basename ${testname} .log) Allowed" > ${outfile} + echo "Histogram" >> ${outfile} + cat ${file} >> ${outfile} + echo "" >> ${outfile} + done +} + +## Combine all log files in `${LITMUS_LOGS}` directory within a single log file +litmus_combine_logs () { + [ -f ${LITMUS_LOG} ] && rm ${LITMUS_LOG} + for file in $(ls ${LITMUS_LOGS}/*); do + cat ${file} >> ${LITMUS_LOG} + done +} + +## Compare the Litmus tests logs with the reference model +litmus_check () { + cd ${LITMUS_ROOT} && MCMP7=/home/nwistoff/.opam/centos/bin/mcompare7 LITMUS_LOG=${LITMUS_LOG} ./ci/compare_model.sh +} + +## Clean up incomplete simulation log files in `${LITMUS_WORK}` directory. +## This can be useful in case some simulations failed (e.g. due to insufficient disk space). +litmus_cleanup_simlogs () { + for file in $(ls ${LITMUS_WORK}/*.log); do + grep -e \$finish ${file} > /tmp/null + [ $? == 0 ] || rm ${file} + done +} + +###################### +## Parse parameters ## +###################### + +if [ $# -lt 1 ]; then + printf "${USAGE}" + exit 1 +fi + +case $1 in + -h | --help) + printf "Commands available:\n" + for cmd in ${COMMANDS}; do echo "- ${cmd}"; done + ;; + *) + CMD="litmus_$1" + ;; +esac + +## Run specified command +set_vars +eval ${CMD}