Skip to content

Commit

Permalink
hw: Add native iDMA 2D capabilities, tune interconnect (#73)
Browse files Browse the repository at this point in the history
* iDMA: Add 2D support in hardware, tune interconnect

* hw: Some cleanup

---------

Co-authored-by: Alessandro Ottaviano <[email protected]>
Co-authored-by: Paul Scheffler <[email protected]>
  • Loading branch information
3 people authored Sep 19, 2023
1 parent a3488e5 commit f7568e2
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 19 deletions.
8 changes: 4 additions & 4 deletions Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ packages:
- apb
- register_interface
axi:
revision: bfee21757bf090ec8e358456314b0b0fd3c90809
version: 0.39.0
revision: fccffb5953ec8564218ba05e20adbedec845e014
version: 0.39.1
source:
Git: https://github.com/pulp-platform/axi.git
dependencies:
Expand Down Expand Up @@ -108,8 +108,8 @@ packages:
dependencies:
- common_cells
idma:
revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1
version: null
revision: 2c64e0773fab5a54757646715485fcdf3432c7c1
version: 0.5.0
source:
Git: https://github.com/pulp-platform/iDMA.git
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.29.0 }
common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.0 }
cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: pulp-v0.4.3 }
iDMA: { git: "https://github.com/pulp-platform/iDMA.git", rev: 437ffa9 } # TODO: master commit; use next release once out
iDMA: { git: "https://github.com/pulp-platform/iDMA.git", version: 0.5.0 }
opentitan_peripherals: { git: "https://github.com/pulp-platform/opentitan_peripherals.git", version: 0.4.0 }
register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.1 }
riscv-dbg: { git: "https://github.com/pulp-platform/riscv-dbg.git", version: 0.8.0 }
Expand Down
1 change: 1 addition & 0 deletions cheshire.mk
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ CHS_LLC_DIR := $(shell $(BENDER) path axi_llc)
OTPROOT := $(shell $(BENDER) path opentitan_peripherals)
CLINTROOT := $(shell $(BENDER) path clint)
AXI_VGA_ROOT := $(shell $(BENDER) path axi_vga)
IDMA_ROOT := $(shell $(BENDER) path idma)

REGTOOL ?= $(CHS_REG_DIR)/vendor/lowrisc_opentitan/util/regtool.py

Expand Down
7 changes: 6 additions & 1 deletion docs/um/arch.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,18 @@ The [Serial Link](https://github.com/pulp-platform/serial_link) is a fully digit

### DMA engine

The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. It exposes the following parameters:
The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. If enabled, the engine natively supports transfers of up to two dimensions in hardware. It exposes the following parameters:

| Parameter | Type / Range | Description |
| ---------------------------- | ------------ | ------------------------------------------------- |
| `DmaConfMax(Read|Write)Txns` | `dw_bt` | Max. number of outstanding requests to DMA config |
| `DmaConfAmoNumCuts` | `aw_bt` | Number of timing cuts inside config AMO filter |
| `DmaConfAmoPostCut` | `bit` | Whether to insert a cut after config AMO filter |
| `DmaConfEnableTwoD` | `bit` | Whether the 2D hardware extension is present |
| `DmaNumAxInFlight` | `dw_bt` | Number of outstanding transfers the DMA launches |
| `DmaMemSysDepth` | `dw_bt` | The *approximate* depth of the memory system |
| `DmaJobFifoDepth` | `aw_bt` | The depth of the job FIFO |
| `DmaRAWCouplingAvail` | `bit` | Whether the R-AW coupling feature is available |

### I2C, SPI, GPIOs

Expand Down
18 changes: 14 additions & 4 deletions hw/cheshire_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ package cheshire_pkg;
dw_bt DmaConfMaxWriteTxns;
aw_bt DmaConfAmoNumCuts;
bit DmaConfAmoPostCut;
bit DmaConfEnableTwoD;
dw_bt DmaNumAxInFlight;
dw_bt DmaMemSysDepth;
aw_bt DmaJobFifoDepth;
bit DmaRAWCouplingAvail;
// Parameters for GPIO
bit GpioInputSyncs;
// Parameters for AXI RT
Expand Down Expand Up @@ -514,8 +519,8 @@ package cheshire_pkg;
AxiDataWidth : 64,
AxiUserWidth : 2, // AMO(2)
AxiMstIdWidth : 2,
AxiMaxMstTrans : 8,
AxiMaxSlvTrans : 8,
AxiMaxMstTrans : 24,
AxiMaxSlvTrans : 24,
AxiUserAmoMsb : 1, // Convention: lower AMO bits for cores, MSB for serial link
AxiUserAmoLsb : 0, // Convention: lower AMO bits for cores, MSB for serial link
AxiUserDefault : 0,
Expand Down Expand Up @@ -550,8 +555,8 @@ package cheshire_pkg;
LlcSetAssoc : 8,
LlcNumLines : 256,
LlcNumBlocks : 8,
LlcMaxReadTxns : 8,
LlcMaxWriteTxns : 8,
LlcMaxReadTxns : 16,
LlcMaxWriteTxns : 16,
LlcAmoNumCuts : 1,
LlcAmoPostCut : 1,
LlcOutConnect : 1,
Expand All @@ -577,6 +582,11 @@ package cheshire_pkg;
DmaConfMaxWriteTxns : 4,
DmaConfAmoNumCuts : 1,
DmaConfAmoPostCut : 1,
DmaConfEnableTwoD : 1,
DmaNumAxInFlight : 16,
DmaMemSysDepth : 8,
DmaJobFifoDepth : 2,
DmaRAWCouplingAvail : 1,
// GPIOs
GpioInputSyncs : 1,
// AXI RT
Expand Down
23 changes: 14 additions & 9 deletions hw/cheshire_soc.sv
Original file line number Diff line number Diff line change
Expand Up @@ -1460,15 +1460,20 @@ module cheshire_soc import cheshire_pkg::*; #(
end

dma_core_wrap #(
.AxiAddrWidth ( Cfg.AddrWidth ),
.AxiDataWidth ( Cfg.AxiDataWidth ),
.AxiIdWidth ( Cfg.AxiMstIdWidth ),
.AxiUserWidth ( Cfg.AxiUserWidth ),
.AxiSlvIdWidth ( AxiSlvIdWidth ),
.axi_mst_req_t ( axi_mst_req_t ),
.axi_mst_rsp_t ( axi_mst_rsp_t ),
.axi_slv_req_t ( axi_slv_req_t ),
.axi_slv_rsp_t ( axi_slv_rsp_t )
.AxiAddrWidth ( Cfg.AddrWidth ),
.AxiDataWidth ( Cfg.AxiDataWidth ),
.AxiIdWidth ( Cfg.AxiMstIdWidth ),
.AxiUserWidth ( Cfg.AxiUserWidth ),
.AxiSlvIdWidth ( AxiSlvIdWidth ),
.NumAxInFlight ( Cfg.DmaNumAxInFlight ),
.MemSysDepth ( Cfg.DmaMemSysDepth ),
.JobFifoDepth ( Cfg.DmaJobFifoDepth ),
.RAWCouplingAvail ( Cfg.DmaRAWCouplingAvail ),
.IsTwoD ( Cfg.DmaConfEnableTwoD ),
.axi_mst_req_t ( axi_mst_req_t ),
.axi_mst_rsp_t ( axi_mst_rsp_t ),
.axi_slv_req_t ( axi_slv_req_t ),
.axi_slv_rsp_t ( axi_slv_rsp_t )
) i_dma (
.clk_i,
.rst_ni,
Expand Down
130 changes: 130 additions & 0 deletions sw/include/dif/dma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Alessandro Ottaviano <[email protected]>
// Thomas Benz <[email protected]>

// This header defines functions (through the X macro below), so it must be
// protected against multiple inclusion within one translation unit.
#pragma once

#include <stdint.h>
#include "regs/idma.h"
#include "params.h"

// Address of each register of the iDMA 64-bit 2D register frontend, computed as
// BASE plus the register's byte offset from regs/idma.h. The result is a `void *`.
// BASE is parenthesized so that any expression may be passed, and the offset is
// applied through a `char *` so the macros do not rely on the GNU extension that
// permits arithmetic on `void *`.
#define DMA_SRC_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET))
#define DMA_DST_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET))
#define DMA_NUMBYTES_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET))
#define DMA_CONF_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET))
#define DMA_STATUS_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET))
#define DMA_NEXTID_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET))
#define DMA_DONE_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET))
#define DMA_SRC_STRIDE_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET))
#define DMA_DST_STRIDE_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET))
#define DMA_NUM_REPS_ADDR(BASE) \
    ((void *)((char *)(BASE) + IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET))

// Values the memcpy helpers in this header write into the corresponding
// fields of the CONF register (all fields are written as 0).
#define DMA_CONF_DECOUPLE 0
#define DMA_CONF_DEBURST 0
#define DMA_CONF_SERIALIZE 0

/*
 * X(NAME, BASE_ADDR): generates the driver functions for one iDMA instance
 * whose register frontend sits at BASE_ADDR. All generated names are prefixed
 * with NAME.
 *
 *   NAME_dma_<reg>_ptr()    volatile 64-bit pointer to one frontend register.
 *   NAME_dma_memcpy()       programs src, dst and size, writes 0 to the
 *                           repetition count, writes CONF, then reads NEXT_ID
 *                           and returns the value read. Per the register
 *                           description in regs/idma.h, reading NEXT_ID
 *                           launches the transfer and yields 0 if the transfer
 *                           was not set up properly.
 *   NAME_dma_2d_memcpy()    same, additionally programming both strides and
 *                           the repetition count.
 *   NAME_dma_*blk_memcpy()  launch a transfer, then spin until the DONE
 *                           register equals the ID returned at launch.
 *                           NOTE(review): a launch that returns 0 is not
 *                           treated specially here - confirm the loop still
 *                           terminates in that case.
 *   NAME_dma_get_status()   returns the current value of the STATUS register.
 *
 * Every function is defined `inline` and also declared `extern` at file scope;
 * under C99 rules the extern declaration is what makes the including
 * translation unit emit an external definition, so a call that the compiler
 * chooses not to inline still links. NAME_dma_get_status lacked that
 * declaration and is now declared like its siblings.
 *
 * Only block comments may be used inside the macro body: a line comment would
 * swallow the spliced continuation lines that follow it.
 */
#define X(NAME, BASE_ADDR) \
    extern volatile uint64_t *NAME##_dma_src_ptr(void); \
    extern volatile uint64_t *NAME##_dma_dst_ptr(void); \
    extern volatile uint64_t *NAME##_dma_num_bytes_ptr(void); \
    extern volatile uint64_t *NAME##_dma_conf_ptr(void); \
    extern volatile uint64_t *NAME##_dma_status_ptr(void); \
    extern volatile uint64_t *NAME##_dma_nextid_ptr(void); \
    extern volatile uint64_t *NAME##_dma_done_ptr(void); \
    extern volatile uint64_t *NAME##_dma_src_stride_ptr(void); \
    extern volatile uint64_t *NAME##_dma_dst_stride_ptr(void); \
    extern volatile uint64_t *NAME##_dma_num_reps_ptr(void); \
    \
    extern uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
    extern void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
    extern uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps); \
    extern void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps); \
    extern uint64_t NAME##_dma_get_status(void); \
    \
    inline volatile uint64_t *NAME##_dma_src_ptr(void) { \
        return (volatile uint64_t *)DMA_SRC_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_dst_ptr(void) { \
        return (volatile uint64_t *)DMA_DST_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_num_bytes_ptr(void) { \
        return (volatile uint64_t *)DMA_NUMBYTES_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_conf_ptr(void) { \
        return (volatile uint64_t *)DMA_CONF_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_status_ptr(void) { \
        return (volatile uint64_t *)DMA_STATUS_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_nextid_ptr(void) { \
        return (volatile uint64_t *)DMA_NEXTID_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_done_ptr(void) { \
        return (volatile uint64_t *)DMA_DONE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_src_stride_ptr(void) { \
        return (volatile uint64_t *)DMA_SRC_STRIDE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_dst_stride_ptr(void) { \
        return (volatile uint64_t *)DMA_DST_STRIDE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_num_reps_ptr(void) { \
        return (volatile uint64_t *)DMA_NUM_REPS_ADDR(BASE_ADDR); \
    } \
    \
    inline uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
        *(NAME##_dma_src_ptr()) = src; \
        *(NAME##_dma_dst_ptr()) = dst; \
        *(NAME##_dma_num_bytes_ptr()) = size; \
        *(NAME##_dma_num_reps_ptr()) = 0; \
        *(NAME##_dma_conf_ptr()) = \
            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
        /* Reading NEXT_ID launches the transfer; must come last. */ \
        return *(NAME##_dma_nextid_ptr()); \
    } \
    \
    inline void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
        const uint64_t tf_id = NAME##_dma_memcpy(dst, src, size); \
        /* Poll DONE until it reports the ID of the transfer just launched. */ \
        while (*(NAME##_dma_done_ptr()) != tf_id) { \
            asm volatile("nop"); \
        } \
    } \
    \
    inline uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps) { \
        *(NAME##_dma_src_ptr()) = src; \
        *(NAME##_dma_dst_ptr()) = dst; \
        *(NAME##_dma_num_bytes_ptr()) = size; \
        *(NAME##_dma_conf_ptr()) = \
            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
        *(NAME##_dma_src_stride_ptr()) = src_stride; \
        *(NAME##_dma_dst_stride_ptr()) = dst_stride; \
        *(NAME##_dma_num_reps_ptr()) = num_reps; \
        /* Reading NEXT_ID launches the transfer; must come last. */ \
        return *(NAME##_dma_nextid_ptr()); \
    } \
    \
    inline void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps) { \
        const uint64_t tf_id = \
            NAME##_dma_2d_memcpy(dst, src, size, dst_stride, src_stride, num_reps); \
        /* Poll DONE until it reports the ID of the transfer just launched. */ \
        while (*(NAME##_dma_done_ptr()) != tf_id) { \
            asm volatile("nop"); \
        } \
    } \
    \
    inline uint64_t NAME##_dma_get_status(void) { \
        return *(NAME##_dma_status_ptr()); \
    }

// Instantiate the driver for the system DMA; generated names carry the `sys_` prefix.
X(sys, &__base_dma);

#undef X
57 changes: 57 additions & 0 deletions sw/include/regs/idma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Generated register defines for idma_reg64_2d_frontend

// Copyright information found in source file:
// Copyright 2022 ETH Zurich and University of Bologna.

// Licensing information found in source file:
// Licensed under Solderpad Hardware License, Version 0.51
// SPDX-License-Identifier: SHL-0.51

// NOTE(review): this file is marked as generated; comments added by hand will
// presumably be lost on regeneration - confirm before relying on them.
//
// Naming convention used below:
//   *_REG_OFFSET  offset of a register, added to the frontend base address
//   *_BIT         bit index of a single-bit field, used as a shift amount
#ifndef _IDMA_REG64_2D_FRONTEND_REG_DEFS_
#define _IDMA_REG64_2D_FRONTEND_REG_DEFS_

#ifdef __cplusplus
extern "C" {
#endif
// Register width (presumably in bits; consecutive register offsets below are 8 apart)
#define IDMA_REG64_2D_FRONTEND_PARAM_REG_WIDTH 64

// Source Address
#define IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET 0x0

// Destination Address
#define IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET 0x8

// Number of bytes
#define IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET 0x10

// Configuration Register for DMA settings
// (register offset, followed by the bit indices of its three fields)
#define IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET 0x18
#define IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT 0
#define IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT 1
#define IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT 2

// DMA Status
// (register offset, followed by the bit index of the busy field)
#define IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET 0x20
#define IDMA_REG64_2D_FRONTEND_STATUS_BUSY_BIT 0

// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET 0x28

// Get ID of finished transactions.
#define IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET 0x30

// Source Stride
#define IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET 0x38

// Destination Stride
#define IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET 0x40

// Number of 2D repetitions
#define IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET 0x48

#ifdef __cplusplus
} // extern "C"
#endif
#endif // _IDMA_REG64_2D_FRONTEND_REG_DEFS_
// End generated register defines for idma_reg64_2d_frontend
1 change: 1 addition & 0 deletions sw/sw.mk
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ endef
$(eval $(call chs_sw_gen_hdr_rule,clint,$(CLINTROOT)/src/clint.hjson $(CLINTROOT)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,serial_link,$(CHS_ROOT)/hw/serial_link.hjson $(CHS_SLINK_DIR)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,axi_vga,$(AXI_VGA_ROOT)/data/axi_vga.hjson $(AXI_VGA_ROOT)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,idma,$(IDMA_ROOT)/src/frontends/register_64bit_2d/idma_reg64_2d_frontend.hjson))
$(eval $(call chs_sw_gen_hdr_rule,axi_llc,$(CHS_LLC_DIR)/data/axi_llc_regs.hjson))
$(eval $(call chs_sw_gen_hdr_rule,cheshire,$(CHS_ROOT)/hw/regs/cheshire_regs.hjson))
$(eval $(call chs_sw_gen_hdr_rule,axi_rt,$(CHS_ROOT)/hw/regs/axi_rt_regs.hjson))
Expand Down
5 changes: 5 additions & 0 deletions target/xilinx/src/cheshire_top_xilinx.sv
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ module cheshire_top_xilinx
DmaConfMaxWriteTxns : 4,
DmaConfAmoNumCuts : 1,
DmaConfAmoPostCut : 1,
DmaConfEnableTwoD : 1,
DmaNumAxInFlight : 16,
DmaMemSysDepth : 8,
DmaJobFifoDepth : 2,
DmaRAWCouplingAvail : 1,
// GPIOs
GpioInputSyncs : 1,
// All non-set values should be zero
Expand Down

0 comments on commit f7568e2

Please sign in to comment.