Squash commits for public release
This commit is contained in:
0
Hardware/.keep
Normal file
0
Hardware/.keep
Normal file
0
Hardware/Constraints/.keep
Normal file
0
Hardware/Constraints/.keep
Normal file
22
Hardware/Constraints/constraints.xdc
Normal file
22
Hardware/Constraints/constraints.xdc
Normal file
@@ -0,0 +1,22 @@
|
||||
#--------------------------------------------------------------------------------
|
||||
#Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
|
||||
#--------------------------------------------------------------------------------
|
||||
#Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
|
||||
#Date : 2025
|
||||
#--------------------------------------------------------------------------------
|
||||
|
||||
# ---------------------------------------------------------------------------
# perst input port: LVCMOS18 I/O standard, pull-up enabled, placed at AV35.
# ---------------------------------------------------------------------------
set_property -dict {IOSTANDARD LVCMOS18 PULLUP true LOC AV35} [get_ports perst]

# ---------------------------------------------------------------------------
# PCIe reference clock (differential pair).
# REFCLK_p / REFCLK_n drive the IBUFDS_GTE2 cell named refclk_ibuf in the HDL
# wrapper; the pads and the buffer site are both pinned here.
# ---------------------------------------------------------------------------
set_property PACKAGE_PIN K8 [get_ports REFCLK_p]
set_property PACKAGE_PIN K7 [get_ports REFCLK_n]
set_property LOC IBUFDS_GTE2_X1Y5 [get_cells refclk_ibuf]

# ---------------------------------------------------------------------------
# DDR3 "initial calibration complete" LED indication output port.
# ---------------------------------------------------------------------------
set_property -dict {PACKAGE_PIN AN39 IOSTANDARD LVCMOS18} [get_ports init_calib_complete]

# ---------------------------------------------------------------------------
# Route the clocking wizard input clock net over the clock backbone.
# ---------------------------------------------------------------------------
set_property CLOCK_DEDICATED_ROUTE BACKBONE [get_nets pcie_acceleration_vc707_design_i/clocking_wizard/inst/clk_in1_pcie_acceleration_vc707_design_clk_wiz_1_0]
|
||||
0
Hardware/HDL_Wrapper/.keep
Normal file
0
Hardware/HDL_Wrapper/.keep
Normal file
128
Hardware/HDL_Wrapper/hdl_wrapper.v
Normal file
128
Hardware/HDL_Wrapper/hdl_wrapper.v
Normal file
@@ -0,0 +1,128 @@
|
||||
//--------------------------------------------------------------------------------
|
||||
//Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
|
||||
//--------------------------------------------------------------------------------
|
||||
//Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
|
||||
//Date : 2025
|
||||
//--------------------------------------------------------------------------------
|
||||
|
||||
`timescale 1 ps / 1 ps
|
||||
|
||||
// -----------------------------------------------------------------------------
// pcie_acceleration_vc707_design_wrapper
//
// Top-level HDL wrapper around the pcie_acceleration_vc707_design block design.
//
// Every board-level port is passed straight through to the block design
// instance. The one piece of logic that lives here is the IBUFDS_GTE2 buffer,
// which turns the differential PCIe reference clock pair (REFCLK_p/REFCLK_n)
// into the single-ended REFCLK net that the block design takes as an input.
//
// NOTE: the instance names `refclk_ibuf` and `pcie_acceleration_vc707_design_i`
// are referenced by name from constraints.xdc (get_cells / get_nets), so they
// must not be renamed without updating the constraints.
// -----------------------------------------------------------------------------
module pcie_acceleration_vc707_design_wrapper
   (// PCIe reference clock, differential pair.
    input  wire        REFCLK_p,
    input  wire        REFCLK_n,

    // DDR3 SDRAM interface.
    output wire [13:0] ddr3_sdram_addr,
    output wire [2:0]  ddr3_sdram_ba,
    output wire        ddr3_sdram_cas_n,
    output wire [0:0]  ddr3_sdram_ck_n,
    output wire [0:0]  ddr3_sdram_ck_p,
    output wire [0:0]  ddr3_sdram_cke,
    output wire [0:0]  ddr3_sdram_cs_n,
    output wire [7:0]  ddr3_sdram_dm,
    inout  wire [63:0] ddr3_sdram_dq,
    inout  wire [7:0]  ddr3_sdram_dqs_n,
    inout  wire [7:0]  ddr3_sdram_dqs_p,
    output wire [0:0]  ddr3_sdram_odt,
    output wire        ddr3_sdram_ras_n,
    output wire        ddr3_sdram_reset_n,
    output wire        ddr3_sdram_we_n,

    // DDR3 initial calibration complete indication.
    output wire        init_calib_complete,

    // PCIe serial lanes (x4).
    input  wire [3:0]  pcie_7x_mgt_rxn,
    input  wire [3:0]  pcie_7x_mgt_rxp,
    output wire [3:0]  pcie_7x_mgt_txn,
    output wire [3:0]  pcie_7x_mgt_txp,

    // Resets.
    input  wire        perst,
    input  wire        reset,

    // RS232 UART.
    input  wire        rs232_uart_rxd,
    output wire        rs232_uart_txd,

    // System differential clock.
    input  wire        sys_diff_clock_clk_n,
    input  wire        sys_diff_clock_clk_p);

  // Single-ended PCIe reference clock produced by refclk_ibuf.
  // Declared explicitly instead of relying on an implicit net, so the design
  // still elaborates under `default_nettype none and a misspelled connection
  // cannot silently create a second, undriven net.
  wire REFCLK;

  // Differential-to-single-ended buffer for the PCIe reference clock.
  // CEB is tied low and the ODIV2 output is left unconnected.
  IBUFDS_GTE2 refclk_ibuf
       (.O    (REFCLK),
        .ODIV2(),
        .I    (REFCLK_p),
        .CEB  (1'b0),
        .IB   (REFCLK_n));

  // Block design instance; all remaining ports map one-to-one by name.
  pcie_acceleration_vc707_design pcie_acceleration_vc707_design_i
       (.REFCLK              (REFCLK),
        .ddr3_sdram_addr     (ddr3_sdram_addr),
        .ddr3_sdram_ba       (ddr3_sdram_ba),
        .ddr3_sdram_cas_n    (ddr3_sdram_cas_n),
        .ddr3_sdram_ck_n     (ddr3_sdram_ck_n),
        .ddr3_sdram_ck_p     (ddr3_sdram_ck_p),
        .ddr3_sdram_cke      (ddr3_sdram_cke),
        .ddr3_sdram_cs_n     (ddr3_sdram_cs_n),
        .ddr3_sdram_dm       (ddr3_sdram_dm),
        .ddr3_sdram_dq       (ddr3_sdram_dq),
        .ddr3_sdram_dqs_n    (ddr3_sdram_dqs_n),
        .ddr3_sdram_dqs_p    (ddr3_sdram_dqs_p),
        .ddr3_sdram_odt      (ddr3_sdram_odt),
        .ddr3_sdram_ras_n    (ddr3_sdram_ras_n),
        .ddr3_sdram_reset_n  (ddr3_sdram_reset_n),
        .ddr3_sdram_we_n     (ddr3_sdram_we_n),
        .init_calib_complete (init_calib_complete),
        .pcie_7x_mgt_rxn     (pcie_7x_mgt_rxn),
        .pcie_7x_mgt_rxp     (pcie_7x_mgt_rxp),
        .pcie_7x_mgt_txn     (pcie_7x_mgt_txn),
        .pcie_7x_mgt_txp     (pcie_7x_mgt_txp),
        .perst               (perst),
        .reset               (reset),
        .rs232_uart_rxd      (rs232_uart_rxd),
        .rs232_uart_txd      (rs232_uart_txd),
        .sys_diff_clock_clk_n(sys_diff_clock_clk_n),
        .sys_diff_clock_clk_p(sys_diff_clock_clk_p));
endmodule
|
||||
0
Hardware/Vivado_Block_Design/.keep
Normal file
0
Hardware/Vivado_Block_Design/.keep
Normal file
2679
Hardware/Vivado_Block_Design/pcie_acceleration_vc707_design.tcl
Normal file
2679
Hardware/Vivado_Block_Design/pcie_acceleration_vc707_design.tcl
Normal file
File diff suppressed because it is too large
Load Diff
0
Hardware/Vivado_HLS_IPs/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/.keep
Normal file
@@ -0,0 +1,518 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_direct.h"
|
||||
|
||||
/*
 * -----------------------------
 * Registers of the Sobel Filter
 * -----------------------------
 *
 * Byte offsets relative to the base address of the Sobel Filter
 * (sobel_device_address). The scheduler adds the offset to the base address
 * and divides by 4 to index its 32-bit AXI Master pointer.
 */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL   0x00 /* Control Register (Written to Start the Filter). */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18 /* Image Rows Register. */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20 /* Image Columns Register. */
|
||||
|
||||
|
||||
/*
 * ------------------------------
 * Registers and Masks of the DMA
 * ------------------------------
 *
 * All offsets are byte offsets. A register is addressed as:
 *
 *     dma_device_address + <channel base offset> + <register offset>
 */

/*
 * Channel Base Offsets.
 * The Tx Channel is Used for the MM2S Transfer and the Rx Channel for the S2MM Transfer.
 */
#define XAXIDMA_TX_OFFSET       0x00 /* Tx Channel Registers Base Offset. */
#define XAXIDMA_RX_OFFSET       0x30 /* Rx Channel Registers Base Offset. */

/*
 * Per-Channel Register Offsets, Valid for both Channels of the DMA.
 * Add XAXIDMA_TX_OFFSET to Reach the TX Channel or XAXIDMA_RX_OFFSET to Reach the RX Channel.
 *
 * The Source and Destination Address Registers deliberately Share the Value 0x18:
 * the Source Address is Used with the Tx Base (0x00 + 0x18 = 0x18) and
 * the Destination Address with the Rx Base (0x30 + 0x18 = 0x48).
 */
#define XAXIDMA_CR_OFFSET       0x00 /* Control Register. */
#define XAXIDMA_SR_OFFSET       0x04 /* Status Register. */
#define XAXIDMA_SRCADDR_OFFSET  0x18 /* Source Address Register. */
#define XAXIDMA_DESTADDR_OFFSET 0x18 /* Destination Address Register. */
#define XAXIDMA_BUFFLEN_OFFSET  0x28 /* Transfer Data Size Register. */

/*
 * Control Register Bit Masks.
 */
#define XAXIDMA_CR_RUNSTOP_MASK (1 << 0) /* Start/Stop DMA Channel Mask (0x00000001). */
#define XAXIDMA_CR_RESET_MASK   (1 << 2) /* Reset DMA Mask (0x00000004). */

/*
 * Interrupt Bit Masks.
 */
#define XAXIDMA_IRQ_IOC_MASK    (1 << 12) /* Completion Interrupt Mask (0x00001000). */
#define XAXIDMA_IRQ_DELAY_MASK  (1 << 13) /* Delay Interrupt Mask (0x00002000). */
#define XAXIDMA_IRQ_ERROR_MASK  (1 << 14) /* Error Interrupt Mask (0x00004000). */

/* All Interrupts Mask (0x00007000). */
#define XAXIDMA_IRQ_ALL_MASK    (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK | XAXIDMA_IRQ_ERROR_MASK)
|
||||
|
||||
|
||||
/*
 * -------------------------------------------------------------
 * Registers and Masks of the AXI Performance Monitor Unit (APM)
 * -------------------------------------------------------------
 *
 * All offsets are byte offsets relative to the base address of an APM
 * (apm_device_address or shared_apm_device_address).
 */

/*
 * Control Register Bit Masks.
 */
#define XAPM_CR_GCC_RESET_MASK    (1 << 17) /* Global Clock Counter (GCC) Reset Mask (0x00020000). */
#define XAPM_CR_GCC_ENABLE_MASK   (1 << 16) /* Global Clock Counter (GCC) Enable Mask (0x00010000). */
#define XAPM_CR_MCNTR_RESET_MASK  (1 << 1)  /* Metrics Counter Reset Mask (0x00000002). */
#define XAPM_CR_MCNTR_ENABLE_MASK (1 << 0)  /* Metrics Counter Enable Mask (0x00000001). */

/*
 * Control and Global Clock Counter Register Offsets.
 */
#define XAPM_CTL_OFFSET           0x0300 /* Control Register Offset. */
#define XAPM_GCC_HIGH_OFFSET      0x0000 /* Global Clock Counter 32 to 63 bits (Upper) Register Offset. */
#define XAPM_GCC_LOW_OFFSET       0x0004 /* Global Clock Counter 0 to 31 bits (Lower) Register Offset. */

/*
 * Metrics Counter Register Offsets (one Counter every 0x10 Bytes).
 */
#define XAPM_MC0_OFFSET           0x0100 /* Metrics Counter 0 Register Offset. */
#define XAPM_MC1_OFFSET           0x0110 /* Metrics Counter 1 Register Offset. */
#define XAPM_MC2_OFFSET           0x0120 /* Metrics Counter 2 Register Offset. */
#define XAPM_MC3_OFFSET           0x0130 /* Metrics Counter 3 Register Offset. */
#define XAPM_MC4_OFFSET           0x0140 /* Metrics Counter 4 Register Offset. */
#define XAPM_MC5_OFFSET           0x0150 /* Metrics Counter 5 Register Offset. */
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_direct()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Direct Core.
|
||||
*
|
||||
* The Acceleration Scheduler Direct Core is Part of the Acceleration Group Direct and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Direct as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It also Interacts with the Interrupt Manager to Signal the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* c --> Setup and Start the Sobel Filter.
|
||||
* d --> Setup and Start the S2MM and MM2S DMA Transfers.
|
||||
* e --> Wait for an Interrupt by the DMA on Completion of the Transfer.
|
||||
* f --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* g --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* h --> Acknowledge the DMA Interrupt.
|
||||
* i --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* j --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* k --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
|
||||
* 03 to 13 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_direct(/*01*/volatile ap_uint<32> *mm2s_ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *dma_intr_in,
|
||||
/*03*/unsigned int dma_device_address,
|
||||
/*04*/unsigned int sobel_device_address,
|
||||
/*05*/unsigned int interrupt_manager_register_offset,
|
||||
/*06*/unsigned int apm_device_address,
|
||||
/*07*/unsigned int shared_apm_device_address,
|
||||
/*08*/unsigned int shared_metrics_address,
|
||||
/*09*/unsigned int image_cols,
|
||||
/*10*/unsigned int image_rows,
|
||||
/*11*/unsigned int host_mem_src_data_address,
|
||||
/*12*/unsigned int host_mem_dst_data_address,
|
||||
/*13*/unsigned int initiator_group
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The mm2s_ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=mm2s_ext_cfg
|
||||
|
||||
/*
|
||||
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=dma_intr_in
|
||||
|
||||
/*
|
||||
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The host_mem_src_data_address is a Register to Store the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=host_mem_src_data_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The host_mem_dst_data_address is a Register to Store the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=host_mem_dst_data_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The initiator_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=initiator_group bundle=mm2s_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (host_mem_dst_data_address) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
data_register = host_mem_dst_data_address;
|
||||
|
||||
//Write the Destination Address to the Destination Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows) * 4;
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (host_mem_src_data_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
data_register = host_mem_src_data_address;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows) * 4;
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Wait for a DMA Interrupt
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(dma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
dma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------
|
||||
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
|
||||
* IOC Stands for: Interrupt On Complete
|
||||
* ---------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the Triggered Interrupts
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------------------------------------------------
|
||||
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
|
||||
* ------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (initiator_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
|
||||
data_register = initiator_group;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte offsets of the 32-bit, hardware-written members of struct metrics.
 * Every value equals the byte position annotated next to the matching member of struct metrics,
 * so the two must be kept in step whenever the struct layout changes.
 */

/* AXI Performance Monitor (APM) counters. */
#define APM_READ_TRANSACTIONS_OFFSET     0
#define APM_READ_BYTES_OFFSET            4
#define APM_WRITE_TRANSACTIONS_OFFSET    8
#define APM_WRITE_BYTES_OFFSET          12
#define APM_PACKETS_OFFSET              16
#define APM_BYTES_OFFSET                20

/* APM Global Clock Counter (GCC), lower and upper 32-bit words. */
#define APM_GCC_L_OFFSET                24
#define APM_GCC_U_OFFSET                28

/* CDMA Fetch start/end time, lower and upper 32-bit words. */
#define CDMA_FETCH_TIME_START_L_OFFSET  32
#define CDMA_FETCH_TIME_START_U_OFFSET  36
#define CDMA_FETCH_TIME_END_L_OFFSET    40
#define CDMA_FETCH_TIME_END_U_OFFSET    44

/* CDMA Send start/end time, lower and upper 32-bit words. */
#define CDMA_SEND_TIME_START_L_OFFSET   48
#define CDMA_SEND_TIME_START_U_OFFSET   52
#define CDMA_SEND_TIME_END_L_OFFSET     56
#define CDMA_SEND_TIME_END_U_OFFSET     60

/* DMA acceleration start/end time, lower and upper 32-bit words. */
#define DMA_ACCEL_TIME_START_L_OFFSET   64
#define DMA_ACCEL_TIME_START_U_OFFSET   68
#define DMA_ACCEL_TIME_END_L_OFFSET     72
#define DMA_ACCEL_TIME_END_U_OFFSET     76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS batch script for the Acceleration Scheduler Direct core:
# creates the project, synthesizes the top function and exports it as an IP-catalog core.

# Names used by the steps below.
set top_function acceleration_scheduler_direct
set fpga_part    xc7vx485tffg1761-2

# Project, top-level function and source file.
open_project Acceleration_Scheduler_Direct
set_top $top_function
add_files ${top_function}.cpp

open_solution solution1

# The part refers to the Xilinx Virtex 7 VC707 FPGA development board.
set_part $fpga_part

# Target clock period of 10 (the tool's default unit), named "default".
create_clock -period 10 -name default

# C synthesis.
csynth_design

# Package the synthesized core for the IP catalog.
export_design -format ip_catalog -display_name "Acceleration Scheduler Direct" -version 3.5

exit
|
||||
@@ -0,0 +1,715 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_indirect.h"
|
||||
|
||||
/*
 * =========================================
 * Sobel Filter: AXI4-Lite register offsets
 * =========================================
 */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL      0x00    /* Control register.  */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA    0x18    /* Image rows.        */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA    0x20    /* Image columns.     */

/*
 * ===========================
 * DMA: registers and masks
 * ===========================
 */

/* Base offset of the Tx channel registers. */
#define XAXIDMA_TX_OFFSET           0x00000000

/* Base offset of the Rx channel registers. */
#define XAXIDMA_RX_OFFSET           0x00000030

/*
 * Channel-relative register offsets, valid for both DMA channels.
 * Add XAXIDMA_TX_OFFSET to reach the Tx channel or XAXIDMA_RX_OFFSET to reach the Rx channel;
 * this is why the source and destination address registers share the same value.
 */
#define XAXIDMA_CR_OFFSET           0x00000000  /* Control register.             */
#define XAXIDMA_SR_OFFSET           0x00000004  /* Status register.              */
#define XAXIDMA_SRCADDR_OFFSET      0x00000018  /* Source address register.      */
#define XAXIDMA_DESTADDR_OFFSET     0x00000018  /* Destination address register. */
#define XAXIDMA_BUFFLEN_OFFSET      0x00000028  /* Transfer data size register.  */

/* Control register bit masks. */
#define XAXIDMA_CR_RUNSTOP_MASK     0x00000001  /* Start/stop the DMA channel.   */
#define XAXIDMA_CR_RESET_MASK       0x00000004  /* Reset the DMA.                */

/* Interrupt bit masks. */
#define XAXIDMA_IRQ_IOC_MASK        0x00001000  /* Completion interrupt.         */
#define XAXIDMA_IRQ_DELAY_MASK      0x00002000  /* Delay interrupt.              */
#define XAXIDMA_IRQ_ERROR_MASK      0x00004000  /* Error interrupt.              */
#define XAXIDMA_IRQ_ALL_MASK        0x00007000  /* All of the interrupts above.  */

/*
 * ==========================================================
 * AXI Performance Monitor unit (APM): registers and masks
 * ==========================================================
 */

/* Control register bit masks. */
#define XAPM_CR_GCC_RESET_MASK      0x00020000  /* Reset the Global Clock Counter (GCC).  */
#define XAPM_CR_GCC_ENABLE_MASK     0x00010000  /* Enable the Global Clock Counter (GCC). */
#define XAPM_CR_MCNTR_RESET_MASK    0x00000002  /* Reset the metrics counters.            */
#define XAPM_CR_MCNTR_ENABLE_MASK   0x00000001  /* Enable the metrics counters.           */

/* Register offsets. */
#define XAPM_CTL_OFFSET             0x0300      /* Control register.                      */
#define XAPM_GCC_HIGH_OFFSET        0x0000      /* GCC bits 32 to 63 (upper word).        */
#define XAPM_GCC_LOW_OFFSET         0x0004      /* GCC bits 0 to 31 (lower word).         */

/* Metrics counter register offsets. */
#define XAPM_MC0_OFFSET             0x0100      /* Metrics counter 0. */
#define XAPM_MC1_OFFSET             0x0110      /* Metrics counter 1. */
#define XAPM_MC2_OFFSET             0x0120      /* Metrics counter 2. */
#define XAPM_MC3_OFFSET             0x0130      /* Metrics counter 3. */
#define XAPM_MC4_OFFSET             0x0140      /* Metrics counter 4. */
#define XAPM_MC5_OFFSET             0x0150      /* Metrics counter 5. */
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_indirect()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Indirect Core.
|
||||
*
|
||||
* The Acceleration Scheduler Indirect Core is Part of the Acceleration Group Indirect and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Indirect as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It, also, Interacts with the CDMA Fetch and CDMA Send Peripherals and the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Set the Scheduler Buffer of the Fetch Scheduler with Info that the Fetch Scheduler will Use to Start the CDMA Fetch Transfer
|
||||
* from the Host Memory to the FPGA's DDR3.
|
||||
* b --> Wait for the Fetch Scheduler to Send a Start Signal (start Input) when the CDMA Fetch Has Completed the Transfer.
|
||||
* c --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* e --> Setup and Start the Sobel Filter.
|
||||
* f --> Setup and Start the S2MM and MM2S DMA Transfers.
|
||||
* g --> Wait for an Interrupt by the DMA on Completion of the Transfer.
|
||||
* h --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* i --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* j --> Acknowledge the DMA Interrupt.
|
||||
* k --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* l --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* m --> Set the Scheduler Buffer of the Send Scheduler with Info that the Send Scheduler will Use to Start the CDMA Send Transfer
|
||||
* from the FPGA's DDR3 to the Host Memory.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Start Signals from the Fetch Scheduler.
|
||||
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
|
||||
* 04 to 27 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_indirect(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *start,
|
||||
/*03*/volatile ap_uint<1> *dma_intr_in,
|
||||
/*04*/unsigned int scheduler_buffer_base_address_f,
|
||||
/*05*/unsigned int src_address_reg_offset_f,
|
||||
/*06*/unsigned int dst_address_reg_offset_f,
|
||||
/*07*/unsigned int data_size_reg_offset_f,
|
||||
/*08*/unsigned int offset_reg_offset_f,
|
||||
/*09*/unsigned int src_address_f,
|
||||
/*10*/unsigned int dst_address_f,
|
||||
/*11*/unsigned int offset_f,
|
||||
/*12*/unsigned int scheduler_buffer_base_address_s,
|
||||
/*13*/unsigned int src_address_reg_offset_s,
|
||||
/*14*/unsigned int dst_address_reg_offset_s,
|
||||
/*15*/unsigned int data_size_reg_offset_s,
|
||||
/*16*/unsigned int offset_reg_offset_s,
|
||||
/*17*/unsigned int src_address_s,
|
||||
/*18*/unsigned int dst_address_s,
|
||||
/*19*/unsigned int offset_s,
|
||||
/*20*/unsigned int dma_base_address,
|
||||
/*21*/unsigned int sobel_base_address,
|
||||
/*22*/unsigned int image_cols,
|
||||
/*23*/unsigned int image_rows,
|
||||
/*24*/unsigned int accel_group,
|
||||
/*25*/unsigned int shared_apm_base_address,
|
||||
/*26*/unsigned int shared_metrics_base_address,
|
||||
/*27*/unsigned int apm_base_address
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The start is a Single Bit Input which is Used to Receive External Start Signals from the Fetch Scheduler.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=start
|
||||
|
||||
/*
|
||||
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=dma_intr_in
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address_f is a Register to Store the Base Address of the Scheduler Buffer of the Fetch Scheduler.
|
||||
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Source Address that the CDMA Fetch will Read the Data from.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Destination Address that the CDMA Fetch will Write the Data to.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Data Size of the CDMA Fetch Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Offset from the Source and Destination Base Addresses that the CDMA Fetch will Use to Make the Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_f is a Register to Store the Source Address that the CDMA Fetch will Use to Read the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_f is a Register to Store the Destination Address that the CDMA Fetch will Use to Write the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_f is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address_s is a Register to Store the Base Address of the Scheduler Buffer of the Send Scheduler.
|
||||
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Source Address that the CDMA Send will Read the Data from.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Destination Address that the CDMA Send will Write the Data to.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Data Size of the CDMA Send Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Offset from the Source and Destination Base Addresses that the CDMA Send will Use to Make the Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_s is a Register to Store the Source Address that the CDMA Send will Use to Read the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_s is a Register to Store the Destination Address that the CDMA Send will Use to Write the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_s is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dma_base_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The sobel_base_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The apm_base_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_base_address bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<1> start_value; // Used to Read the Last Value of the start Input Port.
|
||||
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
|
||||
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
* Set the Registers of the Scheduler Buffer of the Fetch Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
|
||||
* The Fetch Scheduler will Use the above to Start the CDMA Fetch Transfer from the Host Memory to the FPGA's DDR3.
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_f) the Source Address for the CDMA Fetch Transfer.
|
||||
data_register = src_address_f;
|
||||
|
||||
//Write the Source Address for the CDMA Fetch Transfer to the Source Address Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + src_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (dst_address_f) the Destination Address for the CDMA Fetch Transfer.
|
||||
data_register = dst_address_f;
|
||||
|
||||
//Write the Destination Address for the CDMA Fetch Transfer to the Destination Address Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + dst_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (offset_f) the Offset Value for the CDMA Fetch Transfer.
|
||||
data_register = offset_f;
|
||||
|
||||
//Write the Offset Value for the CDMA Fetch Transfer to the Offset Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + offset_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Fetch Transfer.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Data Size for the CDMA Fetch Transfer to the Data Size Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + data_size_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Wait for Start Signal from the Fetch Scheduler
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the start Input.
|
||||
start_value = *start;
|
||||
|
||||
//Keep Looping for as long as the start Input Does not Reach a Logic 1 Value.
|
||||
while(start_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the start Input.
|
||||
start_value = *start;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
start_value = 0;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_s) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
//NOTE that the Destination Address of the DMA S2MM Transfer is the Source Address of the CDMA Send Transfer.
|
||||
data_register = src_address_s;
|
||||
|
||||
//Write the Destination Address to the Destination Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (dst_address_f) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
//NOTE that the Destination Address of the CDMA Fetch Transfer is the Source Address of the DMA MM2S Transfer.
|
||||
data_register = dst_address_f;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Wait for a DMA Interrupt
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(dma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
dma_intr_in_value = 0;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------
|
||||
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
|
||||
* IOC Stands for: Interrupt On Complete
|
||||
* ---------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the Triggered Interrupts
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
* Set the Registers of the Scheduler Buffer of the Send Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
|
||||
* The Send Scheduler will Use the above to Start the CDMA Send Transfer from the Host Memory to the FPGA's DDR3.
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_s) the Source Address for the CDMA Transfer.
|
||||
data_register = src_address_s;
|
||||
|
||||
//Write the Source Address for the CDMA Send Transfer to the Source Address Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + src_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (dst_address_s) the Destination Address for the CDMA Send Transfer.
|
||||
data_register = dst_address_s;
|
||||
|
||||
//Write the Destination Address for the CDMA Send Transfer to the Destination Address Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + dst_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (offset_s) the Offset Value for the CDMA Send Transfer.
|
||||
data_register = offset_s;
|
||||
|
||||
//Write the Offset Value for the CDMA Send Transfer to the Offset Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + offset_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Send Transfer.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Data Size for the CDMA Send Transfer to the Data Size Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + data_size_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the 32-Bit Metric Fields Inside struct metrics.
 * Each Value Matches the "Offset N Bytes" Comment of the Corresponding Field of struct metrics.
 * The Cores Add one of these Offsets to the Base Address of a Metrics Structure to Address a Single Field.
 * If the Layout of struct metrics Changes these Values Have to be Updated Accordingly.
 */
#define APM_READ_TRANSACTIONS_OFFSET 0 // Field apm_read_transactions.
|
||||
#define APM_READ_BYTES_OFFSET 4 // Field apm_read_bytes.
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8 // Field apm_write_transactions.
|
||||
#define APM_WRITE_BYTES_OFFSET 12 // Field apm_write_bytes.
|
||||
|
||||
#define APM_PACKETS_OFFSET 16 // Field apm_packets.
|
||||
#define APM_BYTES_OFFSET 20 // Field apm_bytes.
|
||||
|
||||
#define APM_GCC_L_OFFSET 24 // Field apm_gcc_l.
|
||||
#define APM_GCC_U_OFFSET 28 // Field apm_gcc_u.
|
||||
|
||||
#define CDMA_FETCH_TIME_START_L_OFFSET 32 // Field cdma_fetch_time_start_l.
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36 // Field cdma_fetch_time_start_u.
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40 // Field cdma_fetch_time_end_l.
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44 // Field cdma_fetch_time_end_u.
|
||||
|
||||
#define CDMA_SEND_TIME_START_L_OFFSET 48 // Field cdma_send_time_start_l.
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52 // Field cdma_send_time_start_u.
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56 // Field cdma_send_time_end_l.
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60 // Field cdma_send_time_end_u.
|
||||
|
||||
#define DMA_ACCEL_TIME_START_L_OFFSET 64 // Field dma_accel_time_start_l.
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68 // Field dma_accel_time_start_u.
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72 // Field dma_accel_time_end_l.
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76 // Field dma_accel_time_end_u.
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Build Script for the Acceleration Scheduler Indirect Core.
#It Synthesizes acceleration_scheduler_indirect.cpp and Exports the Result as an IP Catalog Core.

#Open (or Create) the HLS Project.
open_project Acceleration_Scheduler_Indirect
|
||||
|
||||
#Select the Top Level Function that will be Synthesized.
set_top acceleration_scheduler_indirect
|
||||
|
||||
#Add the C++ Source File of the Core to the Project.
add_files acceleration_scheduler_indirect.cpp
|
||||
|
||||
#Open (or Create) the Solution that Holds the Part and Clock Settings.
open_solution "solution1"
|
||||
|
||||
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
|
||||
set_part {xc7vx485tffg1761-2}
|
||||
#Target Clock Period of 10 (Nanoseconds by Vivado HLS Default, i.e. 100 MHz).
create_clock -period 10 -name default
|
||||
|
||||
#Run the C Synthesis to Generate the RTL of the Core.
csynth_design
|
||||
|
||||
#Package the Generated RTL as an IP Catalog Core with the Given Display Name and Version.
export_design -format ip_catalog -display_name "Acceleration Scheduler Indirect" -version "2.0"
|
||||
|
||||
#Leave the Vivado HLS Tcl Shell.
exit
|
||||
@@ -0,0 +1,476 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_sg_xdma.h"
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Registers of the Sobel Filter
|
||||
* -----------------------------
|
||||
*/
|
||||
// Byte Offsets Relative to the Base Address of the Sobel Filter.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL 0x00 // Control Register Offset.
|
||||
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18 // Image Rows Register Offset.
|
||||
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20 // Image Columns Register Offset.
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM)
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
// Bit Masks that are Applied to the Value of the APM Control Register (XAPM_CTL_OFFSET).
#define XAPM_CR_GCC_RESET_MASK 0x00020000 // Global Clock Counter (GCC) Reset Mask.
|
||||
#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 // Global Clock Counter (GCC) Enable Mask.
|
||||
#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 // Metrics Counter Reset Mask.
|
||||
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 // Metrics Counter Enable Mask.
|
||||
|
||||
// Register Byte Offsets Relative to the Base Address of the APM.
#define XAPM_CTL_OFFSET 0x0300 // Control Register Offset.
|
||||
#define XAPM_GCC_HIGH_OFFSET 0x0000 // Global Clock Counter 32 to 63 bits (Upper) Register Offset.
|
||||
#define XAPM_GCC_LOW_OFFSET 0x0004 // Global Clock Counter 0 to 31 bits (Lower) Register Offset.
|
||||
|
||||
// Metrics Counter Registers. Consecutive Counters are 0x10 Bytes Apart.
#define XAPM_MC0_OFFSET 0x0100 // Metrics Counter 0 Register Offset.
|
||||
#define XAPM_MC1_OFFSET 0x0110 // Metrics Counter 1 Register Offset.
|
||||
#define XAPM_MC2_OFFSET 0x0120 // Metrics Counter 2 Register Offset.
|
||||
#define XAPM_MC3_OFFSET 0x0130 // Metrics Counter 3 Register Offset.
|
||||
#define XAPM_MC4_OFFSET 0x0140 // Metrics Counter 4 Register Offset.
|
||||
#define XAPM_MC5_OFFSET 0x0150 // Metrics Counter 5 Register Offset.
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------
|
||||
* Registers of the DMA SG PCIe Scheduler
|
||||
* --------------------------------------
|
||||
*/
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL 0x00 // Control Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE 0x04 // Global Interrupt Enable Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER 0x08 // Interrupt Enable Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR 0x0C // Interrupt Status Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA 0x20 // Data Size Register for the Scatter/Gather Transfer.
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_sg_xdma()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Scatter/Gather Core.
|
||||
*
|
||||
* The Acceleration Scheduler Scatter/Gather Core is Part of the Acceleration Group Scatter/Gather and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA SG PCIe Scheduler, Sobel Filter and APM of the Acceleration Group Scatter/Gather as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It, also, Interacts with the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* c --> Setup and Start the Sobel Filter.
|
||||
* d --> Enable the Interrupts of the DMA SG PCIe Scheduler.
|
||||
* e --> Setup and Start the DMA SG PCIe Scheduler.
|
||||
* f --> Wait for an Interrupt by the DMA SG PCIe Scheduler on Completion of the Acceleration.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* h --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* i --> Clear and Re-Enable the Interrupts of the DMA SG PCIe Scheduler.
|
||||
* j --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* k --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* l --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
|
||||
* 03 to 11 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_sg_xdma(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *scheduler_intr_in,
|
||||
/*03*/unsigned int dma_sg_pcie_scheduler_base_address,
|
||||
/*04*/unsigned int sobel_device_address,
|
||||
/*05*/unsigned int interrupt_manager_register_offset,
|
||||
/*06*/unsigned int apm_device_address,
|
||||
/*07*/unsigned int shared_apm_device_address,
|
||||
/*08*/unsigned int shared_metrics_address,
|
||||
/*09*/unsigned int image_cols,
|
||||
/*10*/unsigned int image_rows,
|
||||
/*11*/unsigned int accel_group
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=scheduler_intr_in
|
||||
|
||||
/*
|
||||
* The dma_sg_pcie_scheduler_base_address is a Register to Store the Base Address of the DMA SG PCIe Scheduler that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_sg_pcie_scheduler_base_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group bundle=mm2s_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<1> scheduler_intr_in_value; // Used to Read the Last Value of the scheduler_intr_in_value Input Port.
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------
|
||||
* Enable the Interrupts for the DMA SG PCIe Scheduler
|
||||
* --------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = 0x1;
|
||||
|
||||
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------
|
||||
* Setup and Start the DMA SG PCIe Scheduler
|
||||
* -----------------------------------------
|
||||
*/
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = image_rows * image_cols * 4;
|
||||
|
||||
//Write the Transfer Size to the Requested Data Size Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the DMA SG PCIe Scheduler.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the DMA SG PCIe Scheduler.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA SG PCIe Scheduler so that the DMA SG PCIe Scheduler Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------
|
||||
* Wait for a DMA SG PCIe Scheduler Interrupt
|
||||
* ------------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the scheduler_intr_in_value Input.
|
||||
scheduler_intr_in_value = *scheduler_intr_in;
|
||||
|
||||
//Keep Looping for as long as the scheduler_intr_in_value Input Does not Reach a Logic 1 Value.
|
||||
while(scheduler_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the scheduler_intr_in Input.
|
||||
scheduler_intr_in_value = *scheduler_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
scheduler_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Clear and then Re-Enable the DMA SG PCIe Scheduler Interrupts
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set a Mask to Clear the Interrupt Status Register of the DMA SG PCIe Scheduler.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Clear the Interrupt Status Register of the DMA SG PCIe Scheduler According to the Previous Mask.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
//Read the Interrupt Enable Register of the DMA SG PCIe Scheduler
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = 0x1;
|
||||
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------------------------------------------------
|
||||
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
|
||||
* ------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (accel_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
|
||||
data_register = accel_group;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the 32 Bit Fields at the Beginning of the metrics Structure.
 * Each Offset is Given in Hexadecimal with its Decimal Value in the Trailing Comment.
 */

/* AXI Performance Monitor (APM) Counters. */
#define APM_READ_TRANSACTIONS_OFFSET	0x00 //  0 Bytes
#define APM_READ_BYTES_OFFSET			0x04 //  4 Bytes

#define APM_WRITE_TRANSACTIONS_OFFSET	0x08 //  8 Bytes
#define APM_WRITE_BYTES_OFFSET			0x0C // 12 Bytes

#define APM_PACKETS_OFFSET				0x10 // 16 Bytes
#define APM_BYTES_OFFSET				0x14 // 20 Bytes

#define APM_GCC_L_OFFSET				0x18 // 24 Bytes
#define APM_GCC_U_OFFSET				0x1C // 28 Bytes

/* CDMA Fetch Time Stamps (Lower/Upper 32 Bits). */
#define CDMA_FETCH_TIME_START_L_OFFSET	0x20 // 32 Bytes
#define CDMA_FETCH_TIME_START_U_OFFSET	0x24 // 36 Bytes

#define CDMA_FETCH_TIME_END_L_OFFSET	0x28 // 40 Bytes
#define CDMA_FETCH_TIME_END_U_OFFSET	0x2C // 44 Bytes

/* CDMA Send Time Stamps (Lower/Upper 32 Bits). */
#define CDMA_SEND_TIME_START_L_OFFSET	0x30 // 48 Bytes
#define CDMA_SEND_TIME_START_U_OFFSET	0x34 // 52 Bytes

#define CDMA_SEND_TIME_END_L_OFFSET		0x38 // 56 Bytes
#define CDMA_SEND_TIME_END_U_OFFSET		0x3C // 60 Bytes

/* DMA Acceleration Time Stamps (Lower/Upper 32 Bits). */
#define DMA_ACCEL_TIME_START_L_OFFSET	0x40 // 64 Bytes
#define DMA_ACCEL_TIME_START_U_OFFSET	0x44 // 68 Bytes

#define DMA_ACCEL_TIME_END_L_OFFSET		0x48 // 72 Bytes
#define DMA_ACCEL_TIME_END_U_OFFSET		0x4C // 76 Bytes
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Build Script for the Acceleration Scheduler SG XDMA Core.
# Creates the Project, Synthesizes the Top Function and Exports the Result as an IP Catalog Core.

# Project, Top Function and Source File.
open_project Acceleration_Scheduler_SG_XDMA
set_top acceleration_scheduler_sg_xdma
add_files acceleration_scheduler_sg_xdma.cpp

# Solution Setup.
open_solution solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board.
set_part xc7vx485tffg1761-2

# Target Clock Period of 10 (Presumably Nanoseconds, the Tool Default Unit -- Confirm).
create_clock -period 10 -name default

# Run C Synthesis.
csynth_design

# Package the Synthesized Design for the IP Catalog.
export_design -format ip_catalog -display_name {Acceleration Scheduler SG XDMA} -version 3.5

exit
|
||||
0
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/.keep
Normal file
@@ -0,0 +1,698 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "dma_sg_pcie_scheduler.h"
|
||||
|
||||
|
||||
/*
 * ------------------------------
 * Registers and Masks of the DMA
 * ------------------------------
 */

/*
 * Base Offsets of the Two Channel Register Groups.
 */
#define XAXIDMA_TX_OFFSET			0x00000000 // Tx Channel Registers Base Offset.
#define XAXIDMA_RX_OFFSET			0x00000030 // Rx Channel Registers Base Offset.

/*
 * Per-Channel Register Offsets, Applicable to both Channels of the DMA.
 * Add XAXIDMA_TX_OFFSET to Reach the Tx Channel or XAXIDMA_RX_OFFSET to Reach the Rx Channel.
 * The Source and Destination Address Registers Share the Same Per-Channel Offset.
 */
#define XAXIDMA_CR_OFFSET			0x00000000 // Control Register.
#define XAXIDMA_SR_OFFSET			0x00000004 // Status Register.
#define XAXIDMA_SRCADDR_OFFSET		0x00000018 // Source Address Register.
#define XAXIDMA_DESTADDR_OFFSET		0x00000018 // Destination Address Register.
#define XAXIDMA_BUFFLEN_OFFSET		0x00000028 // Transfer Data Size Register.

/*
 * Control Register Masks.
 */
#define XAXIDMA_CR_RUNSTOP_MASK		0x00000001 // Start/Stop DMA Channel Mask.
#define XAXIDMA_CR_RESET_MASK		0x00000004 // Reset DMA Mask.

/*
 * Interrupt Masks.
 */
#define XAXIDMA_IRQ_IOC_MASK		0x00001000 // Completion Interrupt Mask.
#define XAXIDMA_IRQ_DELAY_MASK		0x00002000 // Delay Interrupt Mask.
#define XAXIDMA_IRQ_ERROR_MASK		0x00004000 // Error Interrupt Mask.
#define XAXIDMA_IRQ_ALL_MASK		(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK | XAXIDMA_IRQ_ERROR_MASK) // All Interrupts Mask (0x00007000).
|
||||
|
||||
/*
|
||||
* serve_mm2s_transfer()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is as Follows:
|
||||
*
|
||||
* a --> Get from the Scatter/Gather List of the Source Memory the Physical Address of the Current Page to Transfer.
|
||||
* b --> Set the Address Translation Register of the PCIe Bridge's Source AXI BAR with the Physical Address of the Current Page to Transfer.
|
||||
* c --> Setup and Start the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
* 03 --> The Data Size of the MM2S Transfer.
|
||||
* 04 --> The Address of the Scatter/Gather List of the Source Memory.
|
||||
* 05 --> The Address of the BCIe Bridge's Source AXI BAR.
|
||||
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Source AXI BAR.
|
||||
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Source Scatter/Gather List.
|
||||
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
|
||||
*/
|
||||
int serve_mm2s_transfer(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/unsigned int dma_device_address,
|
||||
/*03*/unsigned int src_data_size,
|
||||
/*04*/unsigned int sgl_address,
|
||||
/*05*/unsigned int axi_bar_src_address,
|
||||
/*06*/unsigned int axi_bar_src_cfg_address,
|
||||
/*07*/int page_counter,
|
||||
/*08*/ap_uint<32>current_transfer_size
|
||||
)
|
||||
{
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Source AXI BAR of the PCIe Bridge
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the 64 Bit Physical Address of the Current Page from the Source Scatter/Gather List.
|
||||
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
|
||||
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
|
||||
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
|
||||
|
||||
data_register = data_register_array[0];
|
||||
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Source AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = data_register_array[1];
|
||||
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Source AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (axi_bar_src_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
//The Source Address of the DMA MM2S Channel will be the Source AXI BAR which Corresponds to the Physical Address of the Current Page.
|
||||
data_register = axi_bar_src_address;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), ¤t_transfer_size, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_s2mm_transfer()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is as Follows:
|
||||
*
|
||||
* a --> Get from the Scatter/Gather List of the Destination Memory the Physical Address of the Current Page to Transfer.
|
||||
* b --> Set the Address Translation Register of the PCIe Bridge's Destination AXI BAR with the Physical Address of the Current Page to Transfer.
|
||||
* c --> Setup and Start the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
* 03 --> The Data Size of the S2MM Transfer.
|
||||
* 04 --> The Address of the Scatter/Gather List of the Destination Memory.
|
||||
* 05 --> The Address of the PCIe Bridge's Destination AXI BAR.
|
||||
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Destination AXI BAR.
|
||||
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Destination Scatter/Gather List.
|
||||
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
|
||||
*/
|
||||
int serve_s2mm_transfer(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/unsigned int dma_device_address,
|
||||
/*03*/unsigned int src_data_size,
|
||||
/*04*/unsigned int sgl_address,
|
||||
/*05*/unsigned int axi_bar_dst_address,
|
||||
/*06*/unsigned int axi_bar_dst_cfg_address,
|
||||
/*07*/int page_counter,
|
||||
/*08*/ap_uint<32>current_transfer_size)
|
||||
{
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Destination AXI BAR of the PCIe Bridge
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the 64 Bit Physical Address of the Current Page from the Destination Scatter/Gather List.
|
||||
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
|
||||
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
|
||||
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
|
||||
|
||||
data_register = data_register_array[0];
|
||||
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Destination AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = data_register_array[1];
|
||||
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Destination AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (axi_bar_dst_address) of the Core the Destination Address that the DMA will Use to Read the Initial Image Data.
|
||||
//The Destination Address of the DMA S2MM Channel will be the Destination AXI BAR which Corresponds to the Physical Address of the Current Page.
|
||||
data_register = axi_bar_dst_address;
|
||||
|
||||
//Write the Destination Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), ¤t_transfer_size, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_mm2s_interrupt()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is to Acknowledge Triggered Interrupts on the MM2S Channel of the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
*/
|
||||
int serve_mm2s_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
|
||||
{
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
//Read the DMA MM2S Status Register of the DMA to Get the IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the MM2S Status Register of the DMA which Acknowledges the Triggered Interrupts on the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_s2mm_interrupt()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is to Acknowledge Triggered Interrupts on the S2MM Channel of the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
*/
|
||||
int serve_s2mm_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
|
||||
{
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
//Read the DMA S2MM Status Register of the DMA to Get the IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the S2MM Status Register of the DMA which Acknowledges the Triggered Interrupts on the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* dma_sg_pcie_scheduler() Top Function
|
||||
*
|
||||
* The Hardware Functionality of the DMA SG PCIe Scheduler Core.
|
||||
*
|
||||
* The DMA SG PCIe Scheduler Core is Part of the Acceleration Group Scatter/Gather.
|
||||
* It is Used to Manage the MM2S and S2MM Channels of the DMA when a Scatter/Gather List is Required to Transfer the Image Data.
|
||||
* The DMA SG PCIe Scheduler Interacts with the DMA of the Acceleration Group Scatter/Gather and the Configuration AXI Interface of the PCIe Bridge.
|
||||
*
|
||||
* The Sequential Steps of the Core's Functionality are as Follows:
|
||||
*
|
||||
* a --> Calculate the Number of Pages to Transfer for the MM2S and S2MM Channels of the DMA.
|
||||
* b --> Enable the DMA MM2S Interrupts.
|
||||
* c --> Enable the DMA S2MM Interrupts.
|
||||
* d --> Start a Page Transfer over the MM2S Channel (See the serve_mm2s_transfer() Function for Details).
|
||||
* e --> Start a Page Transfer over the S2MM Channel (See the serve_s2mm_transfer() Function for Details).
|
||||
* f --> Loop for as long as Both Channels Require to Complete the Transfer of all the Pages (both_done).
|
||||
* g --> In Every Loop Check if Either the MM2S or the S2MM Channels Have Triggered an Interrupt on Completion of the Page Transfer.
|
||||
* h --> If any of the Channels Triggers an Interrupt then Clear the Channel's Interrupt
|
||||
* (See the serve_mm2s_interrupt() and serve_s2mm_interrupt() Functions for Details)
|
||||
* and Start the Channel's next Page Transfer.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA MM2S Channel.
|
||||
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA S2MM Channel.
|
||||
* 04 to 12 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int dma_sg_pcie_scheduler(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/volatile ap_uint<1> *mm2s_intr_in,
|
||||
/*03*/volatile ap_uint<1> *s2mm_intr_in,
|
||||
/*04*/unsigned int dma_device_address,
|
||||
/*05*/unsigned int requested_data_size,
|
||||
/*06*/unsigned int page_size,
|
||||
/*07*/unsigned int mm2s_sgl_address,
|
||||
/*08*/unsigned int axi_bar_src_address,
|
||||
/*09*/unsigned int axi_bar_src_cfg_address,
|
||||
/*10*/unsigned int s2mm_sgl_address,
|
||||
/*11*/unsigned int axi_bar_dst_address,
|
||||
/*12*/unsigned int axi_bar_dst_cfg_address
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=cfg
|
||||
|
||||
/*
|
||||
* The mm2s_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA MM2S Channel.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=mm2s_intr_in
|
||||
|
||||
/*
|
||||
* The s2mm_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA S2MM Channel.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=s2mm_intr_in
|
||||
|
||||
/*
|
||||
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The requested_data_size is a Register to Store the Size of the Data that will be Transferred.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=requested_data_size bundle=cfg
|
||||
|
||||
/*
|
||||
* The page_size is a Register to Store the Size of each Page(Usually 4K in Linux) that will be Transferred.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=page_size bundle=cfg
|
||||
|
||||
/*
|
||||
* The mm2s_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Source Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=mm2s_sgl_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Read the Source Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_src_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_src_cfg_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The s2mm_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Destination Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=s2mm_sgl_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_dst_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Write the Destination Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_cfg_address bundle=cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> mm2s_data_size; // The Data Size to Transfer for a Page of the MM2S Channel (The Last Page may not be Full).
|
||||
ap_uint<32> s2mm_data_size; // The Data Size to Transfer for a Page of the S2MM Channel (The Last Page may not be Full).
|
||||
|
||||
ap_uint<1> dma_mm2s_intr_in_value; // Used to Read the Last Value of the dma_mm2s_intr_in_value Input Port.
|
||||
ap_uint<1> dma_s2mm_intr_in_value; // Used to Read the Last Value of the dma_s2mm_intr_in_value Input Port.
|
||||
|
||||
int mm2s_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the MM2S Channel.
|
||||
int s2mm_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the S2MM Channel.
|
||||
|
||||
int mm2s_pages_number; // The Number of Pages to Transfer for the MM2S Channel.
|
||||
int s2mm_pages_number; // The Number of Pages to Transfer for the S2MM Channel.
|
||||
int mm2s_remaining_bytes; // Used to Count the Remaining Bytes of the MM2S Transfer.
|
||||
int s2mm_remaining_bytes; // Used to Count the Remaining Bytes of the S2MM Transfer.
|
||||
|
||||
ap_uint<32> current_transfer_size;
|
||||
|
||||
int both_done = 0; // Flag to Know When Both Channels (MM2S/S2MM) are Done.
|
||||
|
||||
|
||||
|
||||
//Divide the Size of the Data to Transfer by the Page Size to Get the Number of Pages to Transfer over the MM2S and S2mm Channels.
|
||||
mm2s_pages_number = requested_data_size / page_size;
|
||||
s2mm_pages_number = requested_data_size / page_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the MM2S Pages Number Multiplied by the Page Size is Less than the Initial Data Size
|
||||
* then there is One More Page with Less Data than the Page Size.
|
||||
*
|
||||
* So, Increment mm2s_pages_number Variable by 1.
|
||||
*/
|
||||
if((mm2s_pages_number * page_size) < requested_data_size)
|
||||
{
|
||||
mm2s_pages_number = mm2s_pages_number + 1;
|
||||
}
|
||||
|
||||
|
||||
//Initialize the Remaining Bytes for the MM2S Channel to be Equal to the Data Transfer Size.
|
||||
mm2s_remaining_bytes = requested_data_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the S2MM Pages Number Multiplied by the Page Size is Less than the Initial Data Size
|
||||
* then there is One More Page with Less Data than the Page Size.
|
||||
*
|
||||
* So, Increment s2mm_pages_number Variable by 1.
|
||||
*/
|
||||
if((s2mm_pages_number * page_size) < requested_data_size)
|
||||
{
|
||||
s2mm_pages_number = s2mm_pages_number + 1;
|
||||
}
|
||||
|
||||
|
||||
//Initialize the Remaining Bytes for the S2MM Channel to be Equal to the Data Transfer Size.
|
||||
s2mm_remaining_bytes = requested_data_size;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Enable the DMA MM2S Interrupts (DMA to Device)
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the MM2S Channel of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA to Enable the MM2S Interrupts.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Enable the DMA S2MM Interrupts (Device to DMA)
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the S2MM Channel of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA to Enable the S2MM Interrupts.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
|
||||
*/
|
||||
if(mm2s_remaining_bytes >= page_size)
|
||||
{
|
||||
mm2s_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
|
||||
*/
|
||||
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
|
||||
{
|
||||
mm2s_data_size = mm2s_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = mm2s_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the MM2S Channel
|
||||
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
|
||||
serve_mm2s_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
mm2s_sgl_address,
|
||||
axi_bar_src_address,
|
||||
axi_bar_src_cfg_address,
|
||||
0,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
|
||||
*/
|
||||
if(s2mm_remaining_bytes >= page_size)
|
||||
{
|
||||
s2mm_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
|
||||
*/
|
||||
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
|
||||
{
|
||||
s2mm_data_size = s2mm_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = s2mm_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the S2MM Channel
|
||||
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
|
||||
serve_s2mm_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
s2mm_sgl_address,
|
||||
axi_bar_dst_address,
|
||||
axi_bar_dst_cfg_address,
|
||||
0,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
|
||||
|
||||
|
||||
//Start Looping for as Long as the Rest of the Pages for the MM2S and S2MM Channels are Being Transferred.
|
||||
while(both_done < 2)
|
||||
{
|
||||
//Read the Current State of the mm2s_intr_in Input.
|
||||
dma_mm2s_intr_in_value = *mm2s_intr_in;
|
||||
|
||||
//Read the Current State of the s2mm_intr_in Input.
|
||||
dma_s2mm_intr_in_value = *s2mm_intr_in;
|
||||
|
||||
/*
|
||||
* If we Have an Interrupt from the MM2S Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
|
||||
*/
|
||||
if(dma_mm2s_intr_in_value == 1)
|
||||
{
|
||||
//Acknowledge the Triggered Interrupt of the DMA MM2S Channel.
|
||||
serve_mm2s_interrupt(cfg, dma_device_address);
|
||||
|
||||
//If the MM2S Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
|
||||
if(mm2s_pages_counter < (mm2s_pages_number - 1))
|
||||
{
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
|
||||
*/
|
||||
if(mm2s_remaining_bytes >= page_size)
|
||||
{
|
||||
mm2s_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
|
||||
*/
|
||||
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
|
||||
{
|
||||
mm2s_data_size = mm2s_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = mm2s_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the MM2S Channel According to the current_transfer_size Variable.
|
||||
serve_mm2s_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
mm2s_sgl_address,
|
||||
axi_bar_src_address,
|
||||
axi_bar_src_cfg_address,
|
||||
mm2s_pages_counter + 1,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
|
||||
}
|
||||
|
||||
//Increment the MM2S Pages Counter to Keep Track of the Remaining MM2S Pages to Transfer.
|
||||
mm2s_pages_counter++;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* If we Have an Interrupt from the S2MM Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
|
||||
*/
|
||||
if(dma_s2mm_intr_in_value == 1)
|
||||
{
|
||||
//Acknowledge the Triggered Interrupt of the DMA S2MM Channel.
|
||||
serve_s2mm_interrupt(cfg, dma_device_address);
|
||||
|
||||
//If the S2MM Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
|
||||
if(s2mm_pages_counter < (s2mm_pages_number - 1))
|
||||
{
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
|
||||
*/
|
||||
if(s2mm_remaining_bytes >= page_size)
|
||||
{
|
||||
s2mm_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
|
||||
*/
|
||||
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
|
||||
{
|
||||
s2mm_data_size = s2mm_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = s2mm_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the S2MM Channel According to the current_transfer_size Variable.
|
||||
serve_s2mm_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
s2mm_sgl_address,
|
||||
axi_bar_dst_address,
|
||||
axi_bar_dst_cfg_address,
|
||||
s2mm_pages_counter + 1,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
|
||||
|
||||
}
|
||||
|
||||
//Increment the S2MM Pages Counter to Keep Track of the Remaining S2MM Pages to Transfer.
|
||||
s2mm_pages_counter++;
|
||||
|
||||
}
|
||||
|
||||
//If the MM2S Pages Counter Has Reached the Total Number of Pages then the MM2S Channel Has Finished the Data Transfer.
|
||||
if(mm2s_pages_counter == (mm2s_pages_number))
|
||||
{
|
||||
//Increment the mm2s_pages_counter Variable so that will not Enter the Current if Condition Again.
|
||||
mm2s_pages_counter++;
|
||||
|
||||
//Increment the both_done Variable on Behalf of the MM2S Channel.
|
||||
//The both_done Variable will ONLY be Incremented Once on Behalf of the MM2S Channel because we will not Enter this if Condition Again.
|
||||
//When the S2MM Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
|
||||
both_done++;
|
||||
}
|
||||
|
||||
//If the S2MM Pages Counter Has Reached the Total Number of Pages then the S2MM Channel Has Finished the Data Transfer.
|
||||
if(s2mm_pages_counter == (s2mm_pages_number))
|
||||
{
|
||||
//Increment the s2mm_pages_counter Variable so that will not Enter the Current if Condition Again.
|
||||
s2mm_pages_counter++;
|
||||
|
||||
//Increment the both_done Variable on Behalf of the S2MM Channel.
|
||||
//The both_done Variable will ONLY be Incremented Once on Behalf of the S2MM Channel because we will not Enter this if Condition Again.
|
||||
//When the MM2S Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
|
||||
both_done++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Reset the Variables.
|
||||
dma_mm2s_intr_in_value = 0;
|
||||
dma_s2mm_intr_in_value = 0;
|
||||
both_done = 0;
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the Hardware Metric Fields.
 * Each Value Matches the "Offset N Bytes" Position of the Same-Named 32 Bit Field of struct metrics.
 */

/* AXI Performance Monitor Counters. */
#define APM_READ_TRANSACTIONS_OFFSET    0x00
#define APM_READ_BYTES_OFFSET           0x04
#define APM_WRITE_TRANSACTIONS_OFFSET   0x08
#define APM_WRITE_BYTES_OFFSET          0x0C
#define APM_PACKETS_OFFSET              0x10
#define APM_BYTES_OFFSET                0x14

/* Global Clock Counter, Lower (L) and Upper (U) 32 Bits. */
#define APM_GCC_L_OFFSET                0x18
#define APM_GCC_U_OFFSET                0x1C

/* CDMA Fetch Start/End Time Stamps, Lower (L) and Upper (U) 32 Bits. */
#define CDMA_FETCH_TIME_START_L_OFFSET  0x20
#define CDMA_FETCH_TIME_START_U_OFFSET  0x24
#define CDMA_FETCH_TIME_END_L_OFFSET    0x28
#define CDMA_FETCH_TIME_END_U_OFFSET    0x2C

/* CDMA Send Start/End Time Stamps, Lower (L) and Upper (U) 32 Bits. */
#define CDMA_SEND_TIME_START_L_OFFSET   0x30
#define CDMA_SEND_TIME_START_U_OFFSET   0x34
#define CDMA_SEND_TIME_END_L_OFFSET     0x38
#define CDMA_SEND_TIME_END_U_OFFSET     0x3C

/* DMA Acceleration Start/End Time Stamps, Lower (L) and Upper (U) 32 Bits. */
#define DMA_ACCEL_TIME_START_L_OFFSET   0x40
#define DMA_ACCEL_TIME_START_U_OFFSET   0x44
#define DMA_ACCEL_TIME_END_L_OFFSET     0x48
#define DMA_ACCEL_TIME_END_U_OFFSET     0x4C
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script for the DMA SG PCIe Scheduler Core:
# Create the Project, Synthesize the Top Function and Export it as an IP Catalog Core.

set project_name  DMA_SG_PCIe_Scheduler
set top_function  dma_sg_pcie_scheduler
set source_file   dma_sg_pcie_scheduler.cpp
set solution_name "solution1"

# Project Setup: Project, Top Level Function and Source File.
open_project $project_name
set_top $top_function
add_files $source_file

# Solution Setup.
open_solution $solution_name

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

# Target Clock Named "default" with a Period of 10.
create_clock -period 10 -name default

# Run the C Synthesis.
csynth_design

# Package the Synthesized Core for the IP Catalog.
export_design -format ip_catalog -display_name "DMA SG PCIe Scheduler" -version "1.0"

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/.keep
Normal file
513
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.cpp
Normal file
513
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.cpp
Normal file
@@ -0,0 +1,513 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "fetch_scheduler.h"
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------
|
||||
* Registers and Masks of the DMA
|
||||
* ------------------------------
|
||||
*/
|
||||
/* Register Offsets of the CDMA (Relative to its Base Address). */
#define XAXICDMA_CR_OFFSET          0x00    // Control Register.
#define XAXICDMA_SR_OFFSET          0x04    // Status Register.
#define XAXICDMA_SRCADDR_OFFSET     0x18    // Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET     0x20    // Destination Address Register.
#define XAXICDMA_BTT_OFFSET         0x28    // Bytes to Transfer Register.

/* Bit Masks of the CDMA Control Register. */
#define XAXICDMA_CR_RESET_MASK      0x0004  // Reset CDMA Mask.

/* Interrupt Bit Masks of the CDMA. */
#define XAXICDMA_XR_IRQ_IOC_MASK    0x1000  // Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK  0x2000  // Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK  0x4000  // Error Interrupt Mask.

// All Interrupt Mask: the Union of the IOC, Delay and Error Masks (0x7000).
#define XAXICDMA_XR_IRQ_ALL_MASK    (XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK | XAXICDMA_XR_IRQ_ERROR_MASK)
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
// The 64 Bit Global Clock Counter of the APM is Exposed as Two 32 Bit Registers.
#define XAPM_GCC_HIGH_OFFSET  0x00  // Upper Half: Bits 32 to 63.
#define XAPM_GCC_LOW_OFFSET   0x04  // Lower Half: Bits 0 to 31.
|
||||
|
||||
|
||||
/*
|
||||
* fetch_scheduler()
|
||||
*
|
||||
* The Hardware Functionality of the Fetch Scheduler Core.
|
||||
*
|
||||
* The Fetch Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
|
||||
* The Responsibility of this Core is to Manage the Procedure of Fetching Data to the DDR3 Memory that will be Processed by the AGIs.
|
||||
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
|
||||
* If it Finds Information for new Transfer it Starts the CDMA Fetch Core to Transfer Image Data from the Host's Memory to the FPGA's DDR3.
|
||||
* The Corresponding AGIs will be then Signaled by the Fetch Scheduler to Process the Image Data.
|
||||
*
|
||||
* When an AGI wants to Request Image Data from the Fetch Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
|
||||
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Fetch Scheduler.
|
||||
*
|
||||
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
|
||||
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
|
||||
* Each Set Corresponds to One of the 4 AGIs.
|
||||
*
|
||||
* When an AGI Writes the Above Information to the Scheduler Buffer, the Fetch Scheduler Starts a CDMA Transfer Accordingly
|
||||
* to Fetch the Image Data in the FPGA's DDR3.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
|
||||
*
|
||||
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
|
||||
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
|
||||
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Fetch.
|
||||
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
|
||||
* b --> Enable the Interrupts on the CDMA Fetch Core.
|
||||
* c --> Setup the CDMA with the Source and Destination Addresses.
|
||||
* If the Source Data Should be Fetched through the PCIe Bridge then Get the Source Address from the Scheduler Buffer and Set the
|
||||
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
|
||||
* Then Set the Source Address Register of the CDMA Fetch Core to be the Corresponding AXI BAR.
|
||||
* If the Source Data Should not be Fetched through the PCIe Bridge then Just Set the Source Address Register of the CDMA Fetch Core
|
||||
* with the Source Address of the Scheduler Buffer.
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Started.
|
||||
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Fetch Transfer.
|
||||
* f --> Wait for an Interrupt by the CDMA Fetch on Completion of the Transfer.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Ended.
|
||||
* h --> Acknowledge the CDMA Fetch Interrupt.
|
||||
* i --> Reset the CDMA Fetch Core.
|
||||
* j --> Re-Enable the Interrupts on the CDMA Fetch Core.
|
||||
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
|
||||
* l --> Send a Start Signal to the Acceleration Scheduler Indirect of the Corresponding AGI to Initiate the Acceleration Procedure.
|
||||
*
|
||||
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
|
||||
*
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Fetch Core.
|
||||
* 03 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
|
||||
* 04 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
|
||||
* 05 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
|
||||
* 06 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
|
||||
* 07 to 19 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
|
||||
*/
|
||||
int fetch_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *cdma_intr_in,
|
||||
/*03*/volatile ap_uint<1> *start_0,
|
||||
/*04*/volatile ap_uint<1> *start_1,
|
||||
/*05*/volatile ap_uint<1> *start_2,
|
||||
/*06*/volatile ap_uint<1> *start_3,
|
||||
/*07*/unsigned int cdma_base_address,
|
||||
/*08*/unsigned int scheduler_buffer_base_address,
|
||||
/*09*/unsigned int src_address_first_reg_offset,
|
||||
/*10*/unsigned int dst_address_first_reg_offset,
|
||||
/*11*/unsigned int data_size_first_reg_offset,
|
||||
/*12*/unsigned int offset_first_reg_offset,
|
||||
/*13*/unsigned int step_offset,
|
||||
/*14*/unsigned int shared_apm_base_address,
|
||||
/*15*/unsigned int shared_metrics_base_address,
|
||||
/*16*/unsigned int axi_bar_base_address,
|
||||
/*17*/unsigned int pcie_ctl_datr_address,
|
||||
/*17*/unsigned int pcie_mode,
|
||||
/*19*/unsigned int accel_group_jump
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Fetch Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=cdma_intr_in
|
||||
|
||||
/*
|
||||
* The start_0 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_0
|
||||
|
||||
/*
|
||||
* The start_1 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_1
|
||||
|
||||
/*
|
||||
* The start_2 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_2
|
||||
|
||||
/*
|
||||
* The start_3 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_3
|
||||
|
||||
/*
|
||||
* The cdma_base_address is a Register to Store the Base Address of the CDMA Fetch that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
|
||||
* in order to Locate the Next Set of Registers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_base_address is a Register to Store the Base Address of the Source AXI BAR of the PCIe Bridge that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Read the Image Data over the PCIe Bus.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Source Image Data through the PCIe Bridge or not.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
|
||||
* to Store the Time Metrics that Refer to the Current AGI.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
int repeat;
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> irq; // Used to Temporalily Store the IRQ Mask.
|
||||
ap_uint<32> source_address_register; // Used to Temporalily Store the Value of the Source Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> destination_address_register; // Used to Temporalily Store the Value of the Destination Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> data_size_register; // Used to Temporalily Store the Value of the Data Size Register of the Scheduler Buffer.
|
||||
ap_uint<32> offset_register; // Used to Temporalily Store the Value of the Offset Register of the Scheduler Buffer.
|
||||
|
||||
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
|
||||
|
||||
|
||||
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in_value Input Port.
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_gcc_l; // Store the CDMA Fetch Transfer Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_fetch_time_start_gcc_u; // Store the CDMA Fetch Transfer Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> cdma_fetch_time_end_gcc_l; // Store the CDMA Fetch Transfer End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_fetch_time_end_gcc_u; // Store the CDMA Fetch Transfer End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
/*
|
||||
* Start an Infinite Loop.
|
||||
*/
|
||||
while(1)
|
||||
{
|
||||
|
||||
/*
|
||||
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Fetch Transfer Request
|
||||
* by the AGI that Refers to the Current Set of Registers.
|
||||
*/
|
||||
for(repeat = 0; repeat < 4; repeat++)
|
||||
{
|
||||
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
|
||||
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//If the Data Size Register is not Empty then we Have a New CDMA Fetch Transfer Request.
|
||||
//Else the Fetch Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
|
||||
if(data_size_register != 0)
|
||||
{
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------
|
||||
* Enable the Interrupts on the CDMA Fetch Core
|
||||
* --------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Fetch Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------
|
||||
* Setup the Source and Destination Address Registers of the CDMA Fetch Core
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//If the PCIe Mode is Enabled then the Source Data Should be Read through the PCIe Bridge.
|
||||
//This Mode Requires to Set the Address Tranlation Register of the Source AXI BAR of the PCI Bridge.
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Address Tranlation Register of the Source AXI BAR of the PCI Bridge with the Source Physical Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &source_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Fetch Core to be the Specified Source AXI BAR along with a Possible Offset.
|
||||
address = axi_bar_base_address + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
}
|
||||
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
|
||||
else
|
||||
{
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Fetch Core with the Source Address along with the Offset Read from the Scheduler Buffer.
|
||||
address = source_address_register + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Fetch Core with the Destination Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &destination_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Transfer Start Time
|
||||
* ----------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Transfer Start Time.
|
||||
memcpy(&cdma_fetch_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_L_OFFSET) / 4), &cdma_fetch_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Transfer Start Time.
|
||||
memcpy(&cdma_fetch_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_U_OFFSET) / 4), &cdma_fetch_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------
|
||||
* Setup the Bytes To Transfer (BTT) Register of the CDMA Fetch Core which Triggers the Transfer
|
||||
* ---------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set the Bytes To Tranfer Register of the CDMA Fetch Core with the Transfer Size in Bytes.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Wait for a CDMA Interrupt
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(cdma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
cdma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Tranfer End Time
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Tranfer End Time.
|
||||
memcpy(&cdma_fetch_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Fetch Tranfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_L_OFFSET) / 4), &cdma_fetch_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Tranfer End Time.
|
||||
memcpy(&cdma_fetch_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Fetch Tranfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_U_OFFSET) / 4), &cdma_fetch_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the CDMA Fetch Interrupt
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Status Register of the CDMA Fetch Core which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXICDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the Status Register of the CDMA Fetch Core which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Reset the CDMA Fetch Core
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Write the Reset Mask to the Control Register of the CDMA Fetch Core in order to Reset the Core.
|
||||
data_register = XAXICDMA_CR_RESET_MASK;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------
|
||||
* Re-Enable the Interrupts on the CDMA Fetch Core
|
||||
* -----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Fetch Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
|
||||
*/
|
||||
data_register = 0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* Each Iteration Refers to a Specific AGI.
|
||||
* Check the Current Iteration Value and Start the Acceleration Scheduler Indirect of the Correct AGI.
|
||||
*/
|
||||
if(repeat == 0)
|
||||
{
|
||||
//Trigger the start_0 Signal for one Clock Cycle.
|
||||
*start_0 = 0;
|
||||
*start_0 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 1)
|
||||
{
|
||||
//Trigger the start_1 Signal for one Clock Cycle.
|
||||
*start_1 = 0;
|
||||
*start_1 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 2)
|
||||
{
|
||||
//Trigger the start_2 Signal for one Clock Cycle.
|
||||
*start_2 = 0;
|
||||
*start_2 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 3)
|
||||
{
|
||||
//Trigger the start_3 Signal for one Clock Cycle.
|
||||
*start_3 = 0;
|
||||
*start_3 = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
126
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.h
Normal file
126
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.h
Normal file
@@ -0,0 +1,126 @@
|
||||
#define APM_READ_TRANSACTIONS_OFFSET 0
|
||||
#define APM_READ_BYTES_OFFSET 4
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8
|
||||
#define APM_WRITE_BYTES_OFFSET 12
|
||||
|
||||
#define APM_PACKETS_OFFSET 16
|
||||
#define APM_BYTES_OFFSET 20
|
||||
|
||||
#define APM_GCC_L_OFFSET 24
|
||||
#define APM_GCC_U_OFFSET 28
|
||||
|
||||
#define CDMA_FETCH_TIME_START_L_OFFSET 32
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44
|
||||
|
||||
#define CDMA_SEND_TIME_START_L_OFFSET 48
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60
|
||||
|
||||
#define DMA_ACCEL_TIME_START_L_OFFSET 64
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Batch Script for the Fetch Scheduler Core.
#It Creates the Project, Synthesizes the Top Function and Exports the Result as an IP Catalog Package.
#The Script Ends with exit, so it is Meant to Run Non-Interactively.

#Create/Open the Project, Select the Top Level Function and Add its Source File.
open_project Fetch_Scheduler
set_top fetch_scheduler
add_files fetch_scheduler.cpp

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

#Target Clock Period of 10 (Nanoseconds by Default, i.e. 100 MHz).
create_clock -period 10 -name default

#Run C Synthesis.
csynth_design

#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Fetch Scheduler" -version "1.0"

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Info_Memory_Block/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Info_Memory_Block/.keep
Normal file
102
Hardware/Vivado_HLS_IPs/Info_Memory_Block/info_memory_block.cpp
Normal file
102
Hardware/Vivado_HLS_IPs/Info_Memory_Block/info_memory_block.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "info_memory_block.h"
|
||||
|
||||
|
||||
/*
|
||||
* info_memory_block()
|
||||
*
|
||||
* The Hardware Functionality of the Info Memory Block Core.
|
||||
*
|
||||
* The Info Memory Block Core is Used to Aid the Acceleration Procedure of the Acceleration Groups Indirect (AGIs).
|
||||
* It is Accessed by the Acceleration Scheduler Indirect Cores of the AGIs as well as the Fetch and Send Schedulers.
|
||||
*
|
||||
* It Could be Considered as a Block of 16 Registers.
|
||||
* The Registers are Categorized in 4 Groups/Sets with 4 Registers in each Group/Set.
|
||||
*
|
||||
* Every Set of Registers Refers to one of the 4 AGIs.
|
||||
*
|
||||
* Set 0 Refers to AGI0.
|
||||
* Set 1 Refers to AGI1.
|
||||
* Set 2 Refers to AGI2.
|
||||
* Set 3 Refers to AGI3.
|
||||
*
|
||||
* The 4 Registers of Each Set Carry the Following Information:
|
||||
*
|
||||
* Register 0: Source Address.
|
||||
* Register 1: Destination Address.
|
||||
* Register 2: Data Size (Transfer Size).
|
||||
* Register 3: Address Offset.
|
||||
*
|
||||
* If an Acceleration Scheduler Indirect Requests a CDMA Transfer it Writes the Information Above to its own Set of Registers inside the Info Memory Block.
|
||||
* The Fetch or Send Scheduler Reads the Above Information from the Info Memory Block and Starts a CDMA Transfer Accordingly.
|
||||
*
|
||||
* The Function Parameters are the Input Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
/*
 * The Function Body Never Reads its Arguments and Always Returns 1.
 * The Core Exists only for its Interface: each of the 16 Arguments is Mapped by the Pragmas Below
 * to a Register of the AXI Slave Lite Interface (int_cfg).
 */
int info_memory_block(/*01*/unsigned int src_address_0,
                      /*02*/unsigned int dst_address_0,
                      /*03*/unsigned int data_size_0,
                      /*04*/unsigned int offset_0,
                      /*05*/unsigned int src_address_1,
                      /*06*/unsigned int dst_address_1,
                      /*07*/unsigned int data_size_1,
                      /*08*/unsigned int offset_1,
                      /*09*/unsigned int src_address_2,
                      /*10*/unsigned int dst_address_2,
                      /*11*/unsigned int data_size_2,
                      /*12*/unsigned int offset_2,
                      /*13*/unsigned int src_address_3,
                      /*14*/unsigned int dst_address_3,
                      /*15*/unsigned int data_size_3,
                      /*16*/unsigned int offset_3
                      )
{

    /*
     * Source Address, Destination Address, Data Size and Address Offset Registers of the First Group/Set
     */
    #pragma HLS INTERFACE s_axilite port=src_address_0 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=dst_address_0 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=data_size_0 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=offset_0 bundle=int_cfg

    /*
     * Source Address, Destination Address, Data Size and Address Offset Registers of the Second Group/Set
     */
    #pragma HLS INTERFACE s_axilite port=src_address_1 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=dst_address_1 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=data_size_1 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=offset_1 bundle=int_cfg

    /*
     * Source Address, Destination Address, Data Size and Address Offset Registers of the Third Group/Set
     */
    #pragma HLS INTERFACE s_axilite port=src_address_2 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=dst_address_2 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=data_size_2 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=offset_2 bundle=int_cfg

    /*
     * Source Address, Destination Address, Data Size and Address Offset Registers of the Fourth Group/Set
     */
    #pragma HLS INTERFACE s_axilite port=src_address_3 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=dst_address_3 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=data_size_3 bundle=int_cfg
    #pragma HLS INTERFACE s_axilite port=offset_3 bundle=int_cfg

    /*
     * The Return Value (and the Block Level Control of the Core) is Placed on the Same AXI Slave Lite Interface.
     */
    #pragma HLS INTERFACE s_axilite port=return bundle=int_cfg

    //No Processing is Performed; the Core only Exposes the Registers Above.
    return 1;

}
|
||||
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/Info_Memory_Block/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Info_Memory_Block/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Batch Script for the Info Memory Block Core.
#It Creates the Project, Synthesizes the Top Function and Exports the Result as an IP Catalog Package.
#The Script Ends with exit, so it is Meant to Run Non-Interactively.

#Create/Open the Project, Select the Top Level Function and Add its Source File.
open_project Info_Memory_Block
set_top info_memory_block
add_files info_memory_block.cpp

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

#Target Clock Period of 10 (Nanoseconds by Default, i.e. 100 MHz).
create_clock -period 10 -name default

#Run C Synthesis.
csynth_design

#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Info Memory Block" -version "1.0"

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Interrupt_Manager/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Interrupt_Manager/.keep
Normal file
206
Hardware/Vivado_HLS_IPs/Interrupt_Manager/interrupt_manager.cpp
Normal file
206
Hardware/Vivado_HLS_IPs/Interrupt_Manager/interrupt_manager.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "interrupt_manager.h"
|
||||
|
||||
|
||||
/*
|
||||
* interrupt_manager()
|
||||
*
|
||||
* The Hardware Functionality of the Interrupt Manager Core.
|
||||
*
|
||||
* The Interrupt Manager Core is Developed to Handle and Forward the Completion Interrupts from the 7 Acceleration Groups.
|
||||
*
|
||||
* The Goal of each Acceleration Group is to Inform the Linux Kernel Driver for the Completion of the Acceleration Procedure.
|
||||
* The Communication of the FPGA with the Host System is Achieved through a PCIe Bus, thus, the Way to Signal the Driver is to Send MSI Interrupts.
|
||||
*
|
||||
* Sending a MSI is a Responsibility of the FPGA's PCIe Bridge.
|
||||
* The PCIe Bridge Carries a 5-Bit Input to Set the Vector Number of the MSI and a 1-Bit Input which is Used to Trigger the MSI According to the Vector Number.
|
||||
* In the Current Block Design the 2 Inputs of the PCIe Bridge are Connected with the Two Channels of a GPIO Peripheral.
|
||||
* This GPIO from now on will be Recognized as GPIO-MSI.
|
||||
* Writing Values in the Data Registers of the 2 Channels of the GPIO-MSI Leads to Triggering a MSI Interrupt.
|
||||
*
|
||||
* In Older Approaches the Acceleration Scheduler of each Acceleration Group would Simply Access the GPIO-MSI to Send MSI Interrupts on Completion of an Image Process.
|
||||
* This Approach was Proved to be Unreliable Since the Concurrent Access to the GPIO-MSI by Multiple Acceleration Groups
|
||||
* Could Lead to Possible Loss of Interrupts that were NEVER Transmitted.
|
||||
*
|
||||
* The new Approach to Ensure Zero Loss of Interrupts was to Develop the Current Interrupt Manager.
|
||||
* The Interrupt Manager Includes an Array of 7 Registers where each Register Refers to each of the 7 Acceleration Groups.
|
||||
*
|
||||
* Register_Array[0] Refers to AGD0
|
||||
* Register_Array[1] Refers to AGD1
|
||||
* Register_Array[2] Refers to AGI0
|
||||
* Register_Array[3] Refers to AGI1
|
||||
* Register_Array[4] Refers to AGI2
|
||||
* Register_Array[5] Refers to AGI3
|
||||
* Register_Array[6] Refers to AGSG
|
||||
*
|
||||
* When an Acceleration Scheduler of any of the Acceleration Groups Requires to Send an MSI Interrupt for the Completion of its Acceleration Procedure
|
||||
* it Simply Writes a Vector Number Value to the Corresponding Field of the Register Array of the Interrupt Manager as a MSI Request.
|
||||
* The Kernel Driver Identifies the Acceleration Group that "Sent" the MSI by the Vector Number.
|
||||
*
|
||||
* Vector Number:0 --> AGD0
|
||||
* Vector Number:1 --> AGD1
|
||||
* Vector Number:2 --> AGI0
|
||||
* Vector Number:3 --> AGI1
|
||||
* Vector Number:4 --> AGI2
|
||||
* Vector Number:5 --> AGI3
|
||||
* Vector Number:6 --> AGSG
|
||||
*
|
||||
* The Interrupt Manager Checks in a Round Robin Manner the Fields of the Register Array for a Non-Zero Value which Indicates a new MSI Request.
|
||||
* This Makes it Obvious that the Acceleration Schedulers Write to the Register Array of the Interrupt Manager the Vector Number Incremented by 1.
|
||||
* This is Done to Avoid Zero Values that are not Identified by the Interrupt Manager as MSI Requests.
|
||||
*
|
||||
* If the Interrupt Manager Finds a Field of the Register Array with Non-Zero Value then it Decreases this Value by 1 in order to Produce
|
||||
* the Correct Vector Number and Writes this Value to the GPIO-MSI Peripheral to Trigger the MSI Interrupt.
|
||||
*
|
||||
* The Interrupt Manager, then, Waits until it Receives an Acknowledgment Signal from the Kernel Driver before Checking for another MSI Request.
|
||||
* The Kernel Driver, actually, Writes a Logic 1 Value to another GPIO Peripheral whose 1-Bit Output Signals the Interrupt Manager.
|
||||
* This GPIO Peripheral from now on will be Recognized as GPIO-ACK.
|
||||
*
|
||||
*
|
||||
* The Sequential Steps of the Interrupt Management are as Follows:
|
||||
*
|
||||
* Start a for Loop with 7 Iterations where each Iteration is to Check for a MSI Request by the Corresponding Acceleration Group.
|
||||
* NOTE Enabling the Auto Restart Mode of the Current Core will Lead to Starting Over the for Loop.
|
||||
*
|
||||
* a --> Check if the Current Field of the Register Array Has a Non-Zero Value.
|
||||
* If this is the Case Proceed to Send a MSI Interrupt.
|
||||
* b --> Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
|
||||
* c --> Write the Vector Number to the GPIO-MSI Peripheral that is Connected with the PCIe Bridge to Trigger an MSI Interrupt.
|
||||
* d --> Wait for an Acknowledgment Signal from the Driver through the GPIO-ACK Peripheral.
|
||||
* e --> Self-Clear to Zero the Current Field of the Register Array of the Interrupt Manager.
|
||||
* The Next Time we Find a Non-Zero Value in this Field we Know that an Acceleration Group Has Made a Valid MSI Request.
|
||||
* f --> Clear the Data Register of the GPIO-ACK Peripheral.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Acknowledgements from the Linux Kernel Driver.
|
||||
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int interrupt_manager(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *intr_ack,
|
||||
/*03*/unsigned int gpio_msi_device_address,
|
||||
/*04*/unsigned int gpio_ack_device_address,
|
||||
/*05*/unsigned int self_msi_request_offset,
|
||||
/*06*/unsigned int msi_request[7]
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The gpio_msi_device_address is a Register to Store the Base Address of the GPIO-MSI that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=gpio_msi_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The gpio_ack_device_address is a Register to Store the Base Address of the GPIO-ACK that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=gpio_ack_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The self_msi_request_offset is a Register to Store the Address Offset where the Register Array (msi_request) is Located.
|
||||
* This Address Offset Actually Leads the Interrupt Manager to Access its Own Configuration Registers through its AXI Slave Lite (cfg) Interface.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=self_msi_request_offset bundle=cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=msi_request bundle=cfg
|
||||
|
||||
/*
|
||||
* The intr_ack is a Single Bit Input which is Used to Receive External Acknowledgements from the Linux Kernel Driver.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=intr_ack
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<1> intr_ack_value; // Used to Read the Last Value of the intr_ack Input Port.
|
||||
|
||||
|
||||
for(int repeat = 0; repeat < 7; repeat++)
|
||||
{
|
||||
|
||||
//If the Current Field of the Register Array (msi_request) Has a Non-Zero Value then we Have a Valid MSI Request by the Corresponding Acceleration Group.
|
||||
if(msi_request[repeat] != 0)
|
||||
{
|
||||
/*
|
||||
* ---------------------------------------------------------
|
||||
* Send a MSI Interrupt by Writing to the GPIO-MSI Registers
|
||||
* ---------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
|
||||
data_register = msi_request[repeat] - 1;
|
||||
|
||||
//Write the Vector Number to the Data Register of the Second Channel of the GPIO-MSI.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write a Logic 1 Value to the Data Register of the First Channel of the GPIO-MSI to Trigger the MSI Interrupt.
|
||||
data_register = 0x1;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Data Register of the First Channel of the GPIO-MSI back to Zero.
|
||||
data_register = 0x0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Wait for a Interrupt Acknowledgement
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the intr_ack Input.
|
||||
intr_ack_value = *intr_ack;
|
||||
|
||||
//Keep Looping for as long as the intr_ack Input Does not Reach a Logic 1 Value.
|
||||
while(intr_ack_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the intr_ack Input.
|
||||
intr_ack_value = *intr_ack;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
intr_ack_value = 0;
|
||||
|
||||
data_register = 0x0;
|
||||
|
||||
/* ---------------------------------------------------------------------------
|
||||
* Self-Clear the Current Field of the Register Array of the Interrupt Manager
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Write a Zero Value to the Current Field of the Register Array of the Interrupt Manager to Clear the Field.
|
||||
//NOTE the Interrupt Manager Herein Uses its AXI Master Interface to Write to its own AXI Slave Lite Interface.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (self_msi_request_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Clear the GPIO-ACK Peripheral
|
||||
* -----------------------------
|
||||
*/
|
||||
|
||||
//Clear the GPIO-ACK by Writing a Zero Value to its Data Register.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_ack_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
 * ---------------------------------------------------
 * Registers and Offsets of the Xilinx GPIO Peripheral
 * ---------------------------------------------------
 *
 * All the Values are Byte Offsets Relative to the Base Address of the GPIO Peripheral.
 */

#define XGPIO_CHANNEL_1_OFFSET 0x0 // GPIO Channel 1 Base Offset.
#define XGPIO_CHANNEL_2_OFFSET 0x8 // GPIO Channel 2 Base Offset.

/*
 * GPIO Channel 1 Data Register.
 *
 * The Data Register of GPIO Channel 2 is XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET.
 */
#define XGPIO_DATA_OFFSET 0x0
|
||||
|
||||
17
Hardware/Vivado_HLS_IPs/Interrupt_Manager/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Interrupt_Manager/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Batch Script for the Interrupt Manager Core.
#It Creates the Project, Synthesizes the Top Function and Exports the Result as an IP Catalog Package.
#The Script Ends with exit, so it is Meant to Run Non-Interactively.

#Create/Open the Project, Select the Top Level Function and Add its Source File.
open_project Interrupt_Manager
set_top interrupt_manager
add_files interrupt_manager.cpp

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

#Target Clock Period of 10 (Nanoseconds by Default, i.e. 100 MHz).
create_clock -period 10 -name default

#Run C Synthesis.
csynth_design

#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Interrupt Manager" -version "3.5"

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Send_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Send_Scheduler/.keep
Normal file
17
Hardware/Vivado_HLS_IPs/Send_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Send_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Batch Script for the Send Scheduler Core.
#It Creates the Project, Synthesizes the Top Function and Exports the Result as an IP Catalog Package.
#The Script Ends with exit, so it is Meant to Run Non-Interactively.

#Create/Open the Project, Select the Top Level Function and Add its Source File.
open_project Send_Scheduler
set_top send_scheduler
add_files send_scheduler.cpp

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

#Target Clock Period of 10 (Nanoseconds by Default, i.e. 100 MHz).
create_clock -period 10 -name default

#Run C Synthesis.
csynth_design

#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Send Scheduler" -version "3.0"

exit
|
||||
476
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.cpp
Normal file
476
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.cpp
Normal file
@@ -0,0 +1,476 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "send_scheduler.h"
|
||||
|
||||
|
||||
/*
 * ------------------------------
 * Registers and Masks of the DMA
 * ------------------------------
 *
 * The *_OFFSET Values are Byte Offsets Relative to the Base Address of the CDMA Core.
 */
#define XAXICDMA_CR_OFFSET      0x00000000 // Control Register.
#define XAXICDMA_SR_OFFSET      0x00000004 // Status Register.

#define XAXICDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET 0x00000020 // Destination Address Register.
#define XAXICDMA_BTT_OFFSET     0x00000028 // Bytes to Transfer Register.

#define XAXICDMA_CR_RESET_MASK  0x00000004 // Reset CDMA Mask.

#define XAXICDMA_XR_IRQ_IOC_MASK   0x00001000 // Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
#define XAXICDMA_XR_IRQ_ALL_MASK   0x00007000 // All Interrupt Mask.

/*
 * --------------------------------------------------------------------------
 * Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
 * --------------------------------------------------------------------------
 *
 * NOTE the Upper Half of the 64-Bit Global Clock Counter is at the Lower Address.
 */

#define XAPM_GCC_HIGH_OFFSET 0x0000 //Global Clock Counter 32 to 63 Bits (Upper).
#define XAPM_GCC_LOW_OFFSET  0x0004 //Global Clock Counter 0 to 31 Bits (Lower).
|
||||
|
||||
/*
|
||||
* send_scheduler()
|
||||
*
|
||||
* The Hardware Functionality of the Send Scheduler Core.
|
||||
*
|
||||
* The Send Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
|
||||
* The Responsibility of this Core is to Manage the Procedure of Sending the Processed Data by the AGIs back to the Host's Memory.
|
||||
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
|
||||
* If it Finds Information for new Transfer it Starts the CDMA Send Core to Transfer Processed Image Data from the FPGA's DDR3 to the Host's Memory.
|
||||
* The Interrupt Manager will then be Informed about the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure of the Corresponding AGI.
|
||||
*
|
||||
* When an AGI wants to Request a Transfer of Processed Image Data from the Send Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
|
||||
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Send Scheduler.
|
||||
*
|
||||
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
|
||||
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
|
||||
* Each Set Corresponds to One of the 4 AGIs.
|
||||
*
|
||||
* When an AGI Writes the Above Information to the Scheduler Buffer, the Send Scheduler Starts a CDMA Transfer Accordingly
|
||||
* to Send the Processed Image Data back to the Host's Memory.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
|
||||
*
|
||||
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
|
||||
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
|
||||
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Send.
|
||||
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
|
||||
* b --> Enable the Interrupts on the CDMA Send Core.
|
||||
* c --> Setup the CDMA with the Source and Destination Addresses.
|
||||
* If the Destination Data Should be Sent through the PCIe Bridge then Get the Destination Address from the Scheduler Buffer and Set the
|
||||
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
|
||||
* Then Set the Destination Address Register of the CDMA Send Core to be the Corresponding AXI BAR.
|
||||
* If the Destination Data Should not be Sent through the PCIe Bridge then Just Set the Destination Address Register of the CDMA Send Core
|
||||
* with the Destination Address of the Scheduler Buffer.
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Started.
|
||||
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Send Transfer.
|
||||
* f --> Wait for an Interrupt by the CDMA Send on Completion of the Transfer.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Ended.
|
||||
* h --> Acknowledge the CDMA Send Interrupt.
|
||||
* i --> Reset the CDMA Send Core.
|
||||
* j --> Re-Enable the Interrupts on the CDMA Send Core.
|
||||
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
|
||||
* l --> Inform the Interrupt Manager About the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
|
||||
*
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Send Core.
|
||||
* 03 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
|
||||
*/
|
||||
int send_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *cdma_intr_in,
|
||||
/*03*/unsigned int cdma_base_address,
|
||||
/*04*/unsigned int scheduler_buffer_base_address,
|
||||
/*05*/unsigned int src_address_first_reg_offset,
|
||||
/*06*/unsigned int dst_address_first_reg_offset,
|
||||
/*07*/unsigned int data_size_first_reg_offset,
|
||||
/*08*/unsigned int offset_first_reg_offset,
|
||||
/*09*/unsigned int step_offset,
|
||||
/*10*/unsigned int shared_apm_base_address,
|
||||
/*11*/unsigned int shared_metrics_base_address,
|
||||
/*12*/unsigned int axi_bar_base_address,
|
||||
/*13*/unsigned int pcie_ctl_datr_address,
|
||||
/*14*/unsigned int pcie_mode,
|
||||
/*15*/unsigned int interrupt_manager_register_offset,
|
||||
/*16*/unsigned int accel_group_jump
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Send Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=cdma_intr_in
|
||||
|
||||
/*
|
||||
* The cdma_base_address is a Register to Store the Base Address of the CDMA Send that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
|
||||
* in order to Locate the Next Set of Registers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_base_address is a Register to Store the Base Address of the Destination AXI BAR of the PCIe Bridge that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Processed Image Data over the PCIe Bus.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Destination AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Destination Image Data through the PCIe Bridge or not.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
|
||||
* to Store the Time Metrics that Refer to the Current AGI.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
int repeat;
|
||||
|
||||
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in Input Port.
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> irq; // Used to Temporarily Store the IRQ Mask.
|
||||
ap_uint<32> source_address_register; // Used to Temporarily Store the Value of the Source Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> destination_address_register; // Used to Temporarily Store the Value of the Destination Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> data_size_register; // Used to Temporarily Store the Value of the Data Size Register of the Scheduler Buffer.
|
||||
ap_uint<32> offset_register; // Used to Temporarily Store the Value of the Offset Register of the Scheduler Buffer.
|
||||
|
||||
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
|
||||
|
||||
ap_uint<32> cdma_send_time_start_gcc_l; // Store the CDMA Send Transfer Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_send_time_start_gcc_u; // Store the CDMA Send Transfer Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> cdma_send_time_end_gcc_l; // Store the CDMA Send Transfer End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_send_time_end_gcc_u; // Store the CDMA Send Transfer End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
|
||||
/*
|
||||
* Start an Infinite Loop.
|
||||
*/
|
||||
while(1)
|
||||
{
|
||||
|
||||
/*
|
||||
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Send Transfer Request
|
||||
* by the AGI that Refers to the Current Set of Registers.
|
||||
*/
|
||||
for(repeat = 0; repeat < 4; repeat++)
|
||||
{
|
||||
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
|
||||
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//If the Data Size Register is not Empty then we Have a New CDMA Send Transfer Request.
|
||||
//Else the Send Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
|
||||
if(data_size_register != 0)
|
||||
{
|
||||
|
||||
/*
|
||||
* --------------------------------------------
|
||||
* Enable the Interrupts on the CDMA Send Core
|
||||
* --------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Send Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------
|
||||
* Setup the Source and Destination Address Registers of the CDMA Send Core
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Send Core with the Source Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &source_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//If the PCIe Mode is Enabled then the Destination Data Should be Written through the PCIe Bridge.
|
||||
//This Mode Requires Setting the Address Translation Register of the Destination AXI BAR of the PCIe Bridge.
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Address Translation Register of the Destination AXI BAR of the PCIe Bridge with the Destination Physical Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &destination_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Send Core to be the Specified Destination AXI BAR along with a Possible Offset.
|
||||
address = axi_bar_base_address + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
|
||||
else
|
||||
{
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Send Core with the Destination Address along with the Offset Read from the Scheduler Buffer.
|
||||
address = destination_address_register + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Transfer Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer Start Time.
|
||||
memcpy(&cdma_send_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_L_OFFSET) / 4), &cdma_send_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer Start Time.
|
||||
memcpy(&cdma_send_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_U_OFFSET) / 4), &cdma_send_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------------------------
|
||||
* Setup the Bytes To Transfer (BTT) Register of the CDMA Send Core which Triggers the Transfer
|
||||
* --------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set the Bytes To Transfer Register of the CDMA Send Core with the Transfer Size in Bytes.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Wait for a CDMA Interrupt
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(cdma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
cdma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Transfer End Time
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer End Time.
|
||||
memcpy(&cdma_send_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_L_OFFSET) / 4), &cdma_send_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer End Time.
|
||||
memcpy(&cdma_send_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_U_OFFSET) / 4), &cdma_send_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the CDMA Send Interrupt
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Status Register of the CDMA Send Core which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXICDMA_XR_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the Status Register of the CDMA Send Core which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Reset the CDMA Send Core
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Write the Reset Mask to the Control Register of the CDMA Send Core in order to Reset the Core.
|
||||
data_register = XAXICDMA_CR_RESET_MASK;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -----------------------------------------------
|
||||
* Re-Enable the Interrupts on the CDMA Send Core
|
||||
* -----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Send Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
|
||||
*/
|
||||
data_register = 0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* If the PCIe Mode is Enabled then we Have to Inform the Interrupt Manager to Send a MSI Interrupt Since the CDMA Send Transfer is Complete
|
||||
* which, also, Means that the Acceleration Procedure is Complete.
|
||||
*/
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//The Current Iteration Value along with the accel_group_jump Value Indicate the Acceleration Group Number of the Acceleration Group Indirect which Can be 2, 3, 4 or 5.
|
||||
//2 is AGI0.
|
||||
//3 is AGI1.
|
||||
//4 is AGI2.
|
||||
//5 is AGI3.
|
||||
data_register = repeat + accel_group_jump;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
126
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.h
Normal file
126
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.h
Normal file
@@ -0,0 +1,126 @@
|
||||
#define APM_READ_TRANSACTIONS_OFFSET 0
|
||||
#define APM_READ_BYTES_OFFSET 4
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8
|
||||
#define APM_WRITE_BYTES_OFFSET 12
|
||||
|
||||
#define APM_PACKETS_OFFSET 16
|
||||
#define APM_BYTES_OFFSET 20
|
||||
|
||||
#define APM_GCC_L_OFFSET 24
|
||||
#define APM_GCC_U_OFFSET 28
|
||||
|
||||
#define CDMA_FETCH_TIME_START_L_OFFSET 32
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44
|
||||
|
||||
#define CDMA_SEND_TIME_START_L_OFFSET 48
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60
|
||||
|
||||
#define DMA_ACCEL_TIME_START_L_OFFSET 64
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
0
Hardware/Vivado_HLS_IPs/Sobel_Filter/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Sobel_Filter/.keep
Normal file
74
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_axi_sdata2.h
Normal file
74
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_axi_sdata2.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_axi_sdata.h
|
||||
Purpose: AXI data type for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
#ifndef __AP__AXI_SDATA__
|
||||
#define __AP__AXI_SDATA__
|
||||
|
||||
// AP_INT_MAX_W is defined ahead of the ap_int.h include on purpose.
// NOTE(review): per the Vivado HLS documentation the macro has to be set before ap_int.h is
// pulled in for it to raise the maximum ap_int/ap_uint bit width (here 4096) -- confirm for the tool version in use.
#define AP_INT_MAX_W 4096
|
||||
#include "ap_int.h"
|
||||
|
||||
// Signed AXI data record (all members are ap_int<>).
//   D  - width of data in bits; strb is (D+7)/8 bits wide, i.e. one bit per data byte, rounded up.
//   U  - width of user in bits.
//   TI - width intended for the tid member; tid is commented out below, so TI is currently unused
//        and only kept in the template parameter list.
//   TD - width of tdest in bits.
template<int D,int U,int TI,int TD>
|
||||
struct ap_axis2{
|
||||
ap_int<D> data;
|
||||
ap_int<(D+7)/8> strb;
|
||||
ap_int<U> user;
|
||||
ap_int<1> last;
|
||||
// ap_int<TI> tid;
|
||||
ap_int<TD> tdest;
|
||||
};
|
||||
|
||||
// Unsigned AXI data record (all members are ap_uint<>); same member set as ap_axis2.
//   D  - width of data in bits; strb is (D+7)/8 bits wide, i.e. one bit per data byte, rounded up.
//   U  - width of user in bits.
//   TI - width intended for the tid member; tid is commented out below, so TI is currently unused
//        and only kept in the template parameter list.
//   TD - width of tdest in bits.
template<int D,int U,int TI,int TD>
|
||||
struct ap_axiu2{
|
||||
ap_uint<D> data;
|
||||
ap_uint<(D+7)/8> strb;
|
||||
ap_uint<U> user;
|
||||
ap_uint<1> last;
|
||||
//ap_uint<TI> tid;
|
||||
ap_uint<TD> tdest;
|
||||
};
|
||||
|
||||
//typedef ap_axis<int D, int U, int TI, int TD> ap_axis_unsigned<int D, int U, int TI, int TD>;
|
||||
|
||||
|
||||
#endif
|
||||
99
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_bmp.h
Normal file
99
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_bmp.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_bmp.h
|
||||
Purpose: BMP image reader and writer header file for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __XLNX__BITMAP__
|
||||
#define __XLNX__BITMAP__
|
||||
|
||||
// Basic color definitions
|
||||
#define BLACK 0
|
||||
#define WHITE 255
|
||||
|
||||
// Maximum image size
// MAX_ROWS x MAX_COLS dimensions every per-pixel plane (R, G, B, Y, U, V) of BMPImage,
// so raising either value grows that structure six-fold per added pixel.
|
||||
#define MAX_ROWS 1080
|
||||
#define MAX_COLS 1920
|
||||
|
||||
//File Information Header
// In-memory holder for the fields of the BMP file header.
// NOTE(review): FileSize (unsigned int) follows the 2-byte FileType, so a compiler will normally
// insert padding and sizeof(BMPHeader) will not match the packed on-disk header; presumably the
// fields are read/written one at a time -- confirm in the BMP reader/writer implementation.
// NOTE(review): the BMP file format stores the pixel-data offset as a 32-bit value, while Offset
// is declared unsigned short here -- confirm how the implementation reads/writes this field.
|
||||
typedef struct{
|
||||
unsigned short FileType;
|
||||
unsigned int FileSize;
|
||||
unsigned short Reserved1;
|
||||
unsigned short Reserved2;
|
||||
unsigned short Offset;
|
||||
}BMPHeader;
|
||||
|
||||
// Image Information Header
// Holds the per-image fields: header size, width/height, plane count, bits per pixel,
// compression, bitmap size, horizontal/vertical resolution and the two palette counters.
// NOTE(review): Width and Height are unsigned here; presumably only bottom-up bitmaps with
// positive dimensions are expected -- confirm against the reader implementation.
typedef struct{
|
||||
unsigned int Size;
|
||||
unsigned int Width;
|
||||
unsigned int Height;
|
||||
unsigned short Planes;
|
||||
unsigned short BitsPerPixel;
|
||||
unsigned int Compression;
|
||||
unsigned int SizeOfBitmap;
|
||||
unsigned int HorzResolution;
|
||||
unsigned int VertResolution;
|
||||
unsigned int ColorsUsed;
|
||||
unsigned int ColorsImportant;
|
||||
}BMPImageHeader;
|
||||
|
||||
// Complete image: pointers to both headers, to the color table and to the raw data,
// plus six full-size planes (R, G, B, Y, U, V) of MAX_ROWS x MAX_COLS one-byte samples each.
// With MAX_ROWS = 1080 and MAX_COLS = 1920 every plane is 2,073,600 bytes, so the six planes
// alone occupy 12,441,600 bytes (about 11.9 MiB); an object of this type is therefore too large
// for a typical stack and should be given static or heap storage.
// U and V are plain char, whose signedness is implementation-defined.
typedef struct{
|
||||
BMPHeader *file_header;
|
||||
BMPImageHeader *image_header;
|
||||
unsigned int *colors;
|
||||
unsigned char *data;
|
||||
unsigned char R[MAX_ROWS][MAX_COLS];
|
||||
unsigned char G[MAX_ROWS][MAX_COLS];
|
||||
unsigned char B[MAX_ROWS][MAX_COLS];
|
||||
unsigned char Y[MAX_ROWS][MAX_COLS];
|
||||
char U[MAX_ROWS][MAX_COLS];
|
||||
char V[MAX_ROWS][MAX_COLS];
|
||||
}BMPImage;
|
||||
|
||||
//Read Function
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably file is the path of the BMP to load, row/col are the image dimensions
// and R/G/B point to caller-provided per-channel buffers of row*col bytes; the meaning of the
// int return value is not visible here -- confirm all of this against the implementation.
|
||||
int BMP_Read(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
|
||||
|
||||
//Write Function
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably file is the path of the BMP to create, row/col are the image dimensions
// and R/G/B point to per-channel buffers of row*col bytes to be written; the meaning of the
// int return value is not visible here -- confirm all of this against the implementation.
|
||||
int BMP_Write(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
|
||||
|
||||
#endif
|
||||
341
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_video.h
Normal file
341
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_video.h
Normal file
@@ -0,0 +1,341 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_video.h
|
||||
Purpose: Video datatype header file for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
January 28, 2015 - Caes-lab TEI Crete revised
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef ___AP__VIDEO__
|
||||
#define ___AP__VIDEO__
|
||||
|
||||
#include "ap_int.h"
|
||||
|
||||
|
||||
/* Parametrized RGB structure */
|
||||
template <int A, int D, int C>
|
||||
struct ap_rgb{
|
||||
ap_uint<A> B;
|
||||
ap_uint<D> G;
|
||||
ap_uint<C> R;
|
||||
};
|
||||
|
||||
/* Parametrized YUV structure */
|
||||
template <int A, int B, int C>
|
||||
struct ap_yuv{
|
||||
ap_uint<A> Y;
|
||||
ap_int<B> U;
|
||||
ap_int<C> V;
|
||||
};
|
||||
|
||||
/* Line buffer class definition */
|
||||
/*
 * ap_linebuffer<T, LROW, LCOL>
 *
 * Fixed-size buffer of LROW rows by LCOL columns of T.
 * Row 0 is referred to as the "bottom" and row LROW-1 as the "top".
 * None of the member functions checks its row/column arguments; callers
 * must pass indices inside [0, LROW) and [0, LCOL).
 */
template <typename T, int LROW, int LCOL>
class ap_linebuffer{
public:
	/* Buffer storage, indexed as M[row][column]. */
	T M[LROW][LCOL];

	ap_linebuffer(){
#pragma AP ARRAY_PARTITION variable=M dim=1 complete
//#pragma AP data_pack variable=M
//#pragma AP dependence variable=M intra false
//#pragma AP dependence variable=M inter false
	}
	~ap_linebuffer(){}
	void shift_up(int col);
	void shift_down(int col);
	void shift_down_all();
	void insert(T value, int row, int col);
	void insert_top(T value, int col);
	void insert_bottom(T value, int col);
	void print(int StartCol, int EndCol);
	T getval(int RowIndex,int ColIndex);
};

/* Line buffer print function.
 * Prints the values of all rows in the line buffer, from row LROW-1 down to
 * row 0, for the columns StartCol (inclusive) up to EndCol (exclusive).
 *
 * Every element is converted to int before it is handed to printf: passing a
 * value of an arbitrary type T through a variadic call under a "%d"
 * conversion is undefined behaviour unless it happens to promote to int.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::print(int StartCol, int EndCol)
{
	for(int i = LROW-1; i > -1; i--){
		printf("Line %d:\t",i);
		for(int j = StartCol; j < EndCol; j++){
			printf("%d\t",static_cast<int>(M[i][j]));
		}
		printf("\n");
	}
	printf("\n");
}

/* Line buffer shift up
 * Moves the values of column col one row up (row i-1 is copied to row i).
 * Row 0 keeps its old value and is expected to receive the new data pixel.
 * The bottom is row = 0
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_up(int col)
{
#pragma AP inline
	for(int i = LROW-1; i > 0; i--){
#pragma AP unroll
		M[i][col] = M[i-1][col];
	}
}

/* Line buffer shift down
 * Moves the values of column col one row down (row i+1 is copied to row i).
 * Row LROW-1 keeps its old value and is expected to receive the new data pixel.
 * The top is row = LROW - 1
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_down(int col)
{
#pragma AP inline
	for(int i = 0; i < LROW-1; i++){
#pragma AP unroll
		M[i][col] = M[i+1][col];
	}
}

/* Line buffer shift down all
 * Same movement as shift_down, applied to every column of the line buffer.
 * Row LROW-1 keeps its old values and is expected to receive the new data.
 * The top is row = LROW - 1
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_down_all()
{
#pragma AP inline
	for(int i = 0; i < LROW-1; i++){
#pragma AP unroll
		for(int j = 0; j < LCOL; j++){
#pragma AP unroll factor=120
			M[i][j] = M[i+1][j];
		}
	}
}

/* Line buffer insert bottom
 * Inserts a new value in the bottom row of the line buffer at column = col
 * The bottom is row = 0
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert_bottom(T value, int col)
{
#pragma AP inline

	M[0][col] = value;
}

/* Line buffer insert top
 * Inserts a new value in the top row of the line buffer at column = col
 * The top is row = LROW - 1
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert_top(T value, int col)
{
#pragma AP inline

	M[LROW-1][col] = value;
}

/* Line buffer insert
 * Inserts a new value at any location of the line buffer
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert(T value, int row, int col)
{
#pragma AP inline
	M[row][col] = value;
}

/* Line buffer getval
 * Returns the data value in the line buffer at position RowIndex, ColIndex
 */
template <typename T, int LROW, int LCOL>
T ap_linebuffer<T,LROW,LCOL>::getval(int RowIndex,int ColIndex)
{
#pragma AP inline

	return M[RowIndex][ColIndex];
}
|
||||
|
||||
/* Memory window class definition */
|
||||
/*
 * ap_window<T, LROW, LCOL>
 *
 * Fixed-size window of LROW rows by LCOL columns of T.
 * None of the member functions checks its row/column arguments; callers
 * must pass indices inside [0, LROW) and [0, LCOL).
 */
template <typename T, int LROW, int LCOL>
class ap_window{
public:
	/* Window storage, indexed as M[row][column]. */
	T M[LROW][LCOL];

	ap_window(){
#pragma AP ARRAY_PARTITION variable=M dim=0 complete
//#pragma AP data_pack variable=M
	}
	~ap_window(){}
	void shift_right();
	void shift_left();
	void shift_up();
	void shift_down();
	void insert(T value, int row,int col);
	void print();
	T getval(int RowIndex,int ColIndex);
};

/* Window print
 * Prints the entire contents of the memory window, preceded by its size and
 * a header with the column numbers. Rows are printed from LROW-1 down to 0.
 *
 * Every element is converted to int before it is handed to printf: passing a
 * value of an arbitrary type T through a variadic call under a "%d"
 * conversion is undefined behaviour unless it happens to promote to int.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::print()
{
	printf("Window Size = %d x %d\n",LROW,LCOL);
	printf("Col \t");
	for(int j = 0; j < LCOL; j++){
		printf("%d \t",j);
	}
	printf("\n");
	for(int i = LROW-1; i > -1; i--){
		printf("Row %d: \t",i);
		for(int j = 0; j < LCOL; j++){
			printf("%d\t",static_cast<int>(M[i][j]));
		}
		printf("\n");
	}
	printf("\n");
}

/* Window shift right
 * Moves all the contents of the window horizontally
 * (column j+1 is copied to column j).
 * Assumes new values will be placed in column = LCOL-1
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_right()
{
#pragma AP inline
	for(int i = 0; i < LROW; i++){
#pragma AP unroll
		for(int j = 0; j < LCOL-1; j++){
#pragma AP unroll
			M[i][j] = M[i][j+1];
		}
	}
}

/* Window shift left
 * Moves all the contents of the window horizontally
 * (column j-1 is copied to column j).
 * Assumes new values will be placed in column = 0
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_left()
{
#pragma AP inline
	for(int i = 0; i < LROW; i++){
#pragma AP unroll
		for(int j = LCOL-1; j > 0; j--){
#pragma AP unroll
			M[i][j] = M[i][j-1];
		}
	}
}

/* Window shift up
 * Moves all the contents of the window vertically
 * (row i-1 is copied to row i).
 * Assumes new values will be placed in row = 0
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_up()
{
#pragma AP inline
	for(int i = LROW-1; i > 0; i--){
#pragma AP unroll
		for(int j = 0; j < LCOL; j++){
#pragma AP unroll
			M[i][j] = M[i-1][j];
		}
	}
}

/* Window shift down
 * Moves all the contents of the window vertically
 * (row i+1 is copied to row i).
 * Assumes new values will be placed in row = LROW - 1
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_down()
{
#pragma AP inline
	for(int i = 0; i < LROW-1; i++){
#pragma AP unroll
		for(int j = 0; j < LCOL; j++){
#pragma AP unroll
			M[i][j] = M[i+1][j];
		}
	}
}

/* Window insert
 * Inserts a new value at any location of the window
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::insert(T value, int row, int col)
{
#pragma AP inline
	M[row][col] = value;
}

/* Window getval
 * Returns the value of any window location
 */
template <typename T, int LROW, int LCOL>
T ap_window<T,LROW,LCOL>::getval(int RowIndex, int ColIndex)
{
#pragma AP inline
	return M[RowIndex][ColIndex];
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,19 @@
|
||||
#include <ap_int.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
/*
 * is_packet_complete()
 *
 * Accounts for one more 4-Byte Transfer and Reports whether the Current Packet is Complete.
 *
 * count --> Running Number of Bytes of the Current Packet. It is Increased by 4 on every Call
 *           and Cleared to 0 when the Packet Completes.
 * size  --> The Packet Size in Bytes.
 *
 * Returns 1 when the Running Count Has Reached the Packet Size, otherwise 0.
 *
 * NOTE that the Count is Compared with >= Instead of ==.
 * If the Size is not an Integer Multiple of 4 the Count Would Step Over it, Never Match
 * and Keep Growing. With >= the Packet Completes on the First Transfer that Reaches or Passes the Size.
 * A Size that is Zero or Negative Makes every Call Report a Complete Packet.
 */
int is_packet_complete(int *count, int size) {

	//Increase by 4 Bytes.
	*count += 4;

	//If the Current Count Value Has Reached the Packet Size then Clear the Count and Return 1.
	if (*count >= size)
	{
		*count = 0;
		return 1;
	}

	return 0;
}
|
||||
@@ -0,0 +1,6 @@
|
||||
#ifndef _PACKET_MODE_OPERATIONS_H_
|
||||
#define _PACKET_MODE_OPERATIONS_H_
|
||||
|
||||
int is_packet_complete(int *count, int size);
|
||||
|
||||
#endif
|
||||
17
Hardware/Vivado_HLS_IPs/Sobel_Filter/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Sobel_Filter/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Build Script of the Sobel Filter Core.
#It Creates the Project, Synthesizes the sobel_filter Top Function and Exports the Result as an IP.

#Create/Open the Project and Select the Top Level Function.
open_project Sobel_Filter
set_top sobel_filter

#Add the Source Files of the Core.
foreach source_file {sobel.cpp sobel_operations.cpp packet_mode_operations.cpp} {
    add_files $source_file
}

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

#Clock Constraint Named default with a Period of 10.
create_clock -period 10 -name default

#Run the C Synthesis.
csynth_design

#Export the Synthesized Design to the IP Catalog.
export_design -format ip_catalog -display_name "Sobel Filter" -version "5.8"
|
||||
611
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.cpp
Normal file
611
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.cpp
Normal file
@@ -0,0 +1,611 @@
|
||||
#include "sobel.h"
|
||||
#include "sobel_operations.h"
|
||||
#include "packet_mode_operations.h"
|
||||
|
||||
/*
|
||||
* sobel_filter()
|
||||
*
|
||||
* The Hardware Functionality of the Sobel Filter (HW Accelerator) Core.
|
||||
*
|
||||
* The Sobel Filter is a HW Accelerator that Applies Sobel Edge Detection on Images.
|
||||
* It Receives and Processes the Image Data in Rows.
|
||||
* In order to Produce one Processed Row it Requires 3 Received Rows.
|
||||
* This Precondition is due to the Fact that Edge Detection is Applied to a Pixel according to its Neighbor Pixels.
|
||||
*
|
||||
* Once the Sobel Filter Receives the First 3 Rows it Produces one Processed Row.
|
||||
* Then it Rejects the First Row, Sets the Second Row as First and Sets the Last Row as Second.
|
||||
* The Next/Newly Received Row is Set as the Last of the Rows.
|
||||
* Now there are, again, 3 Rows in Order to Produce the Next Processed Row.
|
||||
* This Procedure Carries on until all the Rows of the Image are Received and Processed.
|
||||
*
|
||||
* NOTE that the First and Last Rows of the Processed Image are Filled with Dark Pixels.
|
||||
* NOTE also that the First and Last Columns of all the Rows of the Processed Image are Filled with Dark Pixels.
|
||||
*
|
||||
* The Sobel Edge Detection Cannot be Applied to the Perimetric Pixels of the Image Since they Miss the Required Amount of Neighbors
|
||||
* this is why they are Filled with Dark Pixels.
|
||||
*
|
||||
* The Sequential Steps of the Sobel Filter are as Follows:
|
||||
*
|
||||
* a --> Send the First Row which is Filled with Dark Pixels.
|
||||
* b --> Pre-Fetch the 3 First Rows of the Image.
|
||||
* c --> Process the 3 Rows.
|
||||
* d --> Fill the First and Last Columns of the Produced Row with Dark Pixels.
|
||||
* e --> Send the Produced Row.
|
||||
* f --> Receive the Next Row.
|
||||
* g --> Start Again from Step c Until Receiving and Processing all the Rows.
|
||||
* h --> Send the Last Row which is Filled with Dark Pixels.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Stream Input/Slave Interface of the Core Used to Receive the Image Data.
|
||||
* 02 --------> The AXI Stream Output/Master Interface of the Core Used to Forward the Processed Image Data.
|
||||
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
*
|
||||
* IMPORTANT TECHNIQUES Used to Improve the Overall Performance:
|
||||
*
|
||||
* A)Each Image Row is not Received in a Single Buffer.
|
||||
* Instead, while it is being Received it is Split Equally and Distributed across 16 Sector Buffers.
|
||||
* Each Sector Buffer has no Dependence with the Rest Sector Buffers so the 16 Pieces of the Image Row Can be Processed in Parallel.
|
||||
* The HLS Tool Creates 16 Processing Units to Make Parallel Processing Possible.
|
||||
*
|
||||
*
|
||||
* B)Another Improvement Technique is the Usage of Four Line Sector Buffers which Allows
|
||||
* the Core to Process the Current 3 Rows while Concurrently Receiving the Next Row.
|
||||
* The Four Line Sector Buffer is Designed with 4 Lines where each is Used to Store the Data of a Single Row.
|
||||
* When the Sobel Filter Receives and Fills the 3 First Lines with 3 Rows it Starts the Processing.
|
||||
* The Fourth Line is Free to Start Receiving the Next Row while the Rest 3 Lines are Occupied with the Processing.
|
||||
*
|
||||
*
|
||||
* C)In Older Approaches after 3 Lines of a Three Line Sector Buffer were Processed the Lines would
|
||||
* Have to be Shifted Up so that the Last Line Could be Fed with the Next Received Row.
|
||||
* This Approach Required a Significant Amount of Copies where each Pixel of a Line of the Sector Buffer would Have to be Copied to the Upper Line.
|
||||
* The new Technique Requires Zero Copies as it Uses Indexing to Store the Received Rows in the Four Line Sector Buffer.
|
||||
*
|
||||
* Indexing Concerns which Should be Considered as the First, Second and Third Row to Process and where the Next Received Row Should be Stored.
|
||||
*
|
||||
* Initially: the First Received Row is Stored in the Line with Index 0 of the Four Line Sector Buffer.
|
||||
* : the Second Received Row is Stored in the Line with Index 3 of the Four Line Sector Buffer.
|
||||
* : the Third Received Row is Stored in the Line with Index 2 of the Four Line Sector Buffer.
|
||||
* : the Line with Index 1 is Used to Store the Next Received Line while the Other 3 are being Processed.
|
||||
*
|
||||
* When the Process of the 3 Lines Completes and a new Row is Received then the Indexing Changes so that we Can Start a new Processing and Receive another Row.
|
||||
* Now the First Row is no Longer Needed so the Line with Index 0 will be Used to Receive the Next Row.
|
||||
* The Second Row Becomes the First Row for the New Processing so the Line with Index 3 will be Used as the First Row.
|
||||
* The Third Row Becomes the Second Row for the New Processing so the Line with Index 2 will be Used as the Second Row.
|
||||
* The Last Received Row Becomes the Third Row for the New Processing so the Line with Index 1 will be Used as the Third Row.
|
||||
*
|
||||
* Following the Same Pattern as to which Lines to Process and where to Store the Next Row Leads to the Table Below:
|
||||
*
|
||||
* Index 0 | First Row | Next Row | Third Row | Second Row |
|
||||
* Index 1 | Next Row | Third Row | Second Row | First Row |
|
||||
* Index 2 | Third Row | Second Row | First Row | Next Row |
|
||||
* Index 3 | Second Row | First Row | Next Row | Third Row |
|
||||
*
|
||||
* To Make Indexing Applicable as Part of the Code we Used the first, second, last and temp Integer Variables which Hold the Current Index
|
||||
* in the Four Line Sector Buffer where each Row is Stored.
|
||||
*
|
||||
* In order to Calculate the Next Indexing for each Row we Used the Formula Below:
|
||||
* Index = (Index + 3) % 4
|
||||
*/
|
||||
int sobel_filter(/*01*/AXI_PIXEL STREAM_IN[MAX_WIDTH],
|
||||
/*02*/AXI_PIXEL STREAM_OUT[MAX_WIDTH],
|
||||
/*03*/int rows,
|
||||
/*04*/int cols,
|
||||
/*05*/int packet_mode_en,
|
||||
/*06*/int packet_size
|
||||
)
|
||||
{
|
||||
/*
|
||||
* Set the Fifo of the STREAM_OUT and STREAM_IN Interfaces to be Implemented with LUT RAM Memory.
|
||||
*/
|
||||
#pragma HLS RESOURCE variable=STREAM_OUT core=FIFO_LUTRAM
|
||||
#pragma HLS RESOURCE variable=STREAM_IN core=FIFO_LUTRAM
|
||||
|
||||
/*
|
||||
* The rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=rows bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cols bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The packet_mode_en is a Register to Store a Value that Enables/Disables the Packet Mode.
|
||||
* The Packet Mode Should be Enabled when the Data are Transferred with Scatter/Gather Transactions.
|
||||
* When the Packet Mode is Enabled the Core Sends a TLAST=1 Signal in the Output Interface for each Transmitted Packet.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=packet_mode_en bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The packet_size is a Register to Store the Size that each Packet Should Have (e.g 4K) when Using Scatter/Gather Transfers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=packet_size bundle=S_AXI4_LITE
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* Set the STREAM_OUT and STREAM_IN Interfaces of the Core to be AXI Stream Interfaces.
|
||||
* The Fifo Depth is Set to 1920 which is the Maximum Image Width that the Core Can Support to Process.
|
||||
*/
|
||||
#pragma HLS INTERFACE axis depth=1920 port=STREAM_IN
|
||||
#pragma HLS INTERFACE axis depth=1920 port=STREAM_OUT
|
||||
|
||||
int bytes_count; //Count the Number of Tranferred Bytes.
|
||||
int first; //Used to Know where the First Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int second; //Used to Know where the Second Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int last; //Used to Know where the Last Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int temp; //Used to Know where the Newest Received Row Should be Temporalily Located in the LINE4_SECTOR_BUFFER.
|
||||
|
||||
/*
|
||||
* The Number of Iterations Required to Receive or Send each Sector of a Row.
|
||||
* The sector_iter is an Array with as many Fields as the Number of Sectors.
|
||||
* The sector_iter Array is Configured to be Completely Partitioned according to the #pragma HLS ARRAY_PARTITION.
|
||||
*/
|
||||
int sector_iter[SECTORS];
|
||||
#pragma HLS ARRAY_PARTITION variable=sector_iter dim=1 complete
|
||||
|
||||
int sector_size; //The Number of Columns that each Sector Should Store.
|
||||
int remaining_pixels; //If the Number of Columns is not an Integer Multiple of the Number of Sectors then we Have Remaining Pixels that Should be Distributed in all the Sectors.
|
||||
|
||||
const RGB zero_pixel = {0, 0, 0}; //This is a Dark Pixel Used to Set the First and Last Row and all the First and Last Columns of the Image.
|
||||
|
||||
/*
|
||||
* Declare 16 Memory Buffers of Type LINE4_SECTOR_BUFFER.
|
||||
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
|
||||
*
|
||||
* These Buffers are Used to Receive the Image Rows before being Processed.
|
||||
*/
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR0;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR0 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR1;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR1 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR2;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR2 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR3;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR3 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR4;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR4 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR5;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR5 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR6;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR6 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR7;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR7 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR8;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR8 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR9;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR9 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR10;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR10 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR11;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR11 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR12;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR12 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR13;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR13 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR14;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR14 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR15;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR15 core=RAM_2P_BRAM
|
||||
|
||||
/*
|
||||
* Declare 16 Memory Buffers of Type LINE1_SECTOR_BUFFER.
|
||||
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
|
||||
*
|
||||
* These Buffers are Used to Store the Image Rows after being Processed.
|
||||
*/
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR0;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR0 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR1;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR1 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR2;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR2 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR3;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR3 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR4;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR4 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR5;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR5 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR6;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR6 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR7;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR7 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR8;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR8 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR9;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR9 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR10;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR10 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR11;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR11 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR12;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR12 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR13;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR13 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR14;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR14 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR15;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR15 core=RAM_2P_BRAM
|
||||
|
||||
/*
|
||||
* Set Initial Values.
|
||||
*/
|
||||
bytes_count = 0;
|
||||
first = 0;
|
||||
second = 3;
|
||||
last = 2;
|
||||
temp = 0;
|
||||
|
||||
|
||||
//Calculate the Number of Columns that Should be Stored to each Sector Buffer.
|
||||
//NOTE that this is the Initial Sector Size that is Equal to All the Sector Buffers.
|
||||
sector_size = (int)(cols / SECTORS);
|
||||
|
||||
/*
|
||||
* Calculate any Remaining Bytes in Case the Number of Columns is not an Integer Multiple of the Number of Sector Buffers.
|
||||
*
|
||||
* For Example, for an Image of Width 524 Pixels we Have 524 Pixels / 16 Sectors = 32.75 Pixels which is Not an Integer Multiple of the 16 Sector Buffers.
|
||||
* For each Sector Buffer we Have a Sector Size of 32 Pixels so 32 Pixels * 16 Sectors = 512 which Leads to Have 12 Remaining Pixels from the Initial 524.
|
||||
*
|
||||
* As a Result each of the 16 Sector Buffers Initially Has a Sector Size of 32.
|
||||
* The Remaining Pixels Should be Distributed to the Sector Buffers so the First 12 Sector Buffers will Have a Sector Size with one More Pixel which Leads to 33 Pixels Sector Size.
|
||||
*
|
||||
* |Sector0 |Sector1 |Sector2 |Sector3 |Sector4 |Sector5 |Sector6 |Sector7 |Sector8 |Sector9 |Sector10 |Sector11 |Sector12 |Sector13 |Sector14 |Sector15|
|
||||
* |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |32 |32 |32 |32 |
|
||||
*
|
||||
*/
|
||||
remaining_pixels = cols - (sector_size * SECTORS);
|
||||
|
||||
//Loop to Distribute the Remaining Bytes to the Sector Buffers.
|
||||
for (int i = 0; i < SECTORS; i++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
//Set the Array Field of the Corresponding Sector Buffer with the Initial Sector Size.
|
||||
sector_iter[i] = sector_size;
|
||||
|
||||
//Check if we still Have Remaining Pixels
|
||||
if (remaining_pixels > 0)
|
||||
{
|
||||
//Decrease the Number of Remaining Pixels.
|
||||
remaining_pixels--;
|
||||
|
||||
//Increment by 1 the Sector Size of the Corresponding Sector Buffer
|
||||
sector_iter[i] ++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The First Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
|
||||
* So, Send the First Row of Zero Pixels.
|
||||
*/
|
||||
send_1st_line:
|
||||
for (int col=0; col<cols; col++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
|
||||
|
||||
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
|
||||
output_pixel.user = 0x1;
|
||||
output_pixel.tdest = 0x1;
|
||||
|
||||
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
|
||||
if (packet_mode_en == 1)
|
||||
{
|
||||
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packket Size.
|
||||
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_pixel.last = 0x0;
|
||||
}
|
||||
|
||||
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
|
||||
output_pixel.data = 0x0;
|
||||
|
||||
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
|
||||
STREAM_OUT[col] = output_pixel;
|
||||
}
|
||||
|
||||
/*
|
||||
* The Sobel Edge Detection Algorithm Requires Three Rows in Order to Produce one Processed Row.
|
||||
* So, Pre-Fetch the First 3 Rows.
|
||||
*/
|
||||
prefetch_3lines:
|
||||
for (int row=0; row<3; row++)
|
||||
{
|
||||
//Receive a Row which is Distributed to the 16 Four Line Sector Buffers of Type LINE4_SECTOR_BUFFER.
|
||||
receive_post_line(STREAM_IN,
|
||||
&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
temp,
|
||||
sector_iter
|
||||
);
|
||||
|
||||
//Calculate the Vertical Position where the Next Received Row Should be Stored in the Four Line Sector Buffers.
|
||||
temp = (temp+3)%4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Produce a Processed Row from the 3 Pre-Fetched Rows.
|
||||
* Send the Processed Row and then Receive a New Row in Order to Produce again a Processed Row.
|
||||
*
|
||||
* Loop Until Receiving all the Rows of the Image.
|
||||
*/
|
||||
proc_module:
|
||||
for (int row=0; row<rows-3; row++)
|
||||
{
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR0 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR1 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR2 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR3 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR4 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR5 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR6 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR7 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR8 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR9 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR10 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR11 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR12 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR13 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR14 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR15 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR0 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR1 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR2 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR3 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR4 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR5 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR6 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR7 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR8 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR9 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR10 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR11 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR12 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR13 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR14 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR15 array //false
|
||||
|
||||
//Process the 3 Rows that are Received in the 16 Four Line Sector Buffers and Produce one Processed Row.
|
||||
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_size,
|
||||
first,
|
||||
second,
|
||||
last
|
||||
);
|
||||
|
||||
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
|
||||
|
||||
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
|
||||
|
||||
//The Produced Row from the three Processed Rows is Stored in the 16 One Line Sector Buffers.
|
||||
//Send the Produced Row over the AXI Stream Out Interface.
|
||||
send_line(STREAM_OUT,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_iter,
|
||||
packet_mode_en,
|
||||
packet_size,
|
||||
&bytes_count
|
||||
);
|
||||
|
||||
//Receive the Next Row which is Distributed to the 16 Four Line Sector Buffers.
|
||||
receive_post_line(STREAM_IN,
|
||||
&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
temp,
|
||||
sector_iter
|
||||
);
|
||||
|
||||
//Calculate which Should be Considered as the First Line for Producing the Next Processed Row.
|
||||
first = (first+3)%4;
|
||||
|
||||
//Calculate which Should be Considered as the Second Line for Producing the Next Processed Row.
|
||||
second = (second+3)%4;
|
||||
|
||||
//Calculate which Should be Considered as the Last Line for Producing the Next Processed Row.
|
||||
last = (last+3)%4;
|
||||
|
||||
//Calculate where the New Received Row Should be Stored in the Four Line Sector Buffers.
|
||||
temp = (temp+3)%4;
|
||||
}
|
||||
|
||||
//The Previous for Loop Ended before Processing the Last Received Row.
|
||||
//So, Process here the Last 3 Rows to Produce the Last Processed Row.
|
||||
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_size,
|
||||
first,
|
||||
second,
|
||||
last);
|
||||
|
||||
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
|
||||
|
||||
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
|
||||
|
||||
//Send the Last Produced Row over the AXI Stream Out Interface.
|
||||
send_line(STREAM_OUT,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_iter,
|
||||
packet_mode_en,
|
||||
packet_size,
|
||||
&bytes_count
|
||||
);
|
||||
|
||||
/*
|
||||
* The Last Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
|
||||
* So, Send the Last Row of Zero Pixels.
|
||||
*/
|
||||
send_last_line:
|
||||
for (int col=0; col<cols; col++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
|
||||
|
||||
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
|
||||
output_pixel.user = 0x1;
|
||||
output_pixel.tdest = 0x1;
|
||||
|
||||
//Since this is the Last Row Check if this is the Last Pixel to Send.
|
||||
if (col==cols-1 )
|
||||
{
|
||||
//Set the TLAST Signal to 1 to Indicate that this will be the Last Transmission of the Data.
|
||||
output_pixel.last = 0x1;
|
||||
}
|
||||
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
|
||||
else if(packet_mode_en == 1)
|
||||
{
|
||||
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packet Size.
|
||||
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
|
||||
}
|
||||
else
|
||||
output_pixel.last = 0x0;
|
||||
|
||||
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
|
||||
output_pixel.data = 0x0;
|
||||
|
||||
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
|
||||
STREAM_OUT[col] = output_pixel;
|
||||
}
|
||||
|
||||
bytes_count = 0; // Byte Counter
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
31
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.h
Normal file
31
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef _SOBEL_H_
|
||||
#define _SOBEL_H_
|
||||
#include "ap_bmp.h"
|
||||
#include "ap_axi_sdata2.h"
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_video.h"
|
||||
|
||||
//Upper Bounds for the Image Dimensions.
//MAX_WIDTH Sizes the AXI Stream Arrays and the Sector Line Buffers Declared in this Header.
#define MAX_WIDTH 1920
#define MAX_HEIGHT 1080

//Number of Sector Buffers that a Row is Distributed to.
#define SECTORS 16

//Helper Macros.
//Every Argument is Wrapped in Parentheses so that Compound Expressions (e.g. ABS(a-b)) Expand Correctly.
//NOTE the Arguments are Still Evaluated More than Once, so Do not Pass Expressions with Side Effects.
#define ABSDIFF(x,y) (((x)>(y))? ((x) - (y)) : ((y) - (x)))
#define ABS(x) (((x)>0)? (x) : -(x))
#define RGB(r,g,b) ((((word)(r))<<16)|(((word)(g))<<8)|((word)(b)))
|
||||
|
||||
typedef ap_rgb <8, 8, 8> RGB;
|
||||
typedef ap_axiu2 <32, 1, 1, 1> AXI_PIXEL;
|
||||
|
||||
typedef ap_linebuffer <unsigned char, 4, (MAX_WIDTH/SECTORS)+2> LINE4_SECTOR_BUFFER;
|
||||
typedef ap_linebuffer <RGB, 1, (MAX_WIDTH/SECTORS)+1> LINE1_SECTOR_BUFFER;
|
||||
|
||||
int sobel_filter(AXI_PIXEL STREAM_IN[MAX_WIDTH],
|
||||
AXI_PIXEL STREAM_OUT[MAX_WIDTH],
|
||||
int rows,
|
||||
int cols,
|
||||
int packet_mode_en,
|
||||
int packet_size);
|
||||
|
||||
#endif
|
||||
1456
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.cpp
Normal file
1456
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.cpp
Normal file
File diff suppressed because it is too large
Load Diff
195
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.h
Normal file
195
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.h
Normal file
@@ -0,0 +1,195 @@
|
||||
#ifndef _SOBEL_OPERATIONS_H_
|
||||
#define _SOBEL_OPERATIONS_H_
|
||||
|
||||
//Declaration Only, the Definition is not Part of this Header.
//NOTE(review): Presumably Converts a RGB Pixel to its 8-bit Luminance (Y) Value - Confirm Against the Definition.
unsigned char rgb2y(RGB pix);
|
||||
|
||||
/*
|
||||
* Template of the sobel_operator()
|
||||
*
|
||||
* The sobel_operator() Makes Sobel Computation Using a 3x3 Neighborhood
|
||||
*/
|
||||
template<int dummy_index>
|
||||
RGB sobel_operator(LINE4_SECTOR_BUFFER *window,
|
||||
unsigned int x_index,
|
||||
unsigned int y_first,
|
||||
unsigned int y_second,
|
||||
unsigned int y_last)
|
||||
{
|
||||
#pragma HLS INLINE off
|
||||
#pragma HLS EXPRESSION_BALANCE off
|
||||
|
||||
short x_weight = 0;
|
||||
short y_weight = 0;
|
||||
|
||||
short x_weight_array[9];
|
||||
#pragma HLS ARRAY_PARTITION variable=x_weight_array complete dim=1
|
||||
short y_weight_array[9];
|
||||
#pragma HLS ARRAY_PARTITION variable=y_weight_array complete dim=1
|
||||
|
||||
short edge_weight;
|
||||
unsigned char edge_val;
|
||||
RGB pixel;
|
||||
|
||||
const char x_op[3][3] = { {-1, 0, 1},
|
||||
{-2, 0, 2},
|
||||
{-1, 0, 1}};
|
||||
#pragma HLS ARRAY_PARTITION variable=x_op complete dim=1
|
||||
|
||||
const char y_op[3][3] = { { 1, 2, 1},
|
||||
{ 0, 0, 0},
|
||||
{-1,-2,-1}};
|
||||
#pragma HLS ARRAY_PARTITION variable=y_op complete dim=1
|
||||
|
||||
sobel_mul:
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the First Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++)
|
||||
{
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[j] = (window->getval(y_first,x_index + j) * x_op[0][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[j] = (window->getval(y_first,x_index + j) * y_op[0][j]);
|
||||
}
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the Second Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++)
|
||||
{
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[3+j] = (window->getval(y_second,x_index + j) * x_op[1][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[3+j] = (window->getval(y_second,x_index + j) * y_op[1][j]);
|
||||
}
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the Third Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++){
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[6+j] = (window->getval(y_last,x_index + j) * x_op[2][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[6+j] = (window->getval(y_last,x_index + j) * y_op[2][j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(char j = 0; j < 9; j++) {
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight += x_weight_array[j];
|
||||
// Y Direction Gradient
|
||||
y_weight += y_weight_array[j];
|
||||
}
|
||||
|
||||
edge_weight = ABS(x_weight) + ABS(y_weight);
|
||||
|
||||
edge_val = (255-(unsigned char)(edge_weight));
|
||||
|
||||
//Edge Thresholding
|
||||
if(edge_val > 200)
|
||||
{
|
||||
edge_val = 255;
|
||||
}
|
||||
else if(edge_val < 100)
|
||||
{
|
||||
edge_val = 0;
|
||||
}
|
||||
|
||||
pixel.R = pixel.G = pixel.B = edge_val;
|
||||
|
||||
return pixel;
|
||||
}
|
||||
|
||||
void start_sobel_operations(
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
|
||||
int sector_size,
|
||||
int first,
|
||||
int second,
|
||||
int last);
|
||||
|
||||
void send_line(
|
||||
AXI_PIXEL *STREAM_OUT,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
|
||||
int *sector_iter_array,
|
||||
int packet_mode_enable,
|
||||
int packet_size,
|
||||
int *remain_bytes);
|
||||
|
||||
void receive_post_line(
|
||||
AXI_PIXEL *STREAM_IN,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
|
||||
int row,
|
||||
int *sector_iter_array);
|
||||
|
||||
#endif
|
||||
49
Hardware/create_project.tcl
Normal file
49
Hardware/create_project.tcl
Normal file
@@ -0,0 +1,49 @@
|
||||
##################################################################################
#                                                                                #
# This Script                                                                    #
# 1.Creates a New Vivado Project                                                 #
# 2.Generates the Block Design Described in "pcie_acceleration_vc707_design.tcl" #
# 3.Imports the Required Constraint File "constraints.xdc"                       #
# 4.Imports the Required HDL Wrapper File "hdl_wrapper.v"                        #
#                                                                                #
##################################################################################

#All the Paths Below are Resolved Against the Directory that the Script is Launched from.
#So, the Script Must be Sourced from the Directory that Contains it.
set relative_directory [pwd]

set project_directory $relative_directory/pcie_acceleration_vc707

set ip_repository $relative_directory/Vivado_HLS_IPs

set constraints_directory $relative_directory/Constraints

set hdl_wrapper_directory $relative_directory/HDL_Wrapper

set block_design_directory $relative_directory/Vivado_Block_Design

#NOTE(review): This Path Uses "pcie_acceleration_vc707.srcs" while the Project Created Below is Named "pcie_accel_demo".
#The Variable is not Used in this Script, Confirm whether the Sourced Block Design Script Depends on it.
set src_bd_design_directory $relative_directory/pcie_acceleration_vc707/pcie_acceleration_vc707.srcs/sources_1/bd/pcie_acceleration_vc707_design

#Create a New Project Named "pcie_accel_demo"
create_project pcie_accel_demo $project_directory -part xc7vx485tffg1761-2

#Set the Board Part which is Required for Certain Configurations such as the Uartlite Controller (RS-232)
set_property board_part xilinx.com:vc707:part0:1.2 [current_project]

#Add the HLS IPs before Opening the Block Design
#The Repository Paths are Built from $ip_repository so that they Follow the Same Root as the Rest of the Directories.
set ip_repo_paths [list]
foreach ip_name {
    Acceleration_Scheduler_Direct
    Acceleration_Scheduler_Indirect
    Acceleration_Scheduler_SG_XDMA
    DMA_SG_PCIe_Scheduler
    Fetch_Scheduler
    Interrupt_Manager
    Info_Memory_Block
    Send_Scheduler
    Sobel_Filter
} {
    lappend ip_repo_paths $ip_repository/$ip_name
}
set_property ip_repo_paths $ip_repo_paths [current_project]
update_ip_catalog

#Add the Block Design
source $block_design_directory/pcie_acceleration_vc707_design.tcl

#Add Constraint Files
add_files -fileset constrs_1 -norecurse $constraints_directory/constraints.xdc
import_files -fileset constrs_1 $constraints_directory/constraints.xdc

#Add the HDL Wrapper
add_files -norecurse -scan_for_includes $hdl_wrapper_directory/hdl_wrapper.v
import_files -norecurse $hdl_wrapper_directory/hdl_wrapper.v

#Refresh the Compile Order of the Design and Simulation Filesets
update_compile_order -fileset sources_1
update_compile_order -fileset sim_1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user