Squash commits for public release
This commit is contained in:
0
Hardware/.keep
Normal file
0
Hardware/.keep
Normal file
0
Hardware/Constraints/.keep
Normal file
0
Hardware/Constraints/.keep
Normal file
22
Hardware/Constraints/constraints.xdc
Normal file
22
Hardware/Constraints/constraints.xdc
Normal file
@@ -0,0 +1,22 @@
|
||||
#--------------------------------------------------------------------------------
|
||||
#Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
|
||||
#--------------------------------------------------------------------------------
|
||||
#Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
|
||||
#Date : 2025
|
||||
#--------------------------------------------------------------------------------
|
||||
|
||||
# ------------------------------------------------------------------------------
# PCIe reset input (perst)
#   1.8 V LVCMOS input with the internal pull-up enabled.
#   PACKAGE_PIN is used (instead of LOC) so that every port in this file is
#   assigned to its package pin through the same property.
# ------------------------------------------------------------------------------
set_property PACKAGE_PIN AV35 [get_ports perst]
set_property IOSTANDARD LVCMOS18 [get_ports perst]
set_property PULLUP true [get_ports perst]

# ------------------------------------------------------------------------------
# PCIe Reference Clock (Differential) Ports
#   REFCLK_p / REFCLK_n feed the IBUFDS_GTE2 instance named refclk_ibuf in the
#   HDL wrapper; that buffer is pinned to a fixed site below.
# ------------------------------------------------------------------------------
set_property PACKAGE_PIN K8 [get_ports REFCLK_p]
set_property PACKAGE_PIN K7 [get_ports REFCLK_n]
set_property LOC IBUFDS_GTE2_X1Y5 [get_cells refclk_ibuf]

# ------------------------------------------------------------------------------
# DDR3 Initial Calibration Complete LED Indication Output Port
# ------------------------------------------------------------------------------
set_property PACKAGE_PIN AN39 [get_ports init_calib_complete]
set_property IOSTANDARD LVCMOS18 [get_ports init_calib_complete]

# ------------------------------------------------------------------------------
# Clocking wizard input clock
#   Lets the clocking wizard's input clock net use the clock backbone instead of
#   a dedicated same-region route.
#   NOTE(review): presumably required because the clock source and the MMCM/PLL
#   sit in different clock regions - confirm against the implemented design.
# ------------------------------------------------------------------------------
set_property CLOCK_DEDICATED_ROUTE BACKBONE [get_nets pcie_acceleration_vc707_design_i/clocking_wizard/inst/clk_in1_pcie_acceleration_vc707_design_clk_wiz_1_0]
|
||||
0
Hardware/HDL_Wrapper/.keep
Normal file
0
Hardware/HDL_Wrapper/.keep
Normal file
128
Hardware/HDL_Wrapper/hdl_wrapper.v
Normal file
128
Hardware/HDL_Wrapper/hdl_wrapper.v
Normal file
@@ -0,0 +1,128 @@
|
||||
//--------------------------------------------------------------------------------
|
||||
//Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
|
||||
//--------------------------------------------------------------------------------
|
||||
//Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
|
||||
//Date : 2025
|
||||
//--------------------------------------------------------------------------------
|
||||
|
||||
`timescale 1 ps / 1 ps
|
||||
|
||||
// -----------------------------------------------------------------------------
// pcie_acceleration_vc707_design_wrapper
//
// Top-level HDL wrapper around the pcie_acceleration_vc707_design block design.
//
// The only logic added here is the IBUFDS_GTE2 instance (refclk_ibuf), which
// converts the differential reference clock pair REFCLK_p / REFCLK_n into the
// single-ended REFCLK net consumed by the block design. Every other port is
// passed straight through to the block design instance with the same name.
//
// The instance names refclk_ibuf and pcie_acceleration_vc707_design_i are
// referenced by the constraints file and must not be renamed.
// -----------------------------------------------------------------------------
module pcie_acceleration_vc707_design_wrapper
   (REFCLK_p,
    REFCLK_n,
    ddr3_sdram_addr,
    ddr3_sdram_ba,
    ddr3_sdram_cas_n,
    ddr3_sdram_ck_n,
    ddr3_sdram_ck_p,
    ddr3_sdram_cke,
    ddr3_sdram_cs_n,
    ddr3_sdram_dm,
    ddr3_sdram_dq,
    ddr3_sdram_dqs_n,
    ddr3_sdram_dqs_p,
    ddr3_sdram_odt,
    ddr3_sdram_ras_n,
    ddr3_sdram_reset_n,
    ddr3_sdram_we_n,
    init_calib_complete,
    pcie_7x_mgt_rxn,
    pcie_7x_mgt_rxp,
    pcie_7x_mgt_txn,
    pcie_7x_mgt_txp,
    perst,
    reset,
    rs232_uart_rxd,
    rs232_uart_txd,
    sys_diff_clock_clk_n,
    sys_diff_clock_clk_p);

  // ---------------------------------------------------------------------------
  // Port directions
  // ---------------------------------------------------------------------------

  // Differential reference clock pair.
  input REFCLK_p;
  input REFCLK_n;

  // DDR3 SDRAM interface.
  output [13:0] ddr3_sdram_addr;
  output [2:0]  ddr3_sdram_ba;
  output        ddr3_sdram_cas_n;
  output [0:0]  ddr3_sdram_ck_n;
  output [0:0]  ddr3_sdram_ck_p;
  output [0:0]  ddr3_sdram_cke;
  output [0:0]  ddr3_sdram_cs_n;
  output [7:0]  ddr3_sdram_dm;
  inout  [63:0] ddr3_sdram_dq;
  inout  [7:0]  ddr3_sdram_dqs_n;
  inout  [7:0]  ddr3_sdram_dqs_p;
  output [0:0]  ddr3_sdram_odt;
  output        ddr3_sdram_ras_n;
  output        ddr3_sdram_reset_n;
  output        ddr3_sdram_we_n;
  output        init_calib_complete;

  // PCIe serial lanes (four lanes) and PCIe reset input.
  input  [3:0]  pcie_7x_mgt_rxn;
  input  [3:0]  pcie_7x_mgt_rxp;
  output [3:0]  pcie_7x_mgt_txn;
  output [3:0]  pcie_7x_mgt_txp;
  input         perst;

  // Board reset, UART and differential system clock.
  input         reset;
  input         rs232_uart_rxd;
  output        rs232_uart_txd;
  input         sys_diff_clock_clk_n;
  input         sys_diff_clock_clk_p;

  // ---------------------------------------------------------------------------
  // Net declarations (explicit, so the module also elaborates when the
  // compilation unit is built with `default_nettype none)
  // ---------------------------------------------------------------------------
  wire REFCLK_p;
  wire REFCLK_n;
  wire [13:0] ddr3_sdram_addr;
  wire [2:0]  ddr3_sdram_ba;
  wire        ddr3_sdram_cas_n;
  wire [0:0]  ddr3_sdram_ck_n;
  wire [0:0]  ddr3_sdram_ck_p;
  wire [0:0]  ddr3_sdram_cke;
  wire [0:0]  ddr3_sdram_cs_n;
  wire [7:0]  ddr3_sdram_dm;
  wire [63:0] ddr3_sdram_dq;
  wire [7:0]  ddr3_sdram_dqs_n;
  wire [7:0]  ddr3_sdram_dqs_p;
  wire [0:0]  ddr3_sdram_odt;
  wire        ddr3_sdram_ras_n;
  wire        ddr3_sdram_reset_n;
  wire        ddr3_sdram_we_n;
  wire        init_calib_complete;
  wire [3:0]  pcie_7x_mgt_rxn;
  wire [3:0]  pcie_7x_mgt_rxp;
  wire [3:0]  pcie_7x_mgt_txn;
  wire [3:0]  pcie_7x_mgt_txp;
  wire        perst;
  wire        reset;
  wire        rs232_uart_rxd;
  wire        rs232_uart_txd;
  wire        sys_diff_clock_clk_n;
  wire        sys_diff_clock_clk_p;

  // Single-ended reference clock produced by refclk_ibuf. Previously this net
  // existed only as an implicit declaration; it is now declared explicitly.
  wire REFCLK;

  // ---------------------------------------------------------------------------
  // Reference clock input buffer
  //   ODIV2 is left unconnected and CEB is tied to 0.
  //   NOTE(review): CEB is presumably the active-low buffer enable - confirm
  //   against the IBUFDS_GTE2 primitive documentation.
  // ---------------------------------------------------------------------------
  IBUFDS_GTE2 refclk_ibuf
       (.O(REFCLK),
        .ODIV2(),
        .I(REFCLK_p),
        .CEB(1'b0),
        .IB(REFCLK_n));

  // ---------------------------------------------------------------------------
  // Block design instance: every port maps one-to-one to the wrapper port of
  // the same name, except REFCLK which comes from the buffer above.
  // ---------------------------------------------------------------------------
  pcie_acceleration_vc707_design pcie_acceleration_vc707_design_i
       (.REFCLK(REFCLK),
        .ddr3_sdram_addr(ddr3_sdram_addr),
        .ddr3_sdram_ba(ddr3_sdram_ba),
        .ddr3_sdram_cas_n(ddr3_sdram_cas_n),
        .ddr3_sdram_ck_n(ddr3_sdram_ck_n),
        .ddr3_sdram_ck_p(ddr3_sdram_ck_p),
        .ddr3_sdram_cke(ddr3_sdram_cke),
        .ddr3_sdram_cs_n(ddr3_sdram_cs_n),
        .ddr3_sdram_dm(ddr3_sdram_dm),
        .ddr3_sdram_dq(ddr3_sdram_dq),
        .ddr3_sdram_dqs_n(ddr3_sdram_dqs_n),
        .ddr3_sdram_dqs_p(ddr3_sdram_dqs_p),
        .ddr3_sdram_odt(ddr3_sdram_odt),
        .ddr3_sdram_ras_n(ddr3_sdram_ras_n),
        .ddr3_sdram_reset_n(ddr3_sdram_reset_n),
        .ddr3_sdram_we_n(ddr3_sdram_we_n),
        .init_calib_complete(init_calib_complete),
        .pcie_7x_mgt_rxn(pcie_7x_mgt_rxn),
        .pcie_7x_mgt_rxp(pcie_7x_mgt_rxp),
        .pcie_7x_mgt_txn(pcie_7x_mgt_txn),
        .pcie_7x_mgt_txp(pcie_7x_mgt_txp),
        .perst(perst),
        .reset(reset),
        .rs232_uart_rxd(rs232_uart_rxd),
        .rs232_uart_txd(rs232_uart_txd),
        .sys_diff_clock_clk_n(sys_diff_clock_clk_n),
        .sys_diff_clock_clk_p(sys_diff_clock_clk_p));

endmodule
|
||||
0
Hardware/Vivado_Block_Design/.keep
Normal file
0
Hardware/Vivado_Block_Design/.keep
Normal file
2679
Hardware/Vivado_Block_Design/pcie_acceleration_vc707_design.tcl
Normal file
2679
Hardware/Vivado_Block_Design/pcie_acceleration_vc707_design.tcl
Normal file
File diff suppressed because it is too large
Load Diff
0
Hardware/Vivado_HLS_IPs/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/.keep
Normal file
@@ -0,0 +1,518 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_direct.h"
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Registers of the Sobel Filter
|
||||
* -----------------------------
|
||||
*/
|
||||
/* Byte offsets of the Sobel Filter registers, relative to the Sobel Filter base address. */

/* Control Register. */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL   0x00

/* Image Rows Register. */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18

/* Image Columns Register. */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------
|
||||
* Registers and Masks of the DMA
|
||||
* ------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Tx Channel Registers Base Offset.
|
||||
*/
|
||||
/* Base Offset of the Tx (MM2S) Channel Registers. */
#define XAXIDMA_TX_OFFSET 0x00000000

/* Base Offset of the Rx (S2MM) Channel Registers. */
#define XAXIDMA_RX_OFFSET 0x00000030


/*
 * Per-Channel Register Offsets.
 * The Same Layout Applies to both Channels of the DMA:
 * Add XAXIDMA_TX_OFFSET to Reach the TX Channel Register or XAXIDMA_RX_OFFSET to Reach the RX Channel Register.
 */

/* Control Register. */
#define XAXIDMA_CR_OFFSET       0x00000000

/* Status Register. */
#define XAXIDMA_SR_OFFSET       0x00000004

/* Source Address Register (Used with the Tx Channel). */
#define XAXIDMA_SRCADDR_OFFSET  0x00000018

/* Destination Address Register (Used with the Rx Channel, hence the Same Relative Offset as the Source Address). */
#define XAXIDMA_DESTADDR_OFFSET 0x00000018

/* Transfer Data Size Register. */
#define XAXIDMA_BUFFLEN_OFFSET  0x00000028


/* Control Register Bits. */

/* Start/Stop DMA Channel Mask. */
#define XAXIDMA_CR_RUNSTOP_MASK 0x00000001

/* Reset DMA Mask. */
#define XAXIDMA_CR_RESET_MASK   0x00000004


/* Interrupt Bits. */

/* Completion Interrupt Mask. */
#define XAXIDMA_IRQ_IOC_MASK    0x00001000

/* Delay Interrupt Mask. */
#define XAXIDMA_IRQ_DELAY_MASK  0x00002000

/* Error Interrupt Mask. */
#define XAXIDMA_IRQ_ERROR_MASK  0x00004000

/* All Interrupts Mask (Completion, Delay and Error Combined; Evaluates to 0x00007000). */
#define XAXIDMA_IRQ_ALL_MASK    (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK | XAXIDMA_IRQ_ERROR_MASK)
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM)
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
/* Control Register Bits of the APM. */

/* Global Clock Counter (GCC) Reset Mask. */
#define XAPM_CR_GCC_RESET_MASK    0x00020000

/* Global Clock Counter (GCC) Enable Mask. */
#define XAPM_CR_GCC_ENABLE_MASK   0x00010000

/* Metrics Counter Reset Mask. */
#define XAPM_CR_MCNTR_RESET_MASK  0x00000002

/* Metrics Counter Enable Mask. */
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001


/* Register Offsets of the APM, Relative to the APM Base Address. */

/* Control Register Offset. */
#define XAPM_CTL_OFFSET      0x0300

/* Global Clock Counter 32 to 63 bits (Upper) Register Offset. */
#define XAPM_GCC_HIGH_OFFSET 0x0000

/* Global Clock Counter 0 to 31 bits (Lower) Register Offset. */
#define XAPM_GCC_LOW_OFFSET  0x0004


/* Metrics Counter Register Offsets (Counters 0 to 5, Spaced 0x10 Apart). */
#define XAPM_MC0_OFFSET 0x0100
#define XAPM_MC1_OFFSET 0x0110
#define XAPM_MC2_OFFSET 0x0120
#define XAPM_MC3_OFFSET 0x0130
#define XAPM_MC4_OFFSET 0x0140
#define XAPM_MC5_OFFSET 0x0150
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_direct()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Direct Core.
|
||||
*
|
||||
* The Acceleration Scheduler Direct Core is Part of the Acceleration Group Direct and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Direct as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It also Interacts with the Interrupt Manager to Signal the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* c --> Setup and Start the Sobel Filter.
|
||||
* d --> Setup and Start the S2MM and MM2S DMA Transfers.
|
||||
* e --> Wait for an Interrupt by the DMA on Completion of the Transfer.
|
||||
* f --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* g --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* h --> Acknowledge the DMA Interrupt.
|
||||
* i --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* j --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* k --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
|
||||
* 03 to 13 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_direct(/*01*/volatile ap_uint<32> *mm2s_ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *dma_intr_in,
|
||||
/*03*/unsigned int dma_device_address,
|
||||
/*04*/unsigned int sobel_device_address,
|
||||
/*05*/unsigned int interrupt_manager_register_offset,
|
||||
/*06*/unsigned int apm_device_address,
|
||||
/*07*/unsigned int shared_apm_device_address,
|
||||
/*08*/unsigned int shared_metrics_address,
|
||||
/*09*/unsigned int image_cols,
|
||||
/*10*/unsigned int image_rows,
|
||||
/*11*/unsigned int host_mem_src_data_address,
|
||||
/*12*/unsigned int host_mem_dst_data_address,
|
||||
/*13*/unsigned int initiator_group
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The mm2s_ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=mm2s_ext_cfg
|
||||
|
||||
/*
|
||||
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=dma_intr_in
|
||||
|
||||
/*
|
||||
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the mm2s_ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The host_mem_src_data_address is a Register to Store the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=host_mem_src_data_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The host_mem_dst_data_address is a Register to Store the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=host_mem_dst_data_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The initiator_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=initiator_group bundle=mm2s_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (host_mem_dst_data_address) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
data_register = host_mem_dst_data_address;
|
||||
|
||||
//Write the Destination Address to the Destination Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows) * 4;
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (host_mem_src_data_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
data_register = host_mem_src_data_address;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows) * 4;
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Wait for a DMA Interrupt
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(dma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
dma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------
|
||||
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
|
||||
* IOC Stands for: Interrupt On Complete
|
||||
* ---------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the Triggered Interrupts
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory.
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------------------------------------------------
|
||||
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
|
||||
* ------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (initiator_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
|
||||
data_register = initiator_group;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte offsets of the hardware-written fields inside struct metrics.
 * The scheduler cores add one of these offsets to the address of a group's
 * metrics record (base + sizeof(struct metrics) * group) before writing a value,
 * so each value must match the position of the corresponding struct metrics field.
 */
// AXI Performance Monitor (APM) read counters.
#define APM_READ_TRANSACTIONS_OFFSET 0
|
||||
#define APM_READ_BYTES_OFFSET 4
|
||||
|
||||
// AXI Performance Monitor (APM) write counters.
#define APM_WRITE_TRANSACTIONS_OFFSET 8
|
||||
#define APM_WRITE_BYTES_OFFSET 12
|
||||
|
||||
// AXI Performance Monitor (APM) stream counters.
#define APM_PACKETS_OFFSET 16
|
||||
#define APM_BYTES_OFFSET 20
|
||||
|
||||
// APM Global Clock Counter, lower (L) and upper (U) 32 bits.
#define APM_GCC_L_OFFSET 24
|
||||
#define APM_GCC_U_OFFSET 28
|
||||
|
||||
// CDMA Fetch start time, lower (L) and upper (U) 32 bits.
#define CDMA_FETCH_TIME_START_L_OFFSET 32
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36
|
||||
|
||||
// CDMA Fetch end time, lower (L) and upper (U) 32 bits.
#define CDMA_FETCH_TIME_END_L_OFFSET 40
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44
|
||||
|
||||
// CDMA Send start time, lower (L) and upper (U) 32 bits.
#define CDMA_SEND_TIME_START_L_OFFSET 48
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52
|
||||
|
||||
// CDMA Send end time, lower (L) and upper (U) 32 bits.
#define CDMA_SEND_TIME_END_L_OFFSET 56
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60
|
||||
|
||||
// DMA acceleration start time, lower (L) and upper (U) 32 bits.
#define DMA_ACCEL_TIME_START_L_OFFSET 64
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68
|
||||
|
||||
// DMA acceleration end time, lower (L) and upper (U) 32 bits.
#define DMA_ACCEL_TIME_END_L_OFFSET 72
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
/*
 * Description of one image. It is embedded in struct metrics as shared_image_info.
 */
struct image_info
|
||||
{
|
||||
// Number of image rows.
ap_uint<32> rows;
|
||||
// Number of image columns.
ap_uint<32> columns;
|
||||
// NOTE(review): presumably the image size in bytes -- the unit is not visible here, confirm against the code that fills it.
ap_uint<64> size;
|
||||
};
|
||||
|
||||
/*
 * Metrics record of one acceleration group.
 *
 * The fields from apm_read_transactions to dma_accel_time_end_u (bytes 0 to 79) are addressed
 * by raw byte offset through the *_OFFSET macros (APM_READ_TRANSACTIONS_OFFSET to
 * DMA_ACCEL_TIME_END_U_OFFSET), so their order and width must stay in step with those macros.
 * A _l / _u suffix marks the lower / upper 32 bits of a 64-bit counter value.
 */
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
// Start and end time of the CDMA Fetch transfer.
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
// Start and end time of the CDMA Send transfer.
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
// Start and end time of the DMA acceleration, read from the Global Clock Counter of the Shared Timer.
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
// NOTE(review): the fields below have no *_OFFSET macro in this header; presumably they are filled by the driver and the application -- confirm.
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
/*
 * Status words shared between the users of the acceleration groups.
 * There is one *_occupied_pid word per acceleration group (2 direct, 4 indirect, 1 scatter/gather).
 * NOTE(review): the names suggest that each word holds the PID of the process that currently
 * occupies the group -- the writers are not visible here, confirm against the driver.
 */
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
// NOTE(review): meaning and encoding of the two words below are not visible here -- confirm against the driver.
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
/*
 * Layout of the shared metrics memory: one struct metrics record per acceleration group,
 * followed by the shared status flags.
 *
 * The order of the records is significant. The scheduler cores address a record as
 * base + sizeof(struct metrics) * group, and the indirect scheduler additionally adds
 * sizeof(struct metrics) * 2 in order to step over the two direct records.
 */
struct shared_repository
|
||||
{
|
||||
// Records of the two direct acceleration groups.
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
// Records of the four indirect acceleration groups.
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
// Record of the scatter/gather acceleration group.
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Build Script of the Acceleration Scheduler Direct Core.
#It Synthesizes acceleration_scheduler_direct.cpp and Exports the Result as an IP Catalog Core.

#Open (or Create) the HLS Project.
open_project Acceleration_Scheduler_Direct
|
||||
|
||||
#Name of the C++ Function that Becomes the Top Level of the Core.
set_top acceleration_scheduler_direct
|
||||
|
||||
#Source File that Contains the Top Level Function.
add_files acceleration_scheduler_direct.cpp
|
||||
|
||||
open_solution "solution1"
|
||||
|
||||
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
|
||||
set_part {xc7vx485tffg1761-2}
|
||||
#Target Clock Period of 10 (Nanoseconds by Default in Vivado HLS, that is 100 MHz).
create_clock -period 10 -name default
|
||||
|
||||
#Run the C Synthesis.
csynth_design
|
||||
|
||||
#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Acceleration Scheduler Direct" -version "3.5"
|
||||
|
||||
exit
|
||||
@@ -0,0 +1,715 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_indirect.h"
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Registers of the Sobel Filter
|
||||
* -----------------------------
|
||||
*/
|
||||
// Byte offsets from the base address of the Sobel Filter (sobel_base_address).
// Control Register of the Sobel Filter.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL 0x00
|
||||
// Register that Receives the Number of Image Rows.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18
|
||||
// Register that Receives the Number of Image Columns.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20
|
||||
|
||||
/*
|
||||
* ------------------------------
|
||||
* Registers and Masks of the DMA
|
||||
* ------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Tx Channel Registers Base Offset.
|
||||
*/
|
||||
#define XAXIDMA_TX_OFFSET 0x00000000
|
||||
|
||||
/*
|
||||
* Rx Channel Registers Base Offset.
|
||||
*/
|
||||
#define XAXIDMA_RX_OFFSET 0x00000030
|
||||
|
||||
|
||||
/*
|
||||
* This Set of Registers are Applicable for both Channels of the DMA.
|
||||
* Add XAXIDMA_TX_OFFSET to Get to TX channel, and XAXIDMA_RX_OFFSET to Get to RX Channel.
|
||||
*/
|
||||
#define XAXIDMA_CR_OFFSET 0x00000000 // Control Register.
|
||||
#define XAXIDMA_SR_OFFSET 0x00000004 // Status Register.
|
||||
// The Source and Destination Address Offsets are Intentionally Equal: they are Relative to the Channel Base,
// so the Source Address Resolves to 0x18 (TX Base 0x00) and the Destination Address to 0x48 (RX Base 0x30).
#define XAXIDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register.
|
||||
#define XAXIDMA_DESTADDR_OFFSET 0x00000018 // Destination Address Register.
|
||||
#define XAXIDMA_BUFFLEN_OFFSET 0x00000028 // Transfer Data Size Register.
|
||||
|
||||
// Bit Masks of the Control Register.
#define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 // Start/Stop DMA Channel Mask.
|
||||
#define XAXIDMA_CR_RESET_MASK 0x00000004 // Reset DMA Mask.
|
||||
|
||||
// Interrupt Bit Masks. XAXIDMA_IRQ_ALL_MASK is the Bitwise OR of the Three Individual Masks.
#define XAXIDMA_IRQ_IOC_MASK 0x00001000 // Completion Interrupt Mask.
|
||||
#define XAXIDMA_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
|
||||
#define XAXIDMA_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
|
||||
#define XAXIDMA_IRQ_ALL_MASK 0x00007000 // All Interrupts Mask.
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM)
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
// Bit Masks of the APM Control Register (XAPM_CTL_OFFSET).
#define XAPM_CR_GCC_RESET_MASK 0x00020000 // Global Clock Counter (GCC) Reset Mask.
|
||||
#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 // Global Clock Counter (GCC) Enable Mask.
|
||||
#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 // Metrics Counter Reset Mask.
|
||||
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 // Metrics Counter Enable Mask.
|
||||
|
||||
// Byte Offsets from the Base Address of the APM.
// Note that the Upper Half of the GCC is Located at the Lower Address (0x0000) and the Lower Half at 0x0004.
#define XAPM_CTL_OFFSET 0x0300 // Control Register Offset.
|
||||
#define XAPM_GCC_HIGH_OFFSET 0x0000 // Global Clock Counter 32 to 63 bits (Upper) Register Offset.
|
||||
#define XAPM_GCC_LOW_OFFSET 0x0004 // Global Clock Counter 0 to 31 bits (Lower) Register Offset.
|
||||
|
||||
// The Metrics Counters are Spaced 0x10 Bytes Apart.
// NOTE(review): which Metric each Counter Holds Depends on the APM Configuration, which is not Visible Here -- Confirm.
#define XAPM_MC0_OFFSET 0x0100 // Metrics Counter 0 Register Offset.
|
||||
#define XAPM_MC1_OFFSET 0x0110 // Metrics Counter 1 Register Offset.
|
||||
#define XAPM_MC2_OFFSET 0x0120 // Metrics Counter 2 Register Offset.
|
||||
#define XAPM_MC3_OFFSET 0x0130 // Metrics Counter 3 Register Offset.
|
||||
#define XAPM_MC4_OFFSET 0x0140 // Metrics Counter 4 Register Offset.
|
||||
#define XAPM_MC5_OFFSET 0x0150 // Metrics Counter 5 Register Offset.
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_indirect()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Indirect Core.
|
||||
*
|
||||
* The Acceleration Scheduler Indirect Core is Part of the Acceleration Group Indirect and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Indirect as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It, also, Interacts with the CDMA Fetch and CDMA Send Peripherals and the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Set the Scheduler Buffer of the Fetch Scheduler with Info that the Fetch Scheduler will Use to Start the CDMA Fetch Transfer
|
||||
* from the Host Memory to the FPGA's DDR3.
|
||||
* b --> Wait for the Fetch Scheduler to Send a Start Signal (start Input) when the CDMA Fetch Has Completed the Transfer.
|
||||
* c --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* e --> Setup and Start the Sobel Filter.
|
||||
* f --> Setup and Start the S2MM and MM2S DMA Transfers.
|
||||
* g --> Wait for an Interrupt by the DMA on Completion of the Transfer.
|
||||
* h --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* i --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* j --> Acknowledge the DMA Interrupt.
|
||||
* k --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* l --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* m --> Set the Scheduler Buffer of the Send Scheduler with Info that the Send Scheduler will Use to Start the CDMA Send Transfer
|
||||
* from the FPGA's DDR3 to the Host Memory.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Start Signals from the Fetch Scheduler.
|
||||
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
|
||||
* 04 to 27 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_indirect(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *start,
|
||||
/*03*/volatile ap_uint<1> *dma_intr_in,
|
||||
/*04*/unsigned int scheduler_buffer_base_address_f,
|
||||
/*05*/unsigned int src_address_reg_offset_f,
|
||||
/*06*/unsigned int dst_address_reg_offset_f,
|
||||
/*07*/unsigned int data_size_reg_offset_f,
|
||||
/*08*/unsigned int offset_reg_offset_f,
|
||||
/*09*/unsigned int src_address_f,
|
||||
/*10*/unsigned int dst_address_f,
|
||||
/*11*/unsigned int offset_f,
|
||||
/*12*/unsigned int scheduler_buffer_base_address_s,
|
||||
/*13*/unsigned int src_address_reg_offset_s,
|
||||
/*14*/unsigned int dst_address_reg_offset_s,
|
||||
/*15*/unsigned int data_size_reg_offset_s,
|
||||
/*16*/unsigned int offset_reg_offset_s,
|
||||
/*17*/unsigned int src_address_s,
|
||||
/*18*/unsigned int dst_address_s,
|
||||
/*19*/unsigned int offset_s,
|
||||
/*20*/unsigned int dma_base_address,
|
||||
/*21*/unsigned int sobel_base_address,
|
||||
/*22*/unsigned int image_cols,
|
||||
/*23*/unsigned int image_rows,
|
||||
/*24*/unsigned int accel_group,
|
||||
/*25*/unsigned int shared_apm_base_address,
|
||||
/*26*/unsigned int shared_metrics_base_address,
|
||||
/*27*/unsigned int apm_base_address
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The start is a Single Bit Input which is Used to Receive External Start Signals from the Fetch Scheduler.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=start
|
||||
|
||||
/*
|
||||
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=dma_intr_in
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address_f is a Register to Store the Base Address of the Scheduler Buffer of the Fetch Scheduler.
|
||||
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Source Address that the CDMA Fetch will Read the Data from.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Destination Address that the CDMA Fetch will Write the Data to.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Data Size of the CDMA Fetch Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Offset from the Source and Destination Base Addresses that the CDMA Fetch will Use to Make the Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_f is a Register to Store the Source Address that the CDMA Fetch will Use to Read the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_f is a Register to Store the Destination Address that the CDMA Fetch will Use to Write the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_f is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_f bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address_s is a Register to Store the Base Address of the Scheduler Buffer of the Send Scheduler.
|
||||
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Source Address that the CDMA Send will Read the Data from.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Destination Address that the CDMA Send will Write the Data to.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Data Size of the CDMA Send Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
|
||||
* Write the Offset from the Source and Destination Base Addresses that the CDMA Send will Use to Make the Transfer.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_s is a Register to Store the Source Address that the CDMA Send will Use to Read the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_s is a Register to Store the Destination Address that the CDMA Send will Use to Write the Data.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_s is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
|
||||
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_s bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dma_base_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The sobel_base_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The apm_base_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_base_address bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<1> start_value; // Used to Read the Last Value of the start Input Port.
|
||||
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
|
||||
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
* Set the Registers of the Scheduler Buffer of the Fetch Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
|
||||
* The Fetch Scheduler will Use the above to Start the CDMA Fetch Transfer from the Host Memory to the FPGA's DDR3.
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_f) the Source Address for the CDMA Fetch Transfer.
|
||||
data_register = src_address_f;
|
||||
|
||||
//Write the Source Address for the CDMA Fetch Transfer to the Source Address Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + src_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (dst_address_f) the Destination Address for the CDMA Fetch Transfer.
|
||||
data_register = dst_address_f;
|
||||
|
||||
//Write the Destination Address for the CDMA Fetch Transfer to the Destination Address Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + dst_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (offset_f) the Offset Value for the CDMA Fetch Transfer.
|
||||
data_register = offset_f;
|
||||
|
||||
//Write the Offset Value for the CDMA Fetch Transfer to the Offset Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + offset_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Fetch Transfer.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Data Size for the CDMA Fetch Transfer to the Data Size Register in the Scheduler Buffer of the Fetch Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + data_size_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Wait for Start Signal from the Fetch Scheduler
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the start Input.
|
||||
start_value = *start;
|
||||
|
||||
//Keep Looping for as long as the start Input Does not Reach a Logic 1 Value.
|
||||
while(start_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the start Input.
|
||||
start_value = *start;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
start_value = 0;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_s) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
|
||||
//NOTE that the Destination Address of the DMA S2MM Transfer is the Source Address of the CDMA Send Transfer.
|
||||
data_register = src_address_s;
|
||||
|
||||
//Write the Destination Address to the Destination Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (dst_address_f) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
//NOTE that the Destination Address of the CDMA Fetch Transfer is the Source Address of the DMA MM2S Transfer.
|
||||
data_register = dst_address_f;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Wait for a DMA Interrupt
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(dma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the dma_intr_in Input.
|
||||
dma_intr_in_value = *dma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
dma_intr_in_value = 0;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------
|
||||
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
|
||||
* IOC Stands for: Interrupt On Complete
|
||||
* ---------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the Triggered Interrupts
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
* Set the Registers of the Scheduler Buffer of the Send Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
|
||||
* The Send Scheduler will Use the above to Start the CDMA Send Transfer from the Host Memory to the FPGA's DDR3.
|
||||
* -----------------------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (src_address_s) the Source Address for the CDMA Transfer.
|
||||
data_register = src_address_s;
|
||||
|
||||
//Write the Source Address for the CDMA Send Transfer to the Source Address Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + src_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (dst_address_s) the Destination Address for the CDMA Send Transfer.
|
||||
data_register = dst_address_s;
|
||||
|
||||
//Write the Destination Address for the CDMA Send Transfer to the Destination Address Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + dst_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Get from the Internal Register (offset_s) the Offset Value for the CDMA Send Transfer.
|
||||
data_register = offset_s;
|
||||
|
||||
//Write the Offset Value for the CDMA Send Transfer to the Offset Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + offset_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Send Transfer.
|
||||
data_register = (image_cols * image_rows * 4);
|
||||
|
||||
//Write the Data Size for the CDMA Send Transfer to the Data Size Register in the Scheduler Buffer of the Send Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + data_size_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the Leading 32-Bit Fields of struct metrics (Declared Further Down in this Header).
 * Each Value Equals the "Offset N Bytes" Annotation of the Struct Member Named in the Trailing Comment,
 * so the Macros and the Struct Layout Must be Changed Together.
 */
#define APM_READ_TRANSACTIONS_OFFSET 0 // Member apm_read_transactions.
|
||||
#define APM_READ_BYTES_OFFSET 4 // Member apm_read_bytes.
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8 // Member apm_write_transactions.
|
||||
#define APM_WRITE_BYTES_OFFSET 12 // Member apm_write_bytes.
|
||||
|
||||
#define APM_PACKETS_OFFSET 16 // Member apm_packets.
|
||||
#define APM_BYTES_OFFSET 20 // Member apm_bytes.
|
||||
|
||||
#define APM_GCC_L_OFFSET 24 // Member apm_gcc_l.
|
||||
#define APM_GCC_U_OFFSET 28 // Member apm_gcc_u.
|
||||
|
||||
#define CDMA_FETCH_TIME_START_L_OFFSET 32 // Member cdma_fetch_time_start_l.
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36 // Member cdma_fetch_time_start_u.
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40 // Member cdma_fetch_time_end_l.
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44 // Member cdma_fetch_time_end_u.
|
||||
|
||||
#define CDMA_SEND_TIME_START_L_OFFSET 48 // Member cdma_send_time_start_l.
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52 // Member cdma_send_time_start_u.
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56 // Member cdma_send_time_end_l.
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60 // Member cdma_send_time_end_u.
|
||||
|
||||
#define DMA_ACCEL_TIME_START_L_OFFSET 64 // Member dma_accel_time_start_l.
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68 // Member dma_accel_time_start_u.
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72 // Member dma_accel_time_end_l.
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76 // Member dma_accel_time_end_u.
|
||||
|
||||
|
||||
/*
 * Image Description that is Embedded in struct metrics as the shared_image_info Member.
 * NOTE(review): The Unit of the size Field is not Visible in this Header -- Presumably the Image Size in Bytes; Confirm Against the Code that Fills it.
 */
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
/*
 * Metrics Record of a Single Acceleration Group.
 *
 * The 32-Bit Fields at Offsets 0 to 76 are Mirrored by the *_OFFSET Macros Defined Earlier in this Header.
 * The Acceleration Scheduler Code Stores the APM Counters and the Shared Timer Timestamps into these Fields
 * through its AXI Master Interface by Adding the Matching Macro to the Address of the Record,
 * so the Order and the Width of these Fields Must not Change without Updating the Macros.
 *
 * The 64-Bit Fields that Follow shared_image_info Have no *_OFFSET Macro in this Header.
 */
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
/*
 * Status Words that are Stored at the End of struct shared_repository.
 * There is one *_occupied_pid Word per Acceleration Group (2 Direct, 4 Indirect, 1 Scatter/Gather).
 * NOTE(review): Judging by the Names, each *_occupied_pid Presumably Holds the PID of the Process that Currently Occupies the Group.
 * The Meaning of accelerator_busy and open_modules is not Visible in this Header -- Confirm Against the Code that Updates them.
 */
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
/*
 * Layout of the Shared Metrics Memory: one struct metrics per Acceleration Group
 * (2 Direct, 4 Indirect and 1 Scatter/Gather, in that Order) Followed by the Status Flags.
 *
 * The Member Order Matters: the Acceleration Scheduler Code Locates its Record at
 * base + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group), which Skips the two Direct Records.
 * NOTE(review): This Assumes that the Base Address Given to the Scheduler Points at a struct shared_repository -- Confirm Against the Driver.
 */
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
#Vivado HLS Build Script: Synthesizes the Acceleration Scheduler Indirect Core and Exports it as an IP.

#Open the HLS Project of the Core.
open_project Acceleration_Scheduler_Indirect
|
||||
|
||||
#Select the C++ Function that Becomes the Top Level of the Generated Hardware.
set_top acceleration_scheduler_indirect
|
||||
|
||||
#Add the Source File that Contains the Top Function.
add_files acceleration_scheduler_indirect.cpp
|
||||
|
||||
#Open the Solution that Holds the Part, Clock and Synthesis Results.
open_solution "solution1"
|
||||
|
||||
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
|
||||
set_part {xc7vx485tffg1761-2}
|
||||
#Target Clock Period of 10 (Nanoseconds by Default in Vivado HLS, i.e. 100 MHz).
create_clock -period 10 -name default
|
||||
|
||||
#Run the C Synthesis.
csynth_design
|
||||
|
||||
#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Acceleration Scheduler Indirect" -version "2.0"
|
||||
|
||||
exit
|
||||
@@ -0,0 +1,476 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "acceleration_scheduler_sg_xdma.h"
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Registers of the Sobel Filter
|
||||
* -----------------------------
|
||||
*/
|
||||
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL 0x00 // Control Register Offset (Bit 0 is Set to Start the Sobel Filter).
|
||||
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18 // Image Rows Register Offset.
|
||||
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20 // Image Columns Register Offset.
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM)
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
#define XAPM_CR_GCC_RESET_MASK 0x00020000 // Global Clock Counter (GCC) Reset Mask (Bit 17 of the Control Register).
|
||||
#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 // Global Clock Counter (GCC) Enable Mask (Bit 16 of the Control Register).
|
||||
#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 // Metrics Counter Reset Mask (Bit 1 of the Control Register).
|
||||
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 // Metrics Counter Enable Mask (Bit 0 of the Control Register).
|
||||
|
||||
#define XAPM_CTL_OFFSET 0x0300 // Control Register Offset.
|
||||
#define XAPM_GCC_HIGH_OFFSET 0x0000 // Global Clock Counter 32 to 63 bits (Upper) Register Offset.
|
||||
#define XAPM_GCC_LOW_OFFSET 0x0004 // Global Clock Counter 0 to 31 bits (Lower) Register Offset.
|
||||
|
||||
#define XAPM_MC0_OFFSET 0x0100 // Metrics Counter 0 Register Offset.
|
||||
#define XAPM_MC1_OFFSET 0x0110 // Metrics Counter 1 Register Offset.
|
||||
#define XAPM_MC2_OFFSET 0x0120 // Metrics Counter 2 Register Offset.
|
||||
#define XAPM_MC3_OFFSET 0x0130 // Metrics Counter 3 Register Offset.
|
||||
#define XAPM_MC4_OFFSET 0x0140 // Metrics Counter 4 Register Offset.
|
||||
#define XAPM_MC5_OFFSET 0x0150 // Metrics Counter 5 Register Offset.
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------
|
||||
* Registers of the DMA SG PCIe Scheduler
|
||||
* --------------------------------------
|
||||
*/
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL 0x00 // Control Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE 0x04 // Global Interrupt Enable Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER 0x08 // Interrupt Enable Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR 0x0C // Interrupt Status Register Offset.
|
||||
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA 0x20 // Data Size Register for the Scatter/Gather Transfer.
|
||||
|
||||
/*
|
||||
* acceleration_scheduler_sg_xdma()
|
||||
*
|
||||
* The Hardware Functionality of the Acceleration Scheduler Scatter/Gather Core.
|
||||
*
|
||||
* The Acceleration Scheduler Scatter/Gather Core is Part of the Acceleration Group Scatter/Gather and is Used to Manage the whole Acceleration Procedure.
|
||||
* It Interacts with the DMA SG PCIe Scheduler, Sobel Filter and APM of the Acceleration Group Scatter/Gather as well as the Shared Timer (Shared APM) to Get Time Metrics.
|
||||
* It, also, Interacts with the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
|
||||
* c --> Setup and Start the Sobel Filter.
|
||||
* d --> Enable the Interrupts of the DMA SG PCIe Scheduler.
|
||||
* e --> Setup and Start the DMA SG PCIe Scheduler.
|
||||
* f --> Wait for an Interrupt by the DMA SG PCIe Scheduler on Completion of the Acceleration.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
|
||||
* h --> Disable the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* i --> Clear and Re-Enable the Interrupts of the DMA SG PCIe Scheduler.
|
||||
* j --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* k --> Reset the Counters of the AXI Performance Monitor Unit (APM).
|
||||
* l --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
|
||||
* 03 to 11 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int acceleration_scheduler_sg_xdma(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *scheduler_intr_in,
|
||||
/*03*/unsigned int dma_sg_pcie_scheduler_base_address,
|
||||
/*04*/unsigned int sobel_device_address,
|
||||
/*05*/unsigned int interrupt_manager_register_offset,
|
||||
/*06*/unsigned int apm_device_address,
|
||||
/*07*/unsigned int shared_apm_device_address,
|
||||
/*08*/unsigned int shared_metrics_address,
|
||||
/*09*/unsigned int image_cols,
|
||||
/*10*/unsigned int image_rows,
|
||||
/*11*/unsigned int accel_group
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=scheduler_intr_in
|
||||
|
||||
/*
|
||||
* The dma_sg_pcie_scheduler_base_address is a Register to Store the Base Address of the DMA SG PCIe Scheduler that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_sg_pcie_scheduler_base_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
|
||||
|
||||
/*
|
||||
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group bundle=mm2s_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
|
||||
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
|
||||
|
||||
ap_uint<32> write_transactions; // Store the Write Transactions from the APM
|
||||
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
|
||||
|
||||
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
|
||||
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
|
||||
|
||||
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
|
||||
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
|
||||
|
||||
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<1> scheduler_intr_in_value; // Used to Read the Last Value of the scheduler_intr_in_value Input Port.
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------
|
||||
* Enable the APM Counters
|
||||
* -----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
|
||||
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
|
||||
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------
|
||||
* Setup and Start the Sobel Filter
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
|
||||
data_register = image_cols;
|
||||
|
||||
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
|
||||
data_register = image_rows;
|
||||
|
||||
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the Sobel Filter.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------
|
||||
* Enable the Interrupts for the DMA SG PCIe Scheduler
|
||||
* --------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = 0x1;
|
||||
|
||||
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------
|
||||
* Setup and Start the DMA SG PCIe Scheduler
|
||||
* -----------------------------------------
|
||||
*/
|
||||
|
||||
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
|
||||
data_register = image_rows * image_cols * 4;
|
||||
|
||||
//Write the Transfer Size to the Requested Data Size Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//Read the Control Register of the DMA SG PCIe Scheduler.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the DMA SG PCIe Scheduler.
|
||||
data_register = data_register & 0x80;
|
||||
data_register = data_register | 0x01;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA SG PCIe Scheduler so that the DMA SG PCIe Scheduler Gets Started.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------
|
||||
* Wait for a DMA SG PCIe Scheduler Interrupt
|
||||
* ------------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the scheduler_intr_in_value Input.
|
||||
scheduler_intr_in_value = *scheduler_intr_in;
|
||||
|
||||
//Keep Looping for as long as the scheduler_intr_in_value Input Does not Reach a Logic 1 Value.
|
||||
while(scheduler_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the scheduler_intr_in Input.
|
||||
scheduler_intr_in_value = *scheduler_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
scheduler_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
|
||||
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* ------------------------
|
||||
* Disable the APM Counters
|
||||
* ------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
|
||||
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------
|
||||
* Clear and then Re-Enable the DMA SG PCIe Scheduler Interrupts
|
||||
* -------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set a Mask to Clear the Interrupt Status Register of the DMA SG PCIe Scheduler.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Clear the Interrupt Status Register of the DMA SG PCIe Scheduler According to the Previous Mask.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
//Read the Interrupt Enable Register of the DMA SG PCIe Scheduler
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
|
||||
data_register = data_register | 0xFFFFFFFF;
|
||||
|
||||
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = 0x1;
|
||||
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
|
||||
|
||||
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
|
||||
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------
|
||||
* Reset the APM Counters
|
||||
* ----------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the APM.
|
||||
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
|
||||
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
|
||||
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the APM to Release the Reset.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------------------------------------------------------
|
||||
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
|
||||
* ------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (accel_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
|
||||
data_register = accel_group;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the 32 Bit Fields of a struct metrics Entry.
 *
 * The Scheduler Adds one of these Offsets to the Base Address of a struct metrics Entry
 * in order to Address a Single Field through its AXI Master Interface.
 * Every Value Must Match the Position of the Corresponding Field in struct metrics.
 */

/* AXI Performance Monitor (APM) Counters. */
#define APM_READ_TRANSACTIONS_OFFSET    0
#define APM_READ_BYTES_OFFSET           4

#define APM_WRITE_TRANSACTIONS_OFFSET   8
#define APM_WRITE_BYTES_OFFSET          12

#define APM_PACKETS_OFFSET              16
#define APM_BYTES_OFFSET                20

/* APM Global Clock Counter (Lower/Upper 32 Bits). */
#define APM_GCC_L_OFFSET                24
#define APM_GCC_U_OFFSET                28

/* CDMA Fetch Start/End Time (Lower/Upper 32 Bits). */
#define CDMA_FETCH_TIME_START_L_OFFSET  32
#define CDMA_FETCH_TIME_START_U_OFFSET  36

#define CDMA_FETCH_TIME_END_L_OFFSET    40
#define CDMA_FETCH_TIME_END_U_OFFSET    44

/* CDMA Send Start/End Time (Lower/Upper 32 Bits). */
#define CDMA_SEND_TIME_START_L_OFFSET   48
#define CDMA_SEND_TIME_START_U_OFFSET   52

#define CDMA_SEND_TIME_END_L_OFFSET     56
#define CDMA_SEND_TIME_END_U_OFFSET     60

/* DMA Acceleration Start/End Time (Lower/Upper 32 Bits). */
#define DMA_ACCEL_TIME_START_L_OFFSET   64
#define DMA_ACCEL_TIME_START_U_OFFSET   68

#define DMA_ACCEL_TIME_END_L_OFFSET     72
#define DMA_ACCEL_TIME_END_U_OFFSET     76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS batch script for the Acceleration Scheduler SG XDMA core:
# runs C synthesis and exports the result in the IP Catalog format.

# Project and top-level function.
open_project Acceleration_Scheduler_SG_XDMA
set_top acceleration_scheduler_sg_xdma

# Source file of the core.
add_files acceleration_scheduler_sg_xdma.cpp

# Solution that holds the device and clock settings.
open_solution solution1

# The part refers to the Xilinx Virtex 7 VC707 FPGA Development Board.
set_part xc7vx485tffg1761-2

# Target clock period of 10 (nanoseconds unless another unit is given).
create_clock -period 10 -name default

# C synthesis.
csynth_design

# Package the synthesized core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Acceleration Scheduler SG XDMA" -version 3.5

exit
|
||||
0
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/.keep
Normal file
@@ -0,0 +1,698 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "dma_sg_pcie_scheduler.h"
|
||||
|
||||
|
||||
/*
 * ------------------------------
 * Registers and Masks of the DMA
 * ------------------------------
 */

/*
 * Base Offsets of the two Register Groups of the DMA.
 * The Tx Group is Used for the MM2S Channel and the Rx Group for the S2MM Channel.
 */
#define XAXIDMA_TX_OFFSET       0x00000000 // Tx Channel Registers Base Offset.
#define XAXIDMA_RX_OFFSET       0x00000030 // Rx Channel Registers Base Offset.

/*
 * Register Offsets that are Relative to a Channel Base Offset.
 * Add XAXIDMA_TX_OFFSET to Reach the Register of the Tx Channel or XAXIDMA_RX_OFFSET to Reach the one of the Rx Channel.
 * The Source and the Destination Address Registers Share the Same Relative Offset on Purpose,
 * the Channel Base Offset is what Selects between them.
 */
#define XAXIDMA_CR_OFFSET       0x00000000 // Control Register.
#define XAXIDMA_SR_OFFSET       0x00000004 // Status Register.
#define XAXIDMA_SRCADDR_OFFSET  0x00000018 // Source Address Register.
#define XAXIDMA_DESTADDR_OFFSET 0x00000018 // Destination Address Register.
#define XAXIDMA_BUFFLEN_OFFSET  0x00000028 // Transfer Data Size Register.

/*
 * Bit Masks of the Control Register.
 */
#define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 // Start/Stop DMA Channel Mask.
#define XAXIDMA_CR_RESET_MASK   0x00000004 // Reset DMA Mask.

/*
 * Bit Masks of the Interrupts.
 */
#define XAXIDMA_IRQ_IOC_MASK    0x00001000 // Completion Interrupt Mask.
#define XAXIDMA_IRQ_DELAY_MASK  0x00002000 // Delay Interrupt Mask.
#define XAXIDMA_IRQ_ERROR_MASK  0x00004000 // Error Interrupt Mask.
#define XAXIDMA_IRQ_ALL_MASK    0x00007000 // All Interrupts Mask.
|
||||
|
||||
/*
|
||||
* serve_mm2s_transfer()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is as Follows:
|
||||
*
|
||||
* a --> Get from the Scatter/Gather List of the Source Memory the Physical Address of the Current Page to Transfer.
|
||||
* b --> Set the Address Translation Register of the PCIe Bridge's Source AXI BAR with the Physical Address of the Current Page to Transfer.
|
||||
* c --> Setup and Start the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
* 03 --> The Data Size of the MM2S Transfer.
|
||||
* 04 --> The Address of the Scatter/Gather List of the Source Memory.
|
||||
* 05 --> The Address of the BCIe Bridge's Source AXI BAR.
|
||||
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Source AXI BAR.
|
||||
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Source Scatter/Gather List.
|
||||
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
|
||||
*/
|
||||
int serve_mm2s_transfer(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/unsigned int dma_device_address,
|
||||
/*03*/unsigned int src_data_size,
|
||||
/*04*/unsigned int sgl_address,
|
||||
/*05*/unsigned int axi_bar_src_address,
|
||||
/*06*/unsigned int axi_bar_src_cfg_address,
|
||||
/*07*/int page_counter,
|
||||
/*08*/ap_uint<32>current_transfer_size
|
||||
)
|
||||
{
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Source AXI BAR of the PCIe Bridge
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the 64 Bit Physical Address of the Current Page from the Source Scatter/Gather List.
|
||||
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
|
||||
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
|
||||
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
|
||||
|
||||
data_register = data_register_array[0];
|
||||
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Source AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = data_register_array[1];
|
||||
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Source AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start DMA to Device Transfer (MM2S)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (axi_bar_src_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
|
||||
//The Source Address of the DMA MM2S Channel will be the Source AXI BAR which Corresponds to the Physical Address of the Current Page.
|
||||
data_register = axi_bar_src_address;
|
||||
|
||||
//Write the Source Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the MM2S Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), ¤t_transfer_size, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_s2mm_transfer()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is as Follows:
|
||||
*
|
||||
* a --> Get from the Scatter/Gather List of the Destination Memory the Physical Address of the Current Page to Transfer.
|
||||
* b --> Set the Address Translation Register of the PCIe Bridge's Destination AXI BAR with the Physical Address of the Current Page to Transfer.
|
||||
* c --> Setup and Start the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
* 03 --> The Data Size of the S2MM Transfer.
|
||||
* 04 --> The Address of the Scatter/Gather List of the Destination Memory.
|
||||
* 05 --> The Address of the PCIe Bridge's Destination AXI BAR.
|
||||
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Destination AXI BAR.
|
||||
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Destination Scatter/Gather List.
|
||||
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
|
||||
*/
|
||||
int serve_s2mm_transfer(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/unsigned int dma_device_address,
|
||||
/*03*/unsigned int src_data_size,
|
||||
/*04*/unsigned int sgl_address,
|
||||
/*05*/unsigned int axi_bar_dst_address,
|
||||
/*06*/unsigned int axi_bar_dst_cfg_address,
|
||||
/*07*/int page_counter,
|
||||
/*08*/ap_uint<32>current_transfer_size)
|
||||
{
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Destination AXI BAR of the PCIe Bridge
|
||||
* ---------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Get the 64 Bit Physical Address of the Current Page from the Destination Scatter/Gather List.
|
||||
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
|
||||
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
|
||||
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
|
||||
|
||||
data_register = data_register_array[0];
|
||||
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Destination AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
data_register = data_register_array[1];
|
||||
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Destination AXI BAR.
|
||||
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------
|
||||
* Setup and Start Device to DMA Transfer (S2MM)
|
||||
* ---------------------------------------------
|
||||
*/
|
||||
|
||||
//Get from the Internal Register (axi_bar_dst_address) of the Core the Destination Address that the DMA will Use to Read the Initial Image Data.
|
||||
//The Destination Address of the DMA S2MM Channel will be the Destination AXI BAR which Corresponds to the Physical Address of the Current Page.
|
||||
data_register = axi_bar_dst_address;
|
||||
|
||||
//Write the Destination Address to the Source Register of the DMA.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the S2MM Control Register of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
|
||||
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), ¤t_transfer_size, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_mm2s_interrupt()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is to Acknowledge Triggered Interrupts on the MM2S Channel of the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
*/
|
||||
int serve_mm2s_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
|
||||
{
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
//Read the DMA MM2S Status Register of the DMA to Get the IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the MM2S Status Register of the DMA which Acknowledges the Triggered Interrupts on the MM2S Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* serve_s2mm_interrupt()
|
||||
*
|
||||
* Invoked from the dma_sg_pcie_scheduler() Top Function.
|
||||
*
|
||||
* The Usability of this Function is to Acknowledge Triggered Interrupts on the S2MM Channel of the DMA.
|
||||
*
|
||||
* The Function Parameters are:
|
||||
*
|
||||
* 01 --> The AXI Master Interface of the Core (cfg).
|
||||
* 02 --> The Base Address of the DMA.
|
||||
*/
|
||||
int serve_s2mm_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
|
||||
{
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
//Read the DMA S2MM Status Register of the DMA to Get the IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the S2MM Status Register of the DMA which Acknowledges the Triggered Interrupts on the S2MM Channel.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* dma_sg_pcie_scheduler() Top Function
|
||||
*
|
||||
* The Hardware Functionality of the DMA SG PCIe Scheduler Core.
|
||||
*
|
||||
* The DMA SG PCIe Scheduler Core is Part of the Acceleration Group Scatter/Gather.
|
||||
* It is Used to Manage the MM2S and S2MM Channels of the DMA when a Scatter/Gather List is Required to Transfer the Image Data.
|
||||
* The DMA SG PCIe Scheduler Interacts with the DMA of the Acceleration Group Scatter/Gather and the Configuration AXI Interface of the PCIe Bridge.
|
||||
*
|
||||
* The Sequential Steps of the Core's Functionality are as Follows:
|
||||
*
|
||||
* a --> Calculate the Number of Pages to Transfer for the MM2S and S2MM Channels of the DMA.
|
||||
* b --> Enable the DMA MM2S Interrupts.
|
||||
* c --> Enable the DMA S2MM Interrupts.
|
||||
* d --> Start a Page Transfer over the MM2S Channel (See the serve_mm2s_transfer() Function for Details).
|
||||
* e --> Start a Page Transfer over the S2MM Channel (See the serve_s2mm_transfer() Function for Details).
|
||||
* f --> Loop for as long as Both Channels Require to Complete the Transfer of all the Pages (both_done).
|
||||
* g --> In Every Loop Check if Either the MM2S or the S2MM Channels Have Triggered an Interrupt on Completion of the Page Transfer.
|
||||
* h --> If any of the Channels Triggers an Interrupt then Clear the Channel's Interrupt
|
||||
* (See the serve_mm2s_interrupt() and serve_s2mm_interrupt() Functions for Details)
|
||||
* and Start the Channel's next Page Transfer.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA MM2S Channel.
|
||||
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA S2MM Channel.
|
||||
* 04 to 12 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int dma_sg_pcie_scheduler(/*01*/volatile ap_uint<32> *cfg,
|
||||
/*02*/volatile ap_uint<1> *mm2s_intr_in,
|
||||
/*03*/volatile ap_uint<1> *s2mm_intr_in,
|
||||
/*04*/unsigned int dma_device_address,
|
||||
/*05*/unsigned int requested_data_size,
|
||||
/*06*/unsigned int page_size,
|
||||
/*07*/unsigned int mm2s_sgl_address,
|
||||
/*08*/unsigned int axi_bar_src_address,
|
||||
/*09*/unsigned int axi_bar_src_cfg_address,
|
||||
/*10*/unsigned int s2mm_sgl_address,
|
||||
/*11*/unsigned int axi_bar_dst_address,
|
||||
/*12*/unsigned int axi_bar_dst_cfg_address
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=cfg
|
||||
|
||||
/*
|
||||
* The mm2s_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA MM2S Channel.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=mm2s_intr_in
|
||||
|
||||
/*
|
||||
* The s2mm_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA S2MM Channel.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=s2mm_intr_in
|
||||
|
||||
/*
|
||||
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
|
||||
* will Need to Access through the cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The requested_data_size is a Register to Store the Size of the Data that will be Transferred.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=requested_data_size bundle=cfg
|
||||
|
||||
/*
|
||||
* The page_size is a Register to Store the Size of each Page(Usually 4K in Linux) that will be Transferred.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=page_size bundle=cfg
|
||||
|
||||
/*
|
||||
* The mm2s_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Source Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=mm2s_sgl_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Read the Source Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_src_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_src_cfg_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The s2mm_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Destination Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=s2mm_sgl_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_dst_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Write the Destination Data.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_cfg_address bundle=cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
|
||||
|
||||
|
||||
|
||||
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
|
||||
ap_uint<32> mm2s_data_size; // The Data Size to Transfer for a Page of the MM2S Channel (The Last Page may not be Full).
|
||||
ap_uint<32> s2mm_data_size; // The Data Size to Transfer for a Page of the S2MM Channel (The Last Page may not be Full).
|
||||
|
||||
ap_uint<1> dma_mm2s_intr_in_value; // Used to Read the Last Value of the dma_mm2s_intr_in_value Input Port.
|
||||
ap_uint<1> dma_s2mm_intr_in_value; // Used to Read the Last Value of the dma_s2mm_intr_in_value Input Port.
|
||||
|
||||
int mm2s_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the MM2S Channel.
|
||||
int s2mm_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the S2MM Channel.
|
||||
|
||||
int mm2s_pages_number; // The Number of Pages to Transfer for the MM2S Channel.
|
||||
int s2mm_pages_number; // The Number of Pages to Transfer for the S2MM Channel.
|
||||
int mm2s_remaining_bytes; // Used to Count the Remaining Bytes of the MM2S Transfer.
|
||||
int s2mm_remaining_bytes; // Used to Count the Remaining Bytes of the S2MM Transfer.
|
||||
|
||||
ap_uint<32> current_transfer_size;
|
||||
|
||||
int both_done = 0; // Flag to Know When Both Channels (MM2S/S2MM) are Done.
|
||||
|
||||
|
||||
|
||||
//Divide the Size of the Data to Transfer by the Page Size to Get the Number of Pages to Transfer over the MM2S and S2mm Channels.
|
||||
mm2s_pages_number = requested_data_size / page_size;
|
||||
s2mm_pages_number = requested_data_size / page_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the MM2S Pages Number Multiplied by the Page Size is Less than the Initial Data Size
|
||||
* then there is One More Page with Less Data than the Page Size.
|
||||
*
|
||||
* So, Increment mm2s_pages_number Variable by 1.
|
||||
*/
|
||||
if((mm2s_pages_number * page_size) < requested_data_size)
|
||||
{
|
||||
mm2s_pages_number = mm2s_pages_number + 1;
|
||||
}
|
||||
|
||||
|
||||
//Initialize the Remaining Bytes for the MM2S Channel to be Equal to the Data Transfer Size.
|
||||
mm2s_remaining_bytes = requested_data_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the S2MM Pages Number Multiplied by the Page Size is Less than the Initial Data Size
|
||||
* then there is One More Page with Less Data than the Page Size.
|
||||
*
|
||||
* So, Increment s2mm_pages_number Variable by 1.
|
||||
*/
|
||||
if((s2mm_pages_number * page_size) < requested_data_size)
|
||||
{
|
||||
s2mm_pages_number = s2mm_pages_number + 1;
|
||||
}
|
||||
|
||||
|
||||
//Initialize the Remaining Bytes for the S2MM Channel to be Equal to the Data Transfer Size.
|
||||
s2mm_remaining_bytes = requested_data_size;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Enable the DMA MM2S Interrupts (DMA to Device)
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the MM2S Channel of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA to Enable the MM2S Interrupts.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------
|
||||
* Enable the DMA S2MM Interrupts (Device to DMA)
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the S2MM Channel of the DMA.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the DMA to Enable the S2MM Interrupts.
|
||||
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
|
||||
*/
|
||||
if(mm2s_remaining_bytes >= page_size)
|
||||
{
|
||||
mm2s_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
|
||||
*/
|
||||
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
|
||||
{
|
||||
mm2s_data_size = mm2s_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = mm2s_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the MM2S Channel
|
||||
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
|
||||
serve_mm2s_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
mm2s_sgl_address,
|
||||
axi_bar_src_address,
|
||||
axi_bar_src_cfg_address,
|
||||
0,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
|
||||
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
|
||||
*/
|
||||
if(s2mm_remaining_bytes >= page_size)
|
||||
{
|
||||
s2mm_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
|
||||
*/
|
||||
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
|
||||
{
|
||||
s2mm_data_size = s2mm_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = s2mm_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the S2MM Channel
|
||||
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
|
||||
serve_s2mm_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
s2mm_sgl_address,
|
||||
axi_bar_dst_address,
|
||||
axi_bar_dst_cfg_address,
|
||||
0,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
|
||||
|
||||
|
||||
//Start Looping for as Long as the Rest of the Pages for the MM2S and S2MM Channels are Being Transferred.
|
||||
while(both_done < 2)
|
||||
{
|
||||
//Read the Current State of the mm2s_intr_in Input.
|
||||
dma_mm2s_intr_in_value = *mm2s_intr_in;
|
||||
|
||||
//Read the Current State of the s2mm_intr_in Input.
|
||||
dma_s2mm_intr_in_value = *s2mm_intr_in;
|
||||
|
||||
/*
|
||||
* If we Have an Interrupt from the MM2S Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
|
||||
*/
|
||||
if(dma_mm2s_intr_in_value == 1)
|
||||
{
|
||||
//Acknowledge the Triggered Interrupt of the DMA MM2S Channel.
|
||||
serve_mm2s_interrupt(cfg, dma_device_address);
|
||||
|
||||
//If the MM2S Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
|
||||
if(mm2s_pages_counter < (mm2s_pages_number - 1))
|
||||
{
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
|
||||
*/
|
||||
if(mm2s_remaining_bytes >= page_size)
|
||||
{
|
||||
mm2s_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
|
||||
*/
|
||||
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
|
||||
{
|
||||
mm2s_data_size = mm2s_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = mm2s_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the MM2S Channel According to the current_transfer_size Variable.
|
||||
serve_mm2s_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
mm2s_sgl_address,
|
||||
axi_bar_src_address,
|
||||
axi_bar_src_cfg_address,
|
||||
mm2s_pages_counter + 1,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
|
||||
}
|
||||
|
||||
//Increment the MM2S Pages Counter to Keep Track of the Remaining MM2S Pages to Transfer.
|
||||
mm2s_pages_counter++;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* If we Have an Interrupt from the S2MM Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
|
||||
*/
|
||||
if(dma_s2mm_intr_in_value == 1)
|
||||
{
|
||||
//Acknowledge the Triggered Interrupt of the DMA S2MM Channel.
|
||||
serve_s2mm_interrupt(cfg, dma_device_address);
|
||||
|
||||
//If the S2MM Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
|
||||
if(s2mm_pages_counter < (s2mm_pages_number - 1))
|
||||
{
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
|
||||
*/
|
||||
if(s2mm_remaining_bytes >= page_size)
|
||||
{
|
||||
s2mm_data_size = page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
|
||||
*/
|
||||
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
|
||||
{
|
||||
s2mm_data_size = s2mm_remaining_bytes;
|
||||
}
|
||||
|
||||
current_transfer_size = s2mm_data_size;
|
||||
|
||||
//Start a DMA Page Transfer over the S2MM Channel According to the current_transfer_size Variable.
|
||||
serve_s2mm_transfer(cfg,
|
||||
dma_device_address,
|
||||
requested_data_size,
|
||||
s2mm_sgl_address,
|
||||
axi_bar_dst_address,
|
||||
axi_bar_dst_cfg_address,
|
||||
s2mm_pages_counter + 1,
|
||||
current_transfer_size);
|
||||
|
||||
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
|
||||
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
|
||||
|
||||
}
|
||||
|
||||
//Increment the S2MM Pages Counter to Keep Track of the Remaining S2MM Pages to Transfer.
|
||||
s2mm_pages_counter++;
|
||||
|
||||
}
|
||||
|
||||
//If the MM2S Pages Counter Has Reached the Total Number of Pages then the MM2S Channel Has Finished the Data Transfer.
|
||||
if(mm2s_pages_counter == (mm2s_pages_number))
|
||||
{
|
||||
//Increment the mm2s_pages_counter Variable so that will not Enter the Current if Condition Again.
|
||||
mm2s_pages_counter++;
|
||||
|
||||
//Increment the both_done Variable on Behalf of the MM2S Channel.
|
||||
//The both_done Variable will ONLY be Incremented Once on Behalf of the MM2S Channel because we will not Enter this if Condition Again.
|
||||
//When the S2MM Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
|
||||
both_done++;
|
||||
}
|
||||
|
||||
//If the S2MM Pages Counter Has Reached the Total Number of Pages then the S2MM Channel Has Finished the Data Transfer.
|
||||
if(s2mm_pages_counter == (s2mm_pages_number))
|
||||
{
|
||||
//Increment the s2mm_pages_counter Variable so that will not Enter the Current if Condition Again.
|
||||
s2mm_pages_counter++;
|
||||
|
||||
//Increment the both_done Variable on Behalf of the S2MM Channel.
|
||||
//The both_done Variable will ONLY be Incremented Once on Behalf of the S2MM Channel because we will not Enter this if Condition Again.
|
||||
//When the MM2S Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
|
||||
both_done++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Reset the Variables.
|
||||
dma_mm2s_intr_in_value = 0;
|
||||
dma_s2mm_intr_in_value = 0;
|
||||
both_done = 0;
|
||||
|
||||
return 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte Offsets of the Hardware Metrics Fields.
 * The Values Follow the Per-Field Offsets Annotated in struct metrics.
 */

/* AXI Performance Monitor: Read Channel Counters. */
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4

/* AXI Performance Monitor: Write Channel Counters. */
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12

/* AXI Performance Monitor: Packet and Byte Counters. */
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20

/* AXI Performance Monitor: Global Clock Counter, Lower (L) and Upper (U) Words. */
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28

/* CDMA Fetch: Start Time Stamp, Lower (L) and Upper (U) Words. */
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36

/* CDMA Fetch: End Time Stamp, Lower (L) and Upper (U) Words. */
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44

/* CDMA Send: Start Time Stamp, Lower (L) and Upper (U) Words. */
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52

/* CDMA Send: End Time Stamp, Lower (L) and Upper (U) Words. */
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60

/* DMA Acceleration: Start Time Stamp, Lower (L) and Upper (U) Words. */
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68

/* DMA Acceleration: End Time Stamp, Lower (L) and Upper (U) Words. */
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script for the DMA SG PCIe Scheduler Core:
# Synthesizes dma_sg_pcie_scheduler.cpp and Exports the Result as an IP Catalog Core.

# Project, Top Function and Source File
open_project DMA_SG_PCIe_Scheduler
set_top dma_sg_pcie_scheduler
add_files dma_sg_pcie_scheduler.cpp

# Solution
open_solution solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part xc7vx485tffg1761-2

# Target Clock Period of 10
create_clock -period 10 -name default

# C Synthesis
csynth_design

# Package the Synthesized Core for the IP Catalog
export_design -format ip_catalog -display_name {DMA SG PCIe Scheduler} -version 1.0

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/.keep
Normal file
513
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.cpp
Normal file
513
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.cpp
Normal file
@@ -0,0 +1,513 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "fetch_scheduler.h"
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------
|
||||
* Registers and Masks of the DMA
|
||||
* ------------------------------
|
||||
*/
|
||||
/* CDMA Register Offsets. */
#define XAXICDMA_CR_OFFSET          0x00000000 // Control Register.
#define XAXICDMA_SR_OFFSET          0x00000004 // Status Register.

#define XAXICDMA_SRCADDR_OFFSET     0x00000018 // Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET     0x00000020 // Destination Address Register.
#define XAXICDMA_BTT_OFFSET         0x00000028 // Bytes to Transfer Register.

/* CDMA Control Register Masks. */
#define XAXICDMA_CR_RESET_MASK      0x00000004 // Reset CDMA Mask.

/* CDMA Interrupt Masks. */
#define XAXICDMA_XR_IRQ_IOC_MASK    0x00001000 // Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK  0x00002000 // Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK  0x00004000 // Error Interrupt Mask.
#define XAXICDMA_XR_IRQ_ALL_MASK    0x00007000 // All Interrupt Mask (IOC | Delay | Error).
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
|
||||
* --------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* The 64 Bit Global Clock Counter is Exposed as two 32 Bit Registers. */
#define XAPM_GCC_HIGH_OFFSET    0x0000 // Global Clock Counter, Upper Word (Bits 32 to 63).
#define XAPM_GCC_LOW_OFFSET     0x0004 // Global Clock Counter, Lower Word (Bits 0 to 31).
|
||||
|
||||
|
||||
/*
|
||||
* fetch_scheduler()
|
||||
*
|
||||
* The Hardware Functionality of the Fetch Scheduler Core.
|
||||
*
|
||||
* The Fetch Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
|
||||
* The Responsibility of this Core is to Manage the Procedure of Fetching Data to the DDR3 Memory that will be Processed by the AGIs.
|
||||
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
|
||||
* If it Finds Information for new Transfer it Starts the CDMA Fetch Core to Transfer Image Data from the Host's Memory to the FPGA's DDR3.
|
||||
* The Corresponding AGIs will be then Signaled by the Fetch Scheduler to Process the Image Data.
|
||||
*
|
||||
* When an AGI wants to Request Image Data from the Fetch Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
|
||||
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Fetch Scheduler.
|
||||
*
|
||||
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
|
||||
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
|
||||
* Each Set Corresponds to One of the 4 AGIs.
|
||||
*
|
||||
* When an AGI Writes the Above Information to the Scheduler Buffer, the Fetch Scheduler Starts a CDMA Transfer Accordingly
|
||||
* to Fetch the Image Data in the FPGA's DDR3.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
|
||||
*
|
||||
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
|
||||
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
|
||||
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Fetch.
|
||||
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
|
||||
* b --> Enable the Interrupts on the CDMA Fetch Core.
|
||||
* c --> Setup the CDMA with the Source and Destination Addresses.
|
||||
* If the Source Data Should be Fetched through the PCIe Bridge then Get the Source Address from the Scheduler Buffer and Set the
|
||||
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
|
||||
* Then Set the Source Address Register of the CDMA Fetch Core to be the Corresponding AXI BAR.
|
||||
* If the Source Data Should not be Fetched through the PCIe Bridge then Just Set the Source Address Register of the CDMA Fetch Core
|
||||
* with the Source Address of the Scheduler Buffer.
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Started.
|
||||
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Fetch Transfer.
|
||||
* f --> Wait for an Interrupt by the CDMA Fetch on Completion of the Transfer.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Ended.
|
||||
* h --> Acknowledge the CDMA Fetch Interrupt.
|
||||
* i --> Reset the CDMA Fetch Core.
|
||||
* j --> Re-Enable the Interrupts on the CDMA Fetch Core.
|
||||
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
|
||||
* l --> Send a Start Signal to the Acceleration Scheduler Indirect of the Corresponding AGI to Initiate the Acceleration Procedure.
|
||||
*
|
||||
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
|
||||
*
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Fetch Core.
|
||||
* 03 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
|
||||
* 04 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
|
||||
* 05 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
|
||||
* 06 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
|
||||
* 07 to 19 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
|
||||
*/
|
||||
int fetch_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *cdma_intr_in,
|
||||
/*03*/volatile ap_uint<1> *start_0,
|
||||
/*04*/volatile ap_uint<1> *start_1,
|
||||
/*05*/volatile ap_uint<1> *start_2,
|
||||
/*06*/volatile ap_uint<1> *start_3,
|
||||
/*07*/unsigned int cdma_base_address,
|
||||
/*08*/unsigned int scheduler_buffer_base_address,
|
||||
/*09*/unsigned int src_address_first_reg_offset,
|
||||
/*10*/unsigned int dst_address_first_reg_offset,
|
||||
/*11*/unsigned int data_size_first_reg_offset,
|
||||
/*12*/unsigned int offset_first_reg_offset,
|
||||
/*13*/unsigned int step_offset,
|
||||
/*14*/unsigned int shared_apm_base_address,
|
||||
/*15*/unsigned int shared_metrics_base_address,
|
||||
/*16*/unsigned int axi_bar_base_address,
|
||||
/*17*/unsigned int pcie_ctl_datr_address,
|
||||
/*18*/unsigned int pcie_mode,
|
||||
/*19*/unsigned int accel_group_jump
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Fetch Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=cdma_intr_in
|
||||
|
||||
/*
|
||||
* The start_0 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_0
|
||||
|
||||
/*
|
||||
* The start_1 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_1
|
||||
|
||||
/*
|
||||
* The start_2 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_2
|
||||
|
||||
/*
|
||||
* The start_3 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_ovld port=start_3
|
||||
|
||||
/*
|
||||
* The cdma_base_address is a Register to Store the Base Address of the CDMA Fetch that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
|
||||
* in order to Locate the Next Set of Registers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_base_address is a Register to Store the Base Address of the Source AXI BAR of the PCIe Bridge that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Read the Image Data over the PCIe Bus.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Source Image Data through the PCIe Bridge or not.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
|
||||
* to Store the Time Metrics that Refer to the Current AGI.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
int repeat;
|
||||
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> irq; // Used to Temporarily Store the IRQ Mask.
|
||||
ap_uint<32> source_address_register; // Used to Temporarily Store the Value of the Source Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> destination_address_register; // Used to Temporarily Store the Value of the Destination Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> data_size_register; // Used to Temporarily Store the Value of the Data Size Register of the Scheduler Buffer.
|
||||
ap_uint<32> offset_register; // Used to Temporarily Store the Value of the Offset Register of the Scheduler Buffer.
|
||||
|
||||
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
|
||||
|
||||
|
||||
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in Input Port.
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_gcc_l; // Store the CDMA Fetch Transfer Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_fetch_time_start_gcc_u; // Store the CDMA Fetch Transfer Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> cdma_fetch_time_end_gcc_l; // Store the CDMA Fetch Transfer End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_fetch_time_end_gcc_u; // Store the CDMA Fetch Transfer End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
/*
|
||||
* Start an Infinite Loop.
|
||||
*/
|
||||
while(1)
|
||||
{
|
||||
|
||||
/*
|
||||
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Fetch Transfer Request
|
||||
* by the AGI that Refers to the Current Set of Registers.
|
||||
*/
|
||||
for(repeat = 0; repeat < 4; repeat++)
|
||||
{
|
||||
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
|
||||
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//If the Data Size Register is not Empty then we Have a New CDMA Fetch Transfer Request.
|
||||
//Else the Fetch Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
|
||||
if(data_size_register != 0)
|
||||
{
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------
|
||||
* Enable the Interrupts on the CDMA Fetch Core
|
||||
* --------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Fetch Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------
|
||||
* Setup the Source and Destination Address Registers of the CDMA Fetch Core
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//If the PCIe Mode is Enabled then the Source Data Should be Read through the PCIe Bridge.
|
||||
//This Mode Requires Setting the Address Translation Register of the Source AXI BAR of the PCIe Bridge.
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Address Translation Register of the Source AXI BAR of the PCIe Bridge with the Source Physical Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &source_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Fetch Core to be the Specified Source AXI BAR along with a Possible Offset.
|
||||
address = axi_bar_base_address + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
}
|
||||
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
|
||||
else
|
||||
{
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Fetch Core with the Source Address along with the Offset Read from the Scheduler Buffer.
|
||||
address = source_address_register + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Fetch Core with the Destination Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &destination_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ----------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Transfer Start Time
|
||||
* ----------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Transfer Start Time.
|
||||
memcpy(&cdma_fetch_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_L_OFFSET) / 4), &cdma_fetch_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Transfer Start Time.
|
||||
memcpy(&cdma_fetch_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_U_OFFSET) / 4), &cdma_fetch_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------
|
||||
* Setup the Bytes To Transfer (BTT) Register of the CDMA Fetch Core which Triggers the Transfer
|
||||
* ---------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set the Bytes To Transfer Register of the CDMA Fetch Core with the Transfer Size in Bytes.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Wait for a CDMA Interrupt
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(cdma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
cdma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Transfer End Time
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Transfer End Time.
|
||||
memcpy(&cdma_fetch_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Fetch Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_L_OFFSET) / 4), &cdma_fetch_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Transfer End Time.
|
||||
memcpy(&cdma_fetch_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Fetch Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_U_OFFSET) / 4), &cdma_fetch_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the CDMA Fetch Interrupt
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Status Register of the CDMA Fetch Core which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXICDMA_XR_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the Status Register of the CDMA Fetch Core which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Reset the CDMA Fetch Core
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Write the Reset Mask to the Control Register of the CDMA Fetch Core in order to Reset the Core.
|
||||
data_register = XAXICDMA_CR_RESET_MASK;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -----------------------------------------------
|
||||
* Re-Enable the Interrupts on the CDMA Fetch Core
|
||||
* -----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Fetch Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
|
||||
*/
|
||||
data_register = 0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* Each Iteration Refers to a Specific AGI.
|
||||
* Check the Current Iteration Value and Start the Acceleration Scheduler Indirect of the Correct AGI.
|
||||
*/
|
||||
if(repeat == 0)
|
||||
{
|
||||
//Trigger the start_0 Signal for one Clock Cycle.
|
||||
*start_0 = 0;
|
||||
*start_0 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 1)
|
||||
{
|
||||
//Trigger the start_1 Signal for one Clock Cycle.
|
||||
*start_1 = 0;
|
||||
*start_1 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 2)
|
||||
{
|
||||
//Trigger the start_2 Signal for one Clock Cycle.
|
||||
*start_2 = 0;
|
||||
*start_2 = 1;
|
||||
}
|
||||
|
||||
if(repeat == 3)
|
||||
{
|
||||
//Trigger the start_3 Signal for one Clock Cycle.
|
||||
*start_3 = 0;
|
||||
*start_3 = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
126
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.h
Normal file
126
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/fetch_scheduler.h
Normal file
@@ -0,0 +1,126 @@
|
||||
#define APM_READ_TRANSACTIONS_OFFSET 0
|
||||
#define APM_READ_BYTES_OFFSET 4
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8
|
||||
#define APM_WRITE_BYTES_OFFSET 12
|
||||
|
||||
#define APM_PACKETS_OFFSET 16
|
||||
#define APM_BYTES_OFFSET 20
|
||||
|
||||
#define APM_GCC_L_OFFSET 24
|
||||
#define APM_GCC_U_OFFSET 28
|
||||
|
||||
#define CDMA_FETCH_TIME_START_L_OFFSET 32
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44
|
||||
|
||||
#define CDMA_SEND_TIME_START_L_OFFSET 48
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60
|
||||
|
||||
#define DMA_ACCEL_TIME_START_L_OFFSET 64
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Fetch_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script: Synthesizes the Fetch Scheduler Core and Exports it as an IP Catalog Core.

# Project Settings
set project_name  Fetch_Scheduler
set top_function  fetch_scheduler
set source_file   fetch_scheduler.cpp
set solution_name solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set target_part   xc7vx485tffg1761-2
set clock_period  10

# Exported IP Settings
set ip_name       "Fetch Scheduler"
set ip_version    1.0

# Create/Open the Project and Register the Top Function and its Source File
open_project  $project_name
set_top       $top_function
add_files     $source_file
open_solution $solution_name

# Target Device and Clock Constraint
set_part     $target_part
create_clock -period $clock_period -name default

# C Synthesis Followed by the IP Export
csynth_design
export_design -format ip_catalog -display_name $ip_name -version $ip_version

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Info_Memory_Block/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Info_Memory_Block/.keep
Normal file
102
Hardware/Vivado_HLS_IPs/Info_Memory_Block/info_memory_block.cpp
Normal file
102
Hardware/Vivado_HLS_IPs/Info_Memory_Block/info_memory_block.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "info_memory_block.h"
|
||||
|
||||
|
||||
/*
|
||||
* info_memory_block()
|
||||
*
|
||||
* The Hardware Functionality of the Info Memory Block Core.
|
||||
*
|
||||
* The Info Memory Block Core is Used to Aid the Acceleration Procedure of the Acceleration Groups Indirect (AGIs).
|
||||
* It is Accessed by the Acceleration Scheduler Indirect Cores of the AGIs as well as the Fetch and Send Schedulers.
|
||||
*
|
||||
* It Could be Considered as a Block of 16 Registers.
|
||||
* The Registers are Categorized in 4 Groups/Sets with 4 Registers in each Group/Set.
|
||||
*
|
||||
* Every Set of Registers Refers to one of the 4 AGIs.
|
||||
*
|
||||
* Set 0 Refers to AGI0.
|
||||
* Set 1 Refers to AGI1.
|
||||
* Set 2 Refers to AGI2.
|
||||
* Set 3 Refers to AGI3.
|
||||
*
|
||||
* The 4 Registers of Each Set Carry the Following Information:
|
||||
*
|
||||
* Register 0: Source Address.
|
||||
* Register 1: Destination Address.
|
||||
* Register 2: Data Size (Transfer Size).
|
||||
* Register 3: Address Offset.
|
||||
*
|
||||
* If an Acceleration Scheduler Indirect Requests a CDMA Transfer it Writes the Information Above to its own Set of Registers inside the Info Memory Block.
|
||||
* The Fetch or Send Scheduler Reads the Above Information from the Info Memory Block and Starts a CDMA Transfer Accordingly.
|
||||
*
|
||||
* The Function Parameters are the Input Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int info_memory_block(
		/* Set 0 (AGI0) */
		/*01*/unsigned int src_address_0, /*02*/unsigned int dst_address_0, /*03*/unsigned int data_size_0, /*04*/unsigned int offset_0,
		/* Set 1 (AGI1) */
		/*05*/unsigned int src_address_1, /*06*/unsigned int dst_address_1, /*07*/unsigned int data_size_1, /*08*/unsigned int offset_1,
		/* Set 2 (AGI2) */
		/*09*/unsigned int src_address_2, /*10*/unsigned int dst_address_2, /*11*/unsigned int data_size_2, /*12*/unsigned int offset_2,
		/* Set 3 (AGI3) */
		/*13*/unsigned int src_address_3, /*14*/unsigned int dst_address_3, /*15*/unsigned int data_size_3, /*16*/unsigned int offset_3
		)
{
	/*
	 * The Core Has no Datapath.
	 * Each Argument is Only Mapped to a Register of the AXI Slave Lite Interface (int_cfg)
	 * so that the Block Behaves as 4 Sets of 4 Registers (Source Address, Destination Address, Data Size, Address Offset).
	 */

	// Set 0 --> AGI0
#pragma HLS INTERFACE s_axilite port=src_address_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_0 bundle=int_cfg

	// Set 1 --> AGI1
#pragma HLS INTERFACE s_axilite port=src_address_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_1 bundle=int_cfg

	// Set 2 --> AGI2
#pragma HLS INTERFACE s_axilite port=src_address_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_2 bundle=int_cfg

	// Set 3 --> AGI3
#pragma HLS INTERFACE s_axilite port=src_address_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_3 bundle=int_cfg

	// The Block Level Control Signals and the Return Value Share the Same AXI Slave Lite Interface.
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg

	// The Return Value is Constant, the Register Contents are Never Read or Modified Herein.
	return 1;
}
|
||||
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
struct image_info
|
||||
{
|
||||
ap_uint<32> rows;
|
||||
ap_uint<32> columns;
|
||||
ap_uint<64> size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
17
Hardware/Vivado_HLS_IPs/Info_Memory_Block/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Info_Memory_Block/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script: Synthesizes the Info Memory Block Core and Exports it as an IP Catalog Core.

# Project Settings
set project_name  Info_Memory_Block
set top_function  info_memory_block
set source_file   info_memory_block.cpp
set solution_name solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set target_part   xc7vx485tffg1761-2
set clock_period  10

# Exported IP Settings
set ip_name       "Info Memory Block"
set ip_version    1.0

# Create/Open the Project and Register the Top Function and its Source File
open_project  $project_name
set_top       $top_function
add_files     $source_file
open_solution $solution_name

# Target Device and Clock Constraint
set_part     $target_part
create_clock -period $clock_period -name default

# C Synthesis Followed by the IP Export
csynth_design
export_design -format ip_catalog -display_name $ip_name -version $ip_version

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Interrupt_Manager/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Interrupt_Manager/.keep
Normal file
206
Hardware/Vivado_HLS_IPs/Interrupt_Manager/interrupt_manager.cpp
Normal file
206
Hardware/Vivado_HLS_IPs/Interrupt_Manager/interrupt_manager.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "interrupt_manager.h"
|
||||
|
||||
|
||||
/*
|
||||
* interrupt_manager()
|
||||
*
|
||||
* The Hardware Functionality of the Interrupt Manager Core.
|
||||
*
|
||||
* The Interrupt Manager Core is Developed to Handle and Forward the Completion Interrupts from the 7 Acceleration Groups.
|
||||
*
|
||||
* The Goal of each Acceleration Group is to Inform the Linux Kernel Driver for the Completion of the Acceleration Procedure.
|
||||
* The Communication of the FPGA with the Host System is Achieved through a PCIe Bus, thus, the Way to Signal the Driver is to Send MSI Interrupts.
|
||||
*
|
||||
* Sending a MSI is a Responsibility of the FPGA's PCIe Bridge.
|
||||
* The PCIe Bridge Carries a 5-Bit Input to Set the Vector Number of the MSI and a 1-Bit Input which is Used to Trigger the MSI According to the Vector Number.
|
||||
* In the Current Block Design the 2 Inputs of the PCIe Bridge are Connected with the Two Channels of a GPIO Peripheral.
|
||||
* This GPIO from now on will be Recognized as GPIO-MSI.
|
||||
* Writing Values in the Data Registers of the 2 Channels of the GPIO-MSI Leads to Triggering a MSI Interrupt.
|
||||
*
|
||||
* In Older Approaches the Acceleration Scheduler of each Acceleration Group would Simply Access the GPIO-MSI to Send MSI Interrupts on Completion of an Image Process.
|
||||
* This Approach was Proved to be Unreliable Since the Concurrent Access to the GPIO-MSI by Multiple Acceleration Groups
|
||||
* Could Lead to Possible Loss of Interrupts that were NEVER Transmitted.
|
||||
*
|
||||
* The new Approach to Ensure Zero Loss of Interrupts was to Develop the Current Interrupt Manager.
|
||||
* The Interrupt Manager Includes an Array of 7 Registers where each Register Refers to each of the 7 Acceleration Groups.
|
||||
*
|
||||
* Register_Array[0] Refers to AGD0
|
||||
* Register_Array[1] Refers to AGD1
|
||||
* Register_Array[2] Refers to AGI0
|
||||
* Register_Array[3] Refers to AGI1
|
||||
* Register_Array[4] Refers to AGI2
|
||||
* Register_Array[5] Refers to AGI3
|
||||
* Register_Array[6] Refers to AGSG
|
||||
*
|
||||
* When an Acceleration Scheduler of any of the Acceleration Groups Requires to Send an MSI Interrupt for the Completion of its Acceleration Procedure
|
||||
* it Simply Writes a Vector Number Value to the Corresponding Field of the Register Array of the Interrupt Manager as a MSI Request.
|
||||
* The Kernel Driver Identifies the Acceleration Group that "Sent" the MSI by the Vector Number.
|
||||
*
|
||||
* Vector Number:0 --> AGD0
|
||||
* Vector Number:1 --> AGD1
|
||||
* Vector Number:2 --> AGI0
|
||||
* Vector Number:3 --> AGI1
|
||||
* Vector Number:4 --> AGI2
|
||||
* Vector Number:5 --> AGI3
|
||||
* Vector Number:6 --> AGSG
|
||||
*
|
||||
* The Interrupt Manager Checks in a Round Robin Manner the Fields of the Register Array for a Non-Zero Value which Indicates a new MSI Request.
|
||||
* This Makes it Obvious that the Acceleration Schedulers Write to the Register Array of the Interrupt Manager the Vector Number Incremented by 1.
|
||||
* This is Done to Avoid Zero Values that are not Identified by the Interrupt Manager as MSI Requests.
|
||||
*
|
||||
* If the Interrupt Manager Finds a Field of the Register Array with Non-Zero Value then it Decreases this Value by 1 in order to Produce
|
||||
* the Correct Vector Number and Writes this Value to the GPIO-MSI Peripheral to Trigger the MSI Interrupt.
|
||||
*
|
||||
* The Interrupt Manager, then, Waits until it Receives an Acknowledgment Signal from the Kernel Driver before Checking for another MSI Request.
|
||||
* The Kernel Driver, actually, Writes a Logic 1 Value to another GPIO Peripheral whose 1-Bit Output Signals the Interrupt Manager.
|
||||
* This GPIO Peripheral from now on will be Recognized as GPIO-ACK.
|
||||
*
|
||||
*
|
||||
* The Sequential Steps of the Interrupt Management are as Follows:
|
||||
*
|
||||
* Start a for Loop with 7 Iterations where each Iteration is to Check for a MSI Request by the Corresponding Acceleration Group.
|
||||
* NOTE Enabling the Auto Restart Mode of the Current Core will Lead to Starting Over the for Loop.
|
||||
*
|
||||
* a --> Check if the Current Field of the Register Array Has a Non-Zero Value.
|
||||
* If this is the Case Proceed to Send a MSI Interrupt.
|
||||
* b --> Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
|
||||
* c --> Write the Vector Number to the GPIO-MSI Peripheral that is Connected with the PCIe Bridge to Trigger an MSI Interrupt.
|
||||
* d --> Wait for an Acknowledgment Signal from the Driver through the GPIO-ACK Peripheral.
|
||||
* e --> Self-Clear to Zero the Current Field of the Register Array of the Interrupt Manager.
|
||||
* The Next Time we Find a Non-Zero Value in this Field we Know that an Acceleration Group Has Made a Valid MSI Request.
|
||||
* f --> Clear the Data Register of the GPIO-ACK Peripheral.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Acknowledgements from the Linux Kernel Driver.
|
||||
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*/
|
||||
int interrupt_manager(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *intr_ack,
|
||||
/*03*/unsigned int gpio_msi_device_address,
|
||||
/*04*/unsigned int gpio_ack_device_address,
|
||||
/*05*/unsigned int self_msi_request_offset,
|
||||
/*06*/unsigned int msi_request[7]
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The gpio_msi_device_address is a Register to Store the Base Address of the GPIO-MSI that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=gpio_msi_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The gpio_ack_device_address is a Register to Store the Base Address of the GPIO-ACK that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=gpio_ack_device_address bundle=cfg
|
||||
|
||||
/*
|
||||
* The self_msi_request_offset is a Register to Store the Address Offset where the Register Array (msi_request) is Located.
|
||||
* This Address Offset Actually Leads the Interrupt Manager to Access its Own Configuration Registers through its AXI Slave Lite (cfg) Interface.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=self_msi_request_offset bundle=cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=msi_request bundle=cfg
|
||||
|
||||
/*
|
||||
* The intr_ack is a Single Bit Input which is Used to Receive External Acknowledgements from the Linux Kernel Driver.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=intr_ack
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<1> intr_ack_value; // Used to Read the Last Value of the intr_ack Input Port.
|
||||
|
||||
|
||||
for(int repeat = 0; repeat < 7; repeat++)
|
||||
{
|
||||
|
||||
//If the Current Field of the Register Array (msi_request) Has a Non-Zero Value then we Have a Valid MSI Request by the Corresponding Acceleration Group.
|
||||
if(msi_request[repeat] != 0)
|
||||
{
|
||||
/*
|
||||
* ---------------------------------------------------------
|
||||
* Send a MSI Interrupt by Writing to the GPIO-MSI Registers
|
||||
* ---------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
|
||||
data_register = msi_request[repeat] - 1;
|
||||
|
||||
//Write the Vector Number to the Data Register of the Second Channel of the GPIO-MSI.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Write a Logic 1 Value to the Data Register of the First Channel of the GPIO-MSI to Trigger the MSI Interrupt.
|
||||
data_register = 0x1;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Data Register of the First Channel of the GPIO-MSI back to Zero.
|
||||
data_register = 0x0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Wait for a Interrupt Acknowledgement
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the intr_ack Input.
|
||||
intr_ack_value = *intr_ack;
|
||||
|
||||
//Keep Looping for as long as the intr_ack Input Does not Reach a Logic 1 Value.
|
||||
while(intr_ack_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the intr_ack Input.
|
||||
intr_ack_value = *intr_ack;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
intr_ack_value = 0;
|
||||
|
||||
data_register = 0x0;
|
||||
|
||||
/* ---------------------------------------------------------------------------
|
||||
* Self-Clear the Current Field of the Register Array of the Interrupt Manager
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Write a Zero Value to the Current Field of the Register Array of the Interrupt Manager to Clear the Field.
|
||||
//NOTE the Interrupt Manager Herein Uses its AXI Master Interface to Write to its own AXI Slave Lite Interface.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (self_msi_request_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -----------------------------
|
||||
* Clear the GPIO-ACK Peripheral
|
||||
* -----------------------------
|
||||
*/
|
||||
|
||||
//Clear the GPIO-ACK by Writing a Zero Value to its Data Register.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (gpio_ack_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* ---------------------------------------------------
|
||||
* Registers and Offsets of the Xilinx GPIO Peripheral
|
||||
* ---------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
 * Base Offsets of the Two GPIO Channels.
 */
#define XGPIO_CHANNEL_1_OFFSET	0x0	// Channel 1.
#define XGPIO_CHANNEL_2_OFFSET	0x8	// Channel 2.

/*
 * Offset of the Data Register Relative to the Base Offset of a GPIO Channel.
 *
 * Channel 1 Data Register: XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET.
 * Channel 2 Data Register: XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET.
 */
#define XGPIO_DATA_OFFSET		0x0
|
||||
|
||||
17
Hardware/Vivado_HLS_IPs/Interrupt_Manager/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Interrupt_Manager/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script: Synthesizes the Interrupt Manager Core and Exports it as an IP Catalog Core.

# Project Settings
set project_name  Interrupt_Manager
set top_function  interrupt_manager
set source_file   interrupt_manager.cpp
set solution_name solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set target_part   xc7vx485tffg1761-2
set clock_period  10

# Exported IP Settings
set ip_name       "Interrupt Manager"
set ip_version    3.5

# Create/Open the Project and Register the Top Function and its Source File
open_project  $project_name
set_top       $top_function
add_files     $source_file
open_solution $solution_name

# Target Device and Clock Constraint
set_part     $target_part
create_clock -period $clock_period -name default

# C Synthesis Followed by the IP Export
csynth_design
export_design -format ip_catalog -display_name $ip_name -version $ip_version

exit
|
||||
0
Hardware/Vivado_HLS_IPs/Send_Scheduler/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Send_Scheduler/.keep
Normal file
17
Hardware/Vivado_HLS_IPs/Send_Scheduler/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Send_Scheduler/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS Batch Script: Synthesizes the Send Scheduler Core and Exports it as an IP Catalog Core.

# Project Settings
set project_name  Send_Scheduler
set top_function  send_scheduler
set source_file   send_scheduler.cpp
set solution_name solution1

# The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set target_part   xc7vx485tffg1761-2
set clock_period  10

# Exported IP Settings
set ip_name       "Send Scheduler"
set ip_version    3.0

# Create/Open the Project and Register the Top Function and its Source File
open_project  $project_name
set_top       $top_function
add_files     $source_file
open_solution $solution_name

# Target Device and Clock Constraint
set_part     $target_part
create_clock -period $clock_period -name default

# C Synthesis Followed by the IP Export
csynth_design
export_design -format ip_catalog -display_name $ip_name -version $ip_version

exit
|
||||
476
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.cpp
Normal file
476
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.cpp
Normal file
@@ -0,0 +1,476 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_cint.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_int.h"
|
||||
#include "send_scheduler.h"
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------
|
||||
* Registers and Masks of the DMA
|
||||
* ------------------------------
|
||||
*/
|
||||
/* CDMA Control and Status Registers */
#define XAXICDMA_CR_OFFSET			0x00000000	// Control Register.
#define XAXICDMA_SR_OFFSET			0x00000004	// Status Register.

/* CDMA Transfer Setup Registers */
#define XAXICDMA_SRCADDR_OFFSET		0x00000018	// Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET		0x00000020	// Destination Address Register.
#define XAXICDMA_BTT_OFFSET			0x00000028	// Bytes to Transfer Register.

/* CDMA Control Register Masks */
#define XAXICDMA_CR_RESET_MASK		0x00000004	// Reset CDMA Mask.

/* CDMA Interrupt Masks */
#define XAXICDMA_XR_IRQ_IOC_MASK	0x00001000	// Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK	0x00002000	// Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK	0x00004000	// Error Interrupt Mask.
#define XAXICDMA_XR_IRQ_ALL_MASK	0x00007000	// All Interrupt Mask (IOC | Delay | Error).

/*
 * --------------------------------------------------------------------------
 * Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
 * --------------------------------------------------------------------------
 */

#define XAPM_GCC_HIGH_OFFSET		0x0000	// Global Clock Counter 32 to 63 Bits (Upper).
#define XAPM_GCC_LOW_OFFSET			0x0004	// Global Clock Counter 0 to 31 Bits (Lower).
|
||||
|
||||
/*
|
||||
* send_scheduler()
|
||||
*
|
||||
* The Hardware Functionality of the Send Scheduler Core.
|
||||
*
|
||||
* The Send Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
|
||||
* The Responsibility of this Core is to Manage the Procedure of Sending the Processed Data by the AGIs back to the Host's Memory.
|
||||
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
|
||||
* If it Finds Information for new Transfer it Starts the CDMA Send Core to Transfer Processed Image Data from the FPGA's DDR3 to the Host's Memory.
|
||||
* The Interrupt Manager will then be Informed about the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure of the Corresponding AGI.
|
||||
*
|
||||
* When an AGI wants to Request a Transfer of Processed Image Data from the Send Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
|
||||
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Send Scheduler.
|
||||
*
|
||||
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
|
||||
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
|
||||
* Each Set Corresponds to One of the 4 AGIs.
|
||||
*
|
||||
* When an AGI Writes the Above Information to the Scheduler Buffer, the Send Scheduler Starts a CDMA Transfer Accordingly
|
||||
* to Send the Processed Image Data back to the Host's Memory.
|
||||
*
|
||||
* The Sequential Steps of the Acceleration Procedure are as Follows:
|
||||
*
|
||||
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
|
||||
*
|
||||
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
|
||||
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
|
||||
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Send.
|
||||
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
|
||||
* b --> Enable the Interrupts on the CDMA Send Core.
|
||||
* c --> Setup the CDMA with the Source and Destination Addresses.
|
||||
* If the Destination Data Should be Sent through the PCIe Bridge then Get the Destination Address from the Scheduler Buffer and Set the
|
||||
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
|
||||
* Then Set the Destination Address Register of the CDMA Send Core to be the Corresponding AXI BAR.
|
||||
* If the Destination Data Should not be Sent through the PCIe Bridge then Just Set the Destination Address Register of the CDMA Send Core
|
||||
* with the Destination Address of the Scheduler Buffer.
|
||||
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Started.
|
||||
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Send Transfer.
|
||||
* f --> Wait for an Interrupt by the CDMA Send on Completion of the Transfer.
|
||||
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Ended.
|
||||
* h --> Acknowledge the CDMA Send Interrupt.
|
||||
* i --> Reset the CDMA Send Core.
|
||||
* j --> Re-Enable the Interrupts on the CDMA Send Core.
|
||||
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
|
||||
* l --> Inform the Interrupt Manager About the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure.
|
||||
*
|
||||
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
|
||||
*
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
|
||||
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Send Core.
|
||||
* 03 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
|
||||
*/
|
||||
int send_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
|
||||
/*02*/volatile ap_uint<1> *cdma_intr_in,
|
||||
/*03*/unsigned int cdma_base_address,
|
||||
/*04*/unsigned int scheduler_buffer_base_address,
|
||||
/*05*/unsigned int src_address_first_reg_offset,
|
||||
/*06*/unsigned int dst_address_first_reg_offset,
|
||||
/*07*/unsigned int data_size_first_reg_offset,
|
||||
/*08*/unsigned int offset_first_reg_offset,
|
||||
/*09*/unsigned int step_offset,
|
||||
/*10*/unsigned int shared_apm_base_address,
|
||||
/*11*/unsigned int shared_metrics_base_address,
|
||||
/*12*/unsigned int axi_bar_base_address,
|
||||
/*13*/unsigned int pcie_ctl_datr_address,
|
||||
/*14*/unsigned int pcie_mode,
|
||||
/*15*/unsigned int interrupt_manager_register_offset,
|
||||
/*16*/unsigned int accel_group_jump
|
||||
)
|
||||
{
|
||||
|
||||
/*
|
||||
* The ext_cfg is the AXI Master Interface of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE m_axi port=ext_cfg
|
||||
|
||||
/*
|
||||
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Send Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE ap_none port=cdma_intr_in
|
||||
|
||||
/*
|
||||
* The cdma_base_address is a Register to Store the Base Address of the CDMA Send that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
|
||||
* of the First Set of Registers inside the Scheduler Buffer is Located.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
|
||||
* in order to Locate the Next Set of Registers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The axi_bar_base_address is a Register to Store the Base Address of the Destination AXI BAR of the PCIe Bridge that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Processed Image Data over the PCIe Bus.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Destination AXI BAR.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Destination Image Data through the PCIe Bridge or not.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
|
||||
* will Need to Access through the ext_cfg AXI Master Interface.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=int_cfg
|
||||
|
||||
/*
|
||||
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
|
||||
* to Store the Time Metrics that Refer to the Current AGI.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
|
||||
|
||||
|
||||
int repeat;
|
||||
|
||||
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in_value Input Port.
|
||||
|
||||
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
|
||||
ap_uint<32> irq; // Used to Temporalily Store the IRQ Mask.
|
||||
ap_uint<32> source_address_register; // Used to Temporalily Store the Value of the Source Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> destination_address_register; // Used to Temporalily Store the Value of the Destination Address Register of the Scheduler Buffer.
|
||||
ap_uint<32> data_size_register; // Used to Temporalily Store the Value of the Data Size Register of the Scheduler Buffer.
|
||||
ap_uint<32> offset_register; // Used to Temporalily Store the Value of the Offset Register of the Scheduler Buffer.
|
||||
|
||||
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
|
||||
|
||||
ap_uint<32> cdma_send_time_start_gcc_l; // Store the CDMA Send Transfer Start Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_send_time_start_gcc_u; // Store the CDMA Send Transfer Start Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
ap_uint<32> cdma_send_time_end_gcc_l; // Store the CDMA Send Transfer End Time Lower Register from the Shared Timer (Shared APM).
|
||||
ap_uint<32> cdma_send_time_end_gcc_u; // Store the CDMA Send Transfer End Time Upper Register from the Shared Timer (Shared APM).
|
||||
|
||||
|
||||
/*
|
||||
* Start an Infinite Loop.
|
||||
*/
|
||||
while(1)
|
||||
{
|
||||
|
||||
/*
|
||||
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Send Transfer Request
|
||||
* by the AGI that Refers to the Current Set of Registers.
|
||||
*/
|
||||
for(repeat = 0; repeat < 4; repeat++)
|
||||
{
|
||||
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
|
||||
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
//If the Data Size Register is not Empty then we Have a New CDMA Send Transfer Request.
|
||||
//Else the Send Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
|
||||
if(data_size_register != 0)
|
||||
{
|
||||
|
||||
/*
|
||||
* --------------------------------------------
|
||||
* Enable the Interrupts on the CDMA Send Core
|
||||
* --------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Send Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------
|
||||
* Setup the Source and Destination Address Registers of the CDMA Send Core
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Source Address Register of the CDMA Send Core with the Source Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &source_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//If the PCIe Mode is Enabled then the Destination Data Should be Written through the PCIe Bridge.
|
||||
//This Mode Requires Setting the Address Translation Register of the Destination AXI BAR of the PCIe Bridge.
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Address Translation Register of the Destination AXI BAR of the PCIe Bridge with the Destination Physical Address.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &destination_address_register, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Send Core to be the Specified Destination AXI BAR along with a Possible Offset.
|
||||
address = axi_bar_base_address + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
|
||||
else
|
||||
{
|
||||
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
|
||||
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
|
||||
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
|
||||
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Destination Address Register of the CDMA Send Core with the Destination Address along with the Offset Read from the Scheduler Buffer.
|
||||
address = destination_address_register + offset_register;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Transfer Start Time
|
||||
* ---------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer Start Time.
|
||||
memcpy(&cdma_send_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_L_OFFSET) / 4), &cdma_send_time_start_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer Start Time.
|
||||
memcpy(&cdma_send_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_U_OFFSET) / 4), &cdma_send_time_start_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------------------------
|
||||
* Setup the Bytes To Transfer (BTT) Register of the CDMA Send Core which Triggers the Transfer
|
||||
* --------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Set the Bytes To Transfer Register of the CDMA Send Core with the Transfer Size in Bytes.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Wait for a CDMA Interrupt
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Make an Initial Read of the Current State of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
|
||||
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
|
||||
while(cdma_intr_in_value != 1)
|
||||
{
|
||||
//Keep Reading the Last Value of the cdma_intr_in Input.
|
||||
cdma_intr_in_value = *cdma_intr_in;
|
||||
}
|
||||
|
||||
//Reset the Reader Variable.
|
||||
cdma_intr_in_value = 0;
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Tranfer End Time
|
||||
* -------------------------------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer End Time.
|
||||
memcpy(&cdma_send_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 LSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_L_OFFSET) / 4), &cdma_send_time_end_gcc_l, sizeof(ap_uint<32>));
|
||||
|
||||
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer End Time.
|
||||
memcpy(&cdma_send_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Store the 32 MSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_U_OFFSET) / 4), &cdma_send_time_end_gcc_u, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* ------------------------------------
|
||||
* Acknowledge the CDMA Send Interrupt
|
||||
* ------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Status Register of the CDMA Send Core which among others Includes the Status of the DMA's IRQs.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Filter the Recently Read Value with the XAXICDMA_XR_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
|
||||
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
|
||||
|
||||
//Write the new Value Back to the Status Register of the CDMA Send Core which Acknowledges the Triggered Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
|
||||
|
||||
|
||||
/*
|
||||
* -------------------------
|
||||
* Reset the CDMA Send Core
|
||||
* -------------------------
|
||||
*/
|
||||
|
||||
//Write the Reset Mask to the Control Register of the CDMA Send Core in order to Reset the Core.
|
||||
data_register = XAXICDMA_CR_RESET_MASK;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* -----------------------------------------------
|
||||
* Re-Enable the Interrupts on the CDMA Send Core
|
||||
* -----------------------------------------------
|
||||
*/
|
||||
|
||||
//Read the Control Register of the CDMA Send Core.
|
||||
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
|
||||
|
||||
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
|
||||
//NOTE that IOC Stands for Interrupt On Complete.
|
||||
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
|
||||
|
||||
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
|
||||
*/
|
||||
data_register = 0;
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
|
||||
/*
|
||||
* If the PCIe Mode is Enabled then we Have to Inform the Interrupt Manager to Send a MSI Interrupt Since the CDMA Send Transfer is Complete
|
||||
* which, also, Means that the Acceleration Procedure is Complete.
|
||||
*/
|
||||
if(pcie_mode == 1)
|
||||
{
|
||||
//The Current Iteration Value along with the accel_group_jump Value Indicate the Acceleration Group Number of the Acceleration Group Indirect which Can be 2, 3, 4 or 5.
|
||||
//2 is AGI0.
|
||||
//3 is AGI1.
|
||||
//4 is AGI2.
|
||||
//5 is AGI3.
|
||||
data_register = repeat + accel_group_jump;
|
||||
|
||||
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
|
||||
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
126
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.h
Normal file
126
Hardware/Vivado_HLS_IPs/Send_Scheduler/send_scheduler.h
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
 * Byte offsets of the leading 32-bit fields of struct metrics (declared further down in this header).
 * Every value equals the "Offset N Bytes" annotation on the matching struct field, so the two
 * must be kept in sync by hand.
 * The _L / _U suffixes pair with the _l / _u struct fields; for the CDMA send timestamps the
 * Send Scheduler stores the 32 LSBs at the _L offset and the 32 MSBs at the _U offset.
 */
// AXI Performance Monitor (APM) counters: offsets 0..28.
#define APM_READ_TRANSACTIONS_OFFSET 0
|
||||
#define APM_READ_BYTES_OFFSET 4
|
||||
|
||||
#define APM_WRITE_TRANSACTIONS_OFFSET 8
|
||||
#define APM_WRITE_BYTES_OFFSET 12
|
||||
|
||||
#define APM_PACKETS_OFFSET 16
|
||||
#define APM_BYTES_OFFSET 20
|
||||
|
||||
#define APM_GCC_L_OFFSET 24
|
||||
#define APM_GCC_U_OFFSET 28
|
||||
|
||||
// CDMA fetch transfer start/end timestamps: offsets 32..44.
#define CDMA_FETCH_TIME_START_L_OFFSET 32
|
||||
#define CDMA_FETCH_TIME_START_U_OFFSET 36
|
||||
|
||||
#define CDMA_FETCH_TIME_END_L_OFFSET 40
|
||||
#define CDMA_FETCH_TIME_END_U_OFFSET 44
|
||||
|
||||
// CDMA send transfer start/end timestamps: offsets 48..60 (the ones written by the Send Scheduler).
#define CDMA_SEND_TIME_START_L_OFFSET 48
|
||||
#define CDMA_SEND_TIME_START_U_OFFSET 52
|
||||
|
||||
#define CDMA_SEND_TIME_END_L_OFFSET 56
|
||||
#define CDMA_SEND_TIME_END_U_OFFSET 60
|
||||
|
||||
// DMA acceleration start/end timestamps: offsets 64..76.
#define DMA_ACCEL_TIME_START_L_OFFSET 64
|
||||
#define DMA_ACCEL_TIME_START_U_OFFSET 68
|
||||
|
||||
#define DMA_ACCEL_TIME_END_L_OFFSET 72
|
||||
#define DMA_ACCEL_TIME_END_U_OFFSET 76
|
||||
|
||||
|
||||
/*
 * Image description embedded in struct metrics as shared_image_info.
 * Two 32-bit fields followed by one 64-bit field; the field order is part of the shared layout.
 * NOTE(review): the unit of size (bytes vs. pixels) is not visible in this header - confirm against the writer.
 */
struct image_info
|
||||
{
|
||||
ap_uint<32> rows; // Number of image rows (32-bit).
|
||||
ap_uint<32> columns; // Number of image columns (32-bit).
|
||||
ap_uint<64> size; // Image size (64-bit); presumably rows * columns scaled by pixel size - TODO confirm.
|
||||
};
|
||||
|
||||
/*
 * One metrics record; struct shared_repository holds one such record per accelerator.
 *
 * The first twenty fields are 32 bits wide and their byte offsets (0..76) are mirrored by the
 * *_OFFSET macros at the top of this header. Hardware cores address individual fields as
 * record base + macro offset, and the Send Scheduler steps from one record to the next in
 * units of sizeof(struct metrics). Adding, removing, reordering or resizing a field therefore
 * requires updating the macros and every core that uses that stride.
 *
 * Fields ending in _l / _u pair with the _L / _U macros; for the CDMA send timestamps the
 * Send Scheduler writes the 32 LSBs to _l and the 32 MSBs to _u.
 */
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
ap_uint<32> apm_read_transactions; //Offset 0 Bytes (APM_READ_TRANSACTIONS_OFFSET)
|
||||
ap_uint<32> apm_read_bytes; //Offset 4 Bytes (APM_READ_BYTES_OFFSET)
|
||||
|
||||
ap_uint<32> apm_write_transactions; //Offset 8 Bytes (APM_WRITE_TRANSACTIONS_OFFSET)
|
||||
ap_uint<32> apm_write_bytes; //Offset 12 Bytes (APM_WRITE_BYTES_OFFSET)
|
||||
|
||||
ap_uint<32> apm_packets; //Offset 16 Bytes (APM_PACKETS_OFFSET)
|
||||
ap_uint<32> apm_bytes; //Offset 20 Bytes (APM_BYTES_OFFSET)
|
||||
|
||||
ap_uint<32> apm_gcc_l; //Offset 24 Bytes (APM_GCC_L_OFFSET)
|
||||
ap_uint<32> apm_gcc_u; //Offset 28 Bytes (APM_GCC_U_OFFSET)
|
||||
|
||||
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes (CDMA_FETCH_TIME_START_L_OFFSET)
|
||||
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes (CDMA_FETCH_TIME_START_U_OFFSET)
|
||||
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes (CDMA_FETCH_TIME_END_L_OFFSET)
|
||||
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes (CDMA_FETCH_TIME_END_U_OFFSET)
|
||||
|
||||
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes (CDMA_SEND_TIME_START_L_OFFSET)
|
||||
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes (CDMA_SEND_TIME_START_U_OFFSET)
|
||||
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes (CDMA_SEND_TIME_END_L_OFFSET)
|
||||
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes (CDMA_SEND_TIME_END_U_OFFSET)
|
||||
|
||||
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes (DMA_ACCEL_TIME_START_L_OFFSET)
|
||||
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes (DMA_ACCEL_TIME_START_U_OFFSET)
|
||||
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes (DMA_ACCEL_TIME_END_L_OFFSET)
|
||||
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes (DMA_ACCEL_TIME_END_U_OFFSET)
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes (no matching *_OFFSET macro in this header)
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
// NOTE(review): the 64-bit fields below carry no offset annotations and have no *_OFFSET macros here;
// their offsets follow from the compiler's layout of ap_uint<64> - confirm before addressing them by hand.
ap_uint<64> total_time_start;
|
||||
ap_uint<64> total_time_end;
|
||||
|
||||
ap_uint<64> sleep_time_start;
|
||||
ap_uint<64> sleep_time_end;
|
||||
|
||||
ap_uint<64> preparation_time_start;
|
||||
ap_uint<64> preparation_time_end;
|
||||
|
||||
ap_uint<64> load_time_start;
|
||||
ap_uint<64> load_time_end;
|
||||
|
||||
ap_uint<64> save_time_start;
|
||||
ap_uint<64> save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
/*
 * Status words stored after the metrics records in struct shared_repository.
 * There is one *_occupied_pid word for each accelerator that has a metrics record
 * (2 direct, 4 indirect, 1 scatter/gather), followed by two global words.
 * NOTE(review): the names suggest each *_occupied_pid holds the PID of the process currently
 * using that accelerator; the "free" encoding and the meaning of accelerator_busy / open_modules
 * are not visible in this header - confirm against the driver.
 */
struct status_flags
|
||||
{
|
||||
// Direct acceleration groups.
ap_uint<32> accel_direct_0_occupied_pid;
|
||||
ap_uint<32> accel_direct_1_occupied_pid;
|
||||
|
||||
// Indirect acceleration groups.
ap_uint<32> accel_indirect_0_occupied_pid;
|
||||
ap_uint<32> accel_indirect_1_occupied_pid;
|
||||
ap_uint<32> accel_indirect_2_occupied_pid;
|
||||
ap_uint<32> accel_indirect_3_occupied_pid;
|
||||
|
||||
// Scatter/gather acceleration group.
ap_uint<32> accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
// Global status words (semantics not shown here).
ap_uint<32> accelerator_busy;
|
||||
ap_uint<32> open_modules;
|
||||
};
|
||||
|
||||
/*
 * Top-level layout of the shared memory: seven consecutive metrics records followed by the status flags.
 *
 * The order of the records is significant. The Send Scheduler locates a record as
 * base + sizeof(struct metrics) * (accel_group_jump + repeat), and its comments number
 * AGI0..AGI3 as groups 2..5, which matches the positions of accel_indirect_0..3 below.
 * NOTE(review): this assumes shared_metrics_base_address points at the start of this struct - confirm.
 */
struct shared_repository
|
||||
{
|
||||
struct metrics accel_direct_0_shared_metrics; // Record index 0
|
||||
struct metrics accel_direct_1_shared_metrics; // Record index 1
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics; // Record index 2
|
||||
struct metrics accel_indirect_1_shared_metrics; // Record index 3
|
||||
struct metrics accel_indirect_2_shared_metrics; // Record index 4
|
||||
struct metrics accel_indirect_3_shared_metrics; // Record index 5
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics; // Record index 6
|
||||
|
||||
struct status_flags shared_status_flags; // Placed after the seven metrics records
|
||||
|
||||
};
|
||||
0
Hardware/Vivado_HLS_IPs/Sobel_Filter/.keep
Normal file
0
Hardware/Vivado_HLS_IPs/Sobel_Filter/.keep
Normal file
74
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_axi_sdata2.h
Normal file
74
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_axi_sdata2.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_axi_sdata.h
|
||||
Purpose: AXI data type for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
#ifndef __AP__AXI_SDATA__
|
||||
#define __AP__AXI_SDATA__
|
||||
|
||||
#define AP_INT_MAX_W 4096
|
||||
#include "ap_int.h"
|
||||
|
||||
// Signed AXI stream element: a data word plus its side-channel fields, all declared as ap_int.
//   D  - width in bits of data; strb is (D+7)/8 bits wide, i.e. one bit per data byte (rounded up).
//   U  - width in bits of user.
//   TI - kept in the parameter list but unused here, because the tid member is commented out.
//   TD - width in bits of tdest.
// Member order defines the layout of the element; there is no keep or tid member in this variant.
template<int D,int U,int TI,int TD>
|
||||
struct ap_axis2{
|
||||
ap_int<D> data;
|
||||
ap_int<(D+7)/8> strb;
|
||||
ap_int<U> user;
|
||||
ap_int<1> last;
|
||||
// ap_int<TI> tid;
|
||||
ap_int<TD> tdest;
|
||||
};
|
||||
|
||||
// Unsigned AXI stream element: same members as ap_axis2 but declared as ap_uint.
//   D  - width in bits of data; strb is (D+7)/8 bits wide, i.e. one bit per data byte (rounded up).
//   U  - width in bits of user.
//   TI - kept in the parameter list but unused here, because the tid member is commented out.
//   TD - width in bits of tdest.
// Member order defines the layout of the element; there is no keep or tid member in this variant.
template<int D,int U,int TI,int TD>
|
||||
struct ap_axiu2{
|
||||
ap_uint<D> data;
|
||||
ap_uint<(D+7)/8> strb;
|
||||
ap_uint<U> user;
|
||||
ap_uint<1> last;
|
||||
//ap_uint<TI> tid;
|
||||
ap_uint<TD> tdest;
|
||||
};
|
||||
|
||||
//typedef ap_axis<int D, int U, int TI, int TD> ap_axis_unsigned<int D, int U, int TI, int TD>;
|
||||
|
||||
|
||||
#endif
|
||||
99
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_bmp.h
Normal file
99
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_bmp.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_bmp.h
|
||||
Purpose: BMP image reader and writer header file for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef __XLNX__BITMAP__
|
||||
#define __XLNX__BITMAP__
|
||||
|
||||
// Basic color definitions: the minimum (0) and maximum (255) values of an 8-bit channel.
|
||||
#define BLACK 0
|
||||
#define WHITE 255
|
||||
|
||||
// Maximum image size (1080 rows x 1920 columns).
// These two values dimension every plane array inside BMPImage, so they fix that struct's size at compile time.
|
||||
#define MAX_ROWS 1080
|
||||
#define MAX_COLS 1920
|
||||
|
||||
//File Information Header
// In-memory copy of the BMP file header fields.
// NOTE(review): Offset is declared as a 16-bit unsigned short, whereas the on-disk BMP file header
// stores the pixel-data offset as a 32-bit value - confirm that the reader/writer handle the field
// width explicitly. The struct also mixes 16- and 32-bit members, so the compiler may insert padding;
// presumably it is filled field by field rather than read from the file in one block - TODO confirm.
|
||||
typedef struct{
|
||||
unsigned short FileType; // File signature field.
|
||||
unsigned int FileSize; // Total file size field.
|
||||
unsigned short Reserved1;
|
||||
unsigned short Reserved2;
|
||||
unsigned short Offset; // Offset to the pixel data (see the review note above about its width).
|
||||
}BMPHeader;
|
||||
|
||||
// Image Information Header
// In-memory copy of the BMP image (info) header: nine 32-bit fields and two 16-bit fields, 40 bytes in total.
// NOTE(review): Width and Height are unsigned here; BMP files may store a negative height for
// top-down images - confirm the reader does not need to support that.
typedef struct{
|
||||
unsigned int Size; // Size field of this header.
|
||||
unsigned int Width; // Image width.
|
||||
unsigned int Height; // Image height.
|
||||
unsigned short Planes;
|
||||
unsigned short BitsPerPixel;
|
||||
unsigned int Compression;
|
||||
unsigned int SizeOfBitmap;
|
||||
unsigned int HorzResolution;
|
||||
unsigned int VertResolution;
|
||||
unsigned int ColorsUsed;
|
||||
unsigned int ColorsImportant;
|
||||
}BMPImageHeader;
|
||||
|
||||
// Complete image container: pointers to the two headers, the color table and the raw data,
// plus six fixed-size planes of MAX_ROWS x MAX_COLS one-byte samples each.
// With MAX_ROWS = 1080 and MAX_COLS = 1920 each plane is 2,073,600 bytes, so one BMPImage
// is roughly 12.4 MB; NOTE(review): an object this large should not be placed on the stack.
// The struct only holds the four pointers; ownership of what they point to is not visible here.
typedef struct{
|
||||
BMPHeader *file_header;
|
||||
BMPImageHeader *image_header;
|
||||
unsigned int *colors;
|
||||
unsigned char *data;
|
||||
// Unsigned 8-bit planes.
unsigned char R[MAX_ROWS][MAX_COLS];
|
||||
unsigned char G[MAX_ROWS][MAX_COLS];
|
||||
unsigned char B[MAX_ROWS][MAX_COLS];
|
||||
unsigned char Y[MAX_ROWS][MAX_COLS];
|
||||
// U and V use plain char, whose signedness is implementation-defined.
char U[MAX_ROWS][MAX_COLS];
|
||||
char V[MAX_ROWS][MAX_COLS];
|
||||
}BMPImage;
|
||||
|
||||
//Read Function
// Declaration only; the implementation is not part of this header.
//   file    - path of the BMP file.
//   row/col - image dimensions; presumably the number of rows and columns expected in the file - TODO confirm.
//   R, G, B - caller-provided buffers for the three color planes; presumably row * col bytes each - TODO confirm.
// NOTE(review): the meaning of the int return value (success/error convention) is not visible here.
|
||||
int BMP_Read(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
|
||||
|
||||
//Write Function
// Declaration only; the implementation is not part of this header.
//   file    - path of the BMP file.
//   row/col - image dimensions; presumably the number of rows and columns to write - TODO confirm.
//   R, G, B - buffers holding the three color planes; presumably row * col bytes each - TODO confirm.
// NOTE(review): the meaning of the int return value (success/error convention) is not visible here.
|
||||
int BMP_Write(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
|
||||
|
||||
#endif
|
||||
341
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_video.h
Normal file
341
Hardware/Vivado_HLS_IPs/Sobel_Filter/ap_video.h
Normal file
@@ -0,0 +1,341 @@
|
||||
/*******************************************************************************
|
||||
Vendor: Xilinx
|
||||
Associated Filename: ap_video.h
|
||||
Purpose: Video datatype header file for AutoESL
|
||||
Revision History: February 13, 2012 - initial release
|
||||
January 28, 2015 - Caes-lab TEI Crete revised
|
||||
*******************************************************************************
|
||||
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
|
||||
|
||||
This file contains confidential and proprietary information of Xilinx, Inc. and
|
||||
is protected under U.S. and international copyright and other intellectual
|
||||
property laws.
|
||||
|
||||
DISCLAIMER
|
||||
This disclaimer is not a license and does not grant any rights to the materials
|
||||
distributed herewith. Except as otherwise provided in a valid license issued to
|
||||
you by Xilinx, and to the maximum extent permitted by applicable law:
|
||||
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
|
||||
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
|
||||
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
|
||||
in contract or tort, including negligence, or under any other theory of
|
||||
liability) for any loss or damage of any kind or nature related to, arising under
|
||||
or in connection with these materials, including for any direct, or any indirect,
|
||||
special, incidental, or consequential loss or damage (including loss of data,
|
||||
profits, goodwill, or any type of loss or damage suffered as a result of any
|
||||
action brought by a third party) even if such damage or loss was reasonably
|
||||
foreseeable or Xilinx had been advised of the possibility of the same.
|
||||
|
||||
CRITICAL APPLICATIONS
|
||||
Xilinx products are not designed or intended to be fail-safe, or for use in any
|
||||
application requiring fail-safe performance, such as life-support or safety
|
||||
devices or systems, Class III medical devices, nuclear facilities, applications
|
||||
related to the deployment of airbags, or any other applications that could lead
|
||||
to death, personal injury, or severe property or environmental damage
|
||||
(individually and collectively, "Critical Applications"). Customer assumes the
|
||||
sole risk and liability of any use of Xilinx products in Critical Applications,
|
||||
subject only to applicable laws and regulations governing limitations on product
|
||||
liability.
|
||||
|
||||
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
|
||||
ALL TIMES.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef ___AP__VIDEO__
|
||||
#define ___AP__VIDEO__
|
||||
|
||||
#include "ap_int.h"
|
||||
|
||||
|
||||
/* Parametrized RGB structure */
|
||||
template <int A, int D, int C>
|
||||
struct ap_rgb{
|
||||
ap_uint<A> B;
|
||||
ap_uint<D> G;
|
||||
ap_uint<C> R;
|
||||
};
|
||||
|
||||
/* Parametrized YUV structure */
|
||||
template <int A, int B, int C>
|
||||
struct ap_yuv{
|
||||
ap_uint<A> Y;
|
||||
ap_int<B> U;
|
||||
ap_int<C> V;
|
||||
};
|
||||
|
||||
/*
 * Line buffer class definition.
 *
 * Holds LROW rows of LCOL elements of type T in the public array M, indexed
 * M[row][col]. The member functions are declared here and defined out of class
 * further down in this header.
 */
template <typename T, int LROW, int LCOL>
class ap_linebuffer {
public:
    T M[LROW][LCOL];

    ap_linebuffer() {
#pragma AP ARRAY_PARTITION variable=M dim=1 complete
//#pragma AP data_pack variable=M
//#pragma AP dependence variable=M intra false
//#pragma AP dependence variable=M inter false
    }
    ~ap_linebuffer() {}

    /* Element access. */
    T getval(int RowIndex,int ColIndex);
    void insert(T value, int row, int col);
    void insert_top(T value, int col);
    void insert_bottom(T value, int col);

    /* Row shifting: one column at a time, or every column at once. */
    void shift_up(int col);
    void shift_down(int col);
    void shift_down_all();

    /* Debug dump of the columns in [StartCol, EndCol). */
    void print(int StartCol, int EndCol);
};
|
||||
|
||||
/* Line buffer print function.
|
||||
* Prints the values of all rows in the line buffer
|
||||
* between StartCol and EndCol
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::print(int StartCol, int EndCol)
|
||||
{
|
||||
int i, j;
|
||||
for(i = LROW-1; i > -1; i--){
|
||||
printf("Line %d:\t",i);
|
||||
for(j=StartCol; j < EndCol; j++){
|
||||
printf("%d\t",M[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/* Line buffer shift up
|
||||
* Assumes new data pixel will be entered at the bottom of the line buffer
|
||||
* The bottom is row = 0
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::shift_up(int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
int i;
|
||||
for(i = LROW-1; i > 0; i--){
|
||||
#pragma AP unroll
|
||||
M[i][col] = M[i-1][col];
|
||||
}
|
||||
}
|
||||
|
||||
/* Line buffer shift down
|
||||
* Assumes new data pixel will be entered at the top of the line buffer
|
||||
* The bottom is row = LROW - 1
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::shift_down(int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
int i;
|
||||
for(i = 0; i < LROW-1; i++){
|
||||
#pragma AP unroll
|
||||
M[i][col] = M[i+1][col];
|
||||
}
|
||||
}
|
||||
|
||||
/* Line buffer shift down
|
||||
* Assumes new data pixel will be entered at the top of the line buffer
|
||||
* The bottom is row = LROW - 1
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::shift_down_all()
|
||||
{
|
||||
#pragma AP inline
|
||||
int i, j;
|
||||
for(i = 0; i < LROW-1; i++){
|
||||
#pragma AP unroll
|
||||
for(j = 0; j < LCOL; j++){
|
||||
#pragma AP unroll factor=120
|
||||
M[i][j] = M[i+1][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Line buffer insert bottom
|
||||
* Inserts a new value in the bottom row of the line buffer at column = col
|
||||
* The bottom is row = 0
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::insert_bottom(T value, int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
|
||||
M[0][col] = value;
|
||||
}
|
||||
|
||||
/* Line buffer insert top
|
||||
* Inserts a new value in the top row of the line buffer at column = col
|
||||
* The bottom is row = LROW - 1
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::insert_top(T value, int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
|
||||
M[LROW-1][col] = value;
|
||||
}
|
||||
|
||||
/* Line buffer insert
|
||||
* Inserts a new value at any location of the line buffer
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_linebuffer<T,LROW,LCOL>::insert(T value, int row, int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
M[row][col] = value;
|
||||
}
|
||||
|
||||
/* Line buffer getval
|
||||
* Returns the data value in the line buffer at position RowIndex, ColIndex
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
T ap_linebuffer<T,LROW,LCOL>::getval(int RowIndex,int ColIndex)
|
||||
{
|
||||
#pragma AP inline
|
||||
|
||||
T return_value;
|
||||
return_value = M[RowIndex][ColIndex];
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/*
 * Memory window class definition.
 *
 * Holds an LROW x LCOL block of elements of type T in the public array M,
 * indexed M[row][col]. The member functions are declared here and defined out
 * of class further down in this header.
 */
template <typename T, int LROW, int LCOL>
class ap_window {
public:
    T M[LROW][LCOL];

    ap_window() {
#pragma AP ARRAY_PARTITION variable=M dim=0 complete
//#pragma AP data_pack variable=M
    }
    ~ap_window() {}

    /* Element access. */
    T getval(int RowIndex,int ColIndex);
    void insert(T value, int row,int col);

    /* Whole-window shifts by one column or one row. */
    void shift_right();
    void shift_left();
    void shift_up();
    void shift_down();

    /* Debug dump of the whole window. */
    void print();
};
|
||||
|
||||
/* Window print
|
||||
* Prints the entire contents of the memory window
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::print()
|
||||
{
|
||||
int i, j;
|
||||
printf("Window Size = %d x %d\n",LROW,LCOL);
|
||||
printf("Col \t");
|
||||
for(j = 0; j < LCOL; j++){
|
||||
printf("%d \t",j);
|
||||
}
|
||||
printf("\n");
|
||||
for(i = LROW-1; i > -1; i--){
|
||||
printf("Row %d: \t",i);
|
||||
for(j=0; j < LCOL; j++){
|
||||
printf("%d\t",M[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/* Window shift right
|
||||
* Moves all the contents of the window horizontally
|
||||
* Assumes new values will be placed in column = LCOL-1
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::shift_right()
|
||||
{
|
||||
#pragma AP inline
|
||||
int i, j;
|
||||
for(i = 0; i < LROW; i++){
|
||||
#pragma AP unroll
|
||||
for(j=0; j < LCOL-1; j++){
|
||||
#pragma AP unroll
|
||||
M[i][j] = M[i][j+1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Window shift left
|
||||
* Moves all the contents of the window horizontally
|
||||
* Assumes new values will be placed in column = 0
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::shift_left()
|
||||
{
|
||||
#pragma AP inline
|
||||
int i, j;
|
||||
for(i = 0; i < LROW; i++){
|
||||
#pragma AP unroll
|
||||
for(j=LCOL-1; j > 0; j--){
|
||||
#pragma AP unroll
|
||||
M[i][j] = M[i][j-1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Window shift up
|
||||
* Moves all the contents of the window vertically
|
||||
* Assumes new values will be placed in row = 0
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::shift_up()
|
||||
{
|
||||
#pragma AP inline
|
||||
int i, j;
|
||||
for(i = LROW-1; i > 0; i--){
|
||||
#pragma AP unroll
|
||||
for(j=0; j < LCOL; j++){
|
||||
#pragma AP unroll
|
||||
M[i][j] = M[i-1][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Window shift down
|
||||
* Moves all the contents of the window vertically
|
||||
* Assumes new values will be placed in row = LROW - 1
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::shift_down()
|
||||
{
|
||||
#pragma AP inline
|
||||
int i, j;
|
||||
for(i = 0; i < LROW-1; i++){
|
||||
#pragma AP unroll
|
||||
for(j=0; j < LCOL; j++){
|
||||
#pragma AP unroll
|
||||
M[i][j] = M[i+1][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Window insert
|
||||
* Inserts a new value at any location of the window
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
void ap_window<T,LROW,LCOL>::insert(T value, int row, int col)
|
||||
{
|
||||
#pragma AP inline
|
||||
M[row][col] = value;
|
||||
}
|
||||
|
||||
/* Window getval
|
||||
* Returns the value of any window location
|
||||
*/
|
||||
template <typename T, int LROW, int LCOL>
|
||||
T ap_window<T,LROW,LCOL>::getval(int RowIndex, int ColIndex)
|
||||
{
|
||||
#pragma AP inline
|
||||
T return_value;
|
||||
return_value = M[RowIndex][ColIndex];
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,19 @@
|
||||
#include <ap_int.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
/*
 * is_packet_complete()
 *
 * Accounts for one more 4-byte transfer and reports whether the current packet
 * has been fully transmitted.
 *
 * count : in/out running number of bytes transferred in the current packet.
 *         It is increased by 4 on every call and reset to 0 when the packet completes.
 * size  : the packet size in bytes.
 *
 * Returns 1 when the byte count has reached the packet size, otherwise 0.
 *
 * The comparison is ">=" rather than "==": with "==" a packet size that is not a
 * multiple of 4 (or is smaller than 4) would never be matched, so the function
 * would never report completion and the counter would keep growing.
 * For packet sizes that are a multiple of 4 the result is the same as with "==".
 */
int is_packet_complete(int *count, int size) {

	//Every call accounts for one transfer of 4 Bytes.
	const int bytes_per_transfer = 4;

	*count += bytes_per_transfer;

	//Not enough Bytes yet for a Full Packet.
	if (*count < size)
	{
		return 0;
	}

	//The Packet is Complete, Restart the Count for the Next Packet.
	*count = 0;
	return 1;
}
|
||||
@@ -0,0 +1,6 @@
|
||||
#ifndef _PACKET_MODE_OPERATIONS_H_
|
||||
#define _PACKET_MODE_OPERATIONS_H_
|
||||
|
||||
/*
 * Declaration of is_packet_complete().
 *
 * count : pointer to the caller's running byte counter for the current packet.
 * size  : the packet size in bytes.
 * Returns an int flag: 1 when the packet is complete, otherwise 0.
 */
int is_packet_complete(int *count, int size);
|
||||
|
||||
#endif
|
||||
17
Hardware/Vivado_HLS_IPs/Sobel_Filter/run_hls.tcl
Normal file
17
Hardware/Vivado_HLS_IPs/Sobel_Filter/run_hls.tcl
Normal file
@@ -0,0 +1,17 @@
|
||||
# Vivado HLS script: synthesizes the sobel_filter top function and exports it as an IP.
open_project Sobel_Filter

set_top sobel_filter

# Source files that make up the core.
foreach source_file {sobel.cpp sobel_operations.cpp packet_mode_operations.cpp} {
    add_files $source_file
}

open_solution "solution1"

#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}

# Target clock period of 10 (presumably ns, the tool's default unit - verify).
create_clock -period 10 -name default

csynth_design

export_design -format ip_catalog -display_name "Sobel Filter" -version "5.8"
|
||||
611
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.cpp
Normal file
611
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.cpp
Normal file
@@ -0,0 +1,611 @@
|
||||
#include "sobel.h"
|
||||
#include "sobel_operations.h"
|
||||
#include "packet_mode_operations.h"
|
||||
|
||||
/*
|
||||
* sobel_filter()
|
||||
*
|
||||
* The Hardware Functionality of the Sobel Filter (HW Accelerator) Core.
|
||||
*
|
||||
* The Sobel Filter is a HW Accelerator that Applies Sobel Edge Detection on Images.
|
||||
* It Receives and Processes the Image Data in Rows.
|
||||
* In order to Produce one Processed Row it Requires 3 Received Rows.
|
||||
* This Precondition is due to the Fact that Edge Detection is Applied to a Pixel according to its Neighbor Pixels.
|
||||
*
|
||||
* Once the Sobel Filter Receives the First 3 Rows it Produces one Processed Row.
|
||||
* Then it Rejects the First Row, Sets the Second Row as First and Sets the Last Row as Second.
|
||||
* The Next/Newly Received Row is Set as the Last of the Rows.
|
||||
* Now there are, again, 3 Rows in Order to Produce the Next Processed Row.
|
||||
* This Procedure Carries on until all the Rows of the Image are Received and Processed.
|
||||
*
|
||||
* NOTE that the First and Last Rows of the Processed Image are Filled with Dark Pixels.
|
||||
* NOTE also that the First and Last Columns of all the Rows of the Processed Image are Filled with Dark Pixels.
|
||||
*
|
||||
* The Sobel Edge Detection Cannot be Applied to the Perimetric Pixels of the Image Since they Miss the Required Amount of Neighbors
|
||||
* this is why they are Filled with Dark Pixels.
|
||||
*
|
||||
* The Sequential Steps of the Sobel Filter are as Follows:
|
||||
*
|
||||
* a --> Send the First Row which is Filled with Dark Pixels.
|
||||
* b --> Pre-Fetch the 3 First Rows of the Image.
|
||||
* c --> Process the 3 Rows.
|
||||
* d --> Fill the First and Last Columns of the Produced Row with Dark Pixels.
|
||||
* e --> Send the Produced Row.
|
||||
* f --> Receive the Next Row.
|
||||
* g --> Start Again from Step c Until Receiving and Processing all the Rows.
|
||||
* h --> Send the Last Row which is Filled with Dark Pixels.
|
||||
*
|
||||
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
|
||||
*
|
||||
* 01 --------> The AXI Stream Input/Slave Interface of the Core Used to Receive the Image Data.
|
||||
* 02 --------> The AXI Stream Output/Master Interface of the Core Used to Forward the Processed Image Data.
|
||||
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
|
||||
*
|
||||
*
|
||||
* IMPORTANT TECHNIQUES Used to Improve the Overall Performance:
|
||||
*
|
||||
* A)Each Image Row is not Received in a Single Buffer.
|
||||
* Instead, while it is being Received it is Split Equally and Distributed across 16 Sector Buffers.
|
||||
* Each Sector Buffer has no Dependence on the Other Sector Buffers so the 16 Pieces of the Image Row Can be Processed in Parallel.
|
||||
* The HLS Tool Creates 16 Processing Units to Make Parallel Processing Possible.
|
||||
*
|
||||
*
|
||||
* B)Another Improvement Technique is the Usage of Four Line Sector Buffers which Allows
|
||||
* the Core to Process the Current 3 Rows while Concurrently Receiving the Next Row.
|
||||
* The Four Line Sector Buffer is Designed with 4 Lines where each is Used to Store the Data of a Single Row.
|
||||
* When the Sobel Filter Receives and Fills the 3 First Lines with 3 Rows it Starts the Processing.
|
||||
* The Fourth Line is Free to Start Receiving the Next Row while the Rest 3 Lines are Occupied with the Processing.
|
||||
*
|
||||
*
|
||||
* C)In Older Approaches after 3 Lines of a Three Line Sector Buffer were Processed the Lines would
|
||||
* Have to be Shifted Up so that the Last Line Could be Fed with the Next Received Row.
|
||||
* This Approach Required a Significant Amount of Copies where each Pixel of a Line of the Sector Buffer would Have to be Copied to the Upper Line.
|
||||
* The new Technique Requires Zero Copies as it Uses Indexing to Store the Received Rows in the Four Line Sector Buffer.
|
||||
*
|
||||
* Indexing Concerns which Should be Considered as the First, Second and Third Row to Process and where the Next Received Row Should be Stored.
|
||||
*
|
||||
* Initially: the First Received Row is Stored in the Line with Index 0 of the Four Line Sector Buffer.
|
||||
* : the Second Received Row is Stored in the Line with Index 3 of the Four Line Sector Buffer.
|
||||
* : the Third Received Row is Stored in the Line with Index 2 of the Four Line Sector Buffer.
|
||||
* : the Line with Index 1 is Used to Store the Next Received Line while the Other 3 are being Processed.
|
||||
*
|
||||
* When the Process of the 3 Lines Completes and a new Row is Received then the Indexing Changes so that we Can Start a new Processing and Receive another Row.
|
||||
* Now the First Row is no Longer Needed so the Line with Index 0 will be Used to Receive the Next Row.
|
||||
* The Second Row Becomes the First Row for the New Processing so the Line with Index 3 will be Used as the First Row.
|
||||
* The Third Row Becomes the Second Row for the New Processing so the Line with Index 2 will be Used as the Second Row.
|
||||
* The Last Received Row Becomes the Third Row for the New Processing so the Line with Index 1 will be Used as the Third Row.
|
||||
*
|
||||
* Following the Same Pattern as to which Lines to Process and where to Store the Next Row Leads to the Table Below:
|
||||
*
|
||||
* Index 0 | First Row | Next Row | Third Row | Second Row |
|
||||
* Index 1 | Next Row | Third Row | Second Row | First Row |
|
||||
* Index 2 | Third Row | Second Row | First Row | Next Row |
|
||||
* Index 3 | Second Row | First Row | Next Row | Third Row |
|
||||
*
|
||||
* To Make Indexing Applicable as Part of the Code we Used the first, second, last and temp Integer Variables which Hold the Current Index
|
||||
* in the Four Line Sector Buffer where each Row is Stored.
|
||||
*
|
||||
* In order to Calculate the Next Indexing for each Row we Used the Formula Below:
|
||||
* Index = (Index + 3) % 4
|
||||
*/
|
||||
int sobel_filter(/*01*/AXI_PIXEL STREAM_IN[MAX_WIDTH],
|
||||
/*02*/AXI_PIXEL STREAM_OUT[MAX_WIDTH],
|
||||
/*03*/int rows,
|
||||
/*04*/int cols,
|
||||
/*05*/int packet_mode_en,
|
||||
/*06*/int packet_size
|
||||
)
|
||||
{
|
||||
/*
|
||||
* Set the Fifo of the STREAM_OUT and STREAM_IN Interfaces to be Implemented with LUT RAM Memory.
|
||||
*/
|
||||
#pragma HLS RESOURCE variable=STREAM_OUT core=FIFO_LUTRAM
|
||||
#pragma HLS RESOURCE variable=STREAM_IN core=FIFO_LUTRAM
|
||||
|
||||
/*
|
||||
* The rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=rows bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=cols bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The packet_mode_en is a Register to Store a Value that Enables/Disables the Packet Mode.
|
||||
* The Packet Mode Should be Enabled when the Data are Transferred with Scatter/Gather Transactions.
|
||||
* When the Packet Mode is Enabled the Core Sends a TLAST=1 Signal in the Output Interface for each Transmitted Packet.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=packet_mode_en bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* The packet_size is a Register to Store the Size that each Packet Should Have (e.g 4K) when Using Scatter/Gather Transfers.
|
||||
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
|
||||
*/
|
||||
#pragma HLS INTERFACE s_axilite port=packet_size bundle=S_AXI4_LITE
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=S_AXI4_LITE
|
||||
|
||||
/*
|
||||
* Set the STREAM_OUT and STREAM_IN Interfaces of the Core to be AXI Stream Interfaces.
|
||||
* The Fifo Depth is Set to 1920 which is the Maximum Image Width that the Core Can Support to Process.
|
||||
*/
|
||||
#pragma HLS INTERFACE axis depth=1920 port=STREAM_IN
|
||||
#pragma HLS INTERFACE axis depth=1920 port=STREAM_OUT
|
||||
|
||||
int bytes_count; //Count the Number of Tranferred Bytes.
|
||||
int first; //Used to Know where the First Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int second; //Used to Know where the Second Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int last; //Used to Know where the Last Received Row is Located in the LINE4_SECTOR_BUFFER.
|
||||
int temp; //Used to Know where the Newest Received Row Should be Temporalily Located in the LINE4_SECTOR_BUFFER.
|
||||
|
||||
/*
|
||||
* The Number of Iterations Required to Receive or Send each Sector of a Row.
|
||||
* The sector_iter is an Array with as many Fields as the Number of Sectors.
|
||||
* The sector_iter Array is Configured to be Completely Partitioned according to the #pragma HLS ARRAY_PARTITION.
|
||||
*/
|
||||
int sector_iter[SECTORS];
|
||||
#pragma HLS ARRAY_PARTITION variable=sector_iter dim=1 complete
|
||||
|
||||
int sector_size; //The Number of Columns that each Sector Should Store.
|
||||
int remaining_pixels; //If the Number of Columns is not an Integer Multiple of the Number of Sectors then we Have Remaining Pixels that Should be Distributed in all the Sectors.
|
||||
|
||||
const RGB zero_pixel = {0, 0, 0}; //This is a Dark Pixel Used to Set the First and Last Row and all the First and Last Columns of the Image.
|
||||
|
||||
/*
|
||||
* Declare 16 Memory Buffers of Type LINE4_SECTOR_BUFFER.
|
||||
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
|
||||
*
|
||||
* These Buffers are Used to Receive the Image Rows before being Processed.
|
||||
*/
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR0;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR0 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR1;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR1 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR2;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR2 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR3;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR3 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR4;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR4 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR5;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR5 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR6;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR6 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR7;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR7 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR8;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR8 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR9;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR9 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR10;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR10 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR11;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR11 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR12;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR12 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR13;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR13 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR14;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR14 core=RAM_2P_BRAM
|
||||
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR15;
|
||||
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR15 core=RAM_2P_BRAM
|
||||
|
||||
/*
|
||||
* Declare 16 Memory Buffers of Type LINE1_SECTOR_BUFFER.
|
||||
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
|
||||
*
|
||||
* These Buffers are Used to Store the Image Rows after being Processed.
|
||||
*/
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR0;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR0 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR1;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR1 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR2;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR2 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR3;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR3 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR4;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR4 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR5;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR5 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR6;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR6 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR7;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR7 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR8;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR8 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR9;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR9 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR10;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR10 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR11;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR11 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR12;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR12 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR13;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR13 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR14;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR14 core=RAM_2P_BRAM
|
||||
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR15;
|
||||
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR15 core=RAM_2P_BRAM
|
||||
|
||||
/*
|
||||
* Set Initial Values.
|
||||
*/
|
||||
bytes_count = 0;
|
||||
first = 0;
|
||||
second = 3;
|
||||
last = 2;
|
||||
temp = 0;
|
||||
|
||||
|
||||
//Calculate the Number of Columns that Should be Stored to each Sector Buffer.
|
||||
//NOTE that this is the Initial Sector Size that is Equal to All the Sector Buffers.
|
||||
sector_size = (int)(cols / SECTORS);
|
||||
|
||||
/*
|
||||
* Calculate any Remaining Bytes in Case the Number of Columns is not an Integer Multiple of the Number of Sector Buffers.
|
||||
*
|
||||
* For Example, for an Image of Width 524 Pixels we Have 524 Pixels / 16 Sectors = 32.75 Pixels which is Not an Integer Multiple of the 16 Sector Buffers.
|
||||
* For each Sector Buffer we Have a Sector Size of 32 Pixels so 32 Pixels * 16 Sectors = 512 which Leads to Have 12 Remaining Pixels from the Initial 524.
|
||||
*
|
||||
* As a Result each of the 16 Sector Buffers Initially Has a Sector Size of 32.
|
||||
* The Remaining Pixels Should be Distributed to the Sector Buffers so the First 12 Sector Buffers will Have a Sector Size with one More Pixel which Leads to 33 Pixels Sector Size.
|
||||
*
|
||||
* |Sector0 |Sector1 |Sector2 |Sector3 |Sector4 |Sector5 |Sector6 |Sector7 |Sector8 |Sector9 |Sector10 |Sector11 |Sector12 |Sector13 |Sector14 |Sector15|
|
||||
* |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |32 |32 |32 |32 |
|
||||
*
|
||||
*/
|
||||
remaining_pixels = cols - (sector_size * SECTORS);
|
||||
|
||||
//Loop to Distribute the Remaining Bytes to the Sector Buffers.
|
||||
for (int i = 0; i < SECTORS; i++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
//Set the Array Field of the Corresponding Sector Buffer with the Initial Sector Size.
|
||||
sector_iter[i] = sector_size;
|
||||
|
||||
//Check if we still Have Remaining Pixels
|
||||
if (remaining_pixels > 0)
|
||||
{
|
||||
//Decrease the Number of Remaining Pixels.
|
||||
remaining_pixels--;
|
||||
|
||||
//Increment by 1 the Sector Size of the Corresponding Sector Buffer
|
||||
sector_iter[i] ++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The First Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
|
||||
* So, Send the First Row of Zero Pixels.
|
||||
*/
|
||||
send_1st_line:
|
||||
for (int col=0; col<cols; col++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
|
||||
|
||||
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
|
||||
output_pixel.user = 0x1;
|
||||
output_pixel.tdest = 0x1;
|
||||
|
||||
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
|
||||
if (packet_mode_en == 1)
|
||||
{
|
||||
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packket Size.
|
||||
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_pixel.last = 0x0;
|
||||
}
|
||||
|
||||
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
|
||||
output_pixel.data = 0x0;
|
||||
|
||||
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
|
||||
STREAM_OUT[col] = output_pixel;
|
||||
}
|
||||
|
||||
/*
|
||||
* The Sobel Edge Detection Algorithm Requires Three Rows in Order to Produce one Processed Row.
|
||||
* So, Pre-Fetch the First 3 Rows.
|
||||
*/
|
||||
prefetch_3lines:
|
||||
for (int row=0; row<3; row++)
|
||||
{
|
||||
//Receive a Row which is Distributed to the 16 Four Line Sector Buffers of Type LINE4_SECTOR_BUFFER.
|
||||
receive_post_line(STREAM_IN,
|
||||
&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
temp,
|
||||
sector_iter
|
||||
);
|
||||
|
||||
//Calculate the Vertical Position where the Next Received Row Should be Stored in the Four Line Sector Buffers.
|
||||
temp = (temp+3)%4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Produce a Processed Row from the 3 Pre-Fetched Rows.
|
||||
* Send the Processed Row and then Receive a New Row in Order to Produce again a Processed Row.
|
||||
*
|
||||
* Loop Until Receiving all the Rows of the Image.
|
||||
*/
|
||||
proc_module:
|
||||
for (int row=0; row<rows-3; row++)
|
||||
{
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR0 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR1 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR2 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR3 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR4 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR5 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR6 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR7 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR8 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR9 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR10 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR11 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR12 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR13 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR14 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR15 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR0 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR1 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR2 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR3 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR4 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR5 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR6 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR7 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR8 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR9 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR10 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR11 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR12 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR13 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR14 array //false
|
||||
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR15 array //false
|
||||
|
||||
//Process the 3 Rows that are Received in the 16 Four Line Sector Buffers and Produce one Processed Row.
|
||||
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_size,
|
||||
first,
|
||||
second,
|
||||
last
|
||||
);
|
||||
|
||||
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
|
||||
|
||||
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
|
||||
|
||||
//The Produced Row from the three Processed Rows is Stored in the 16 One Line Sector Buffers.
|
||||
//Send the Produced Row over the AXI Stream Out Interface.
|
||||
send_line(STREAM_OUT,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_iter,
|
||||
packet_mode_en,
|
||||
packet_size,
|
||||
&bytes_count
|
||||
);
|
||||
|
||||
//Receive the Next Row which is Distributed to the 16 Four Line Sector Buffers.
|
||||
receive_post_line(STREAM_IN,
|
||||
&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
temp,
|
||||
sector_iter
|
||||
);
|
||||
|
||||
//Calculate which Should be Considered as the First Line for Producing the Next Processed Row.
|
||||
first = (first+3)%4;
|
||||
|
||||
//Calculate which Should be Considered as the Second Line for Producing the Next Processed Row.
|
||||
second = (second+3)%4;
|
||||
|
||||
//Calculate which Should be Considered as the Last Line for Producing the Next Processed Row.
|
||||
last = (last+3)%4;
|
||||
|
||||
//Calculate where the New Received Row Should be Stored in the Four Line Sector Buffers.
|
||||
temp = (temp+3)%4;
|
||||
}
|
||||
|
||||
//The Previous for Loop Ended before Processing the Last Received Row.
|
||||
//So, Process here the Last 3 Rows to Produce the Last Processed Row.
|
||||
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
|
||||
&LINE4_BUFFER_SECTOR1,
|
||||
&LINE4_BUFFER_SECTOR2,
|
||||
&LINE4_BUFFER_SECTOR3,
|
||||
&LINE4_BUFFER_SECTOR4,
|
||||
&LINE4_BUFFER_SECTOR5,
|
||||
&LINE4_BUFFER_SECTOR6,
|
||||
&LINE4_BUFFER_SECTOR7,
|
||||
&LINE4_BUFFER_SECTOR8,
|
||||
&LINE4_BUFFER_SECTOR9,
|
||||
&LINE4_BUFFER_SECTOR10,
|
||||
&LINE4_BUFFER_SECTOR11,
|
||||
&LINE4_BUFFER_SECTOR12,
|
||||
&LINE4_BUFFER_SECTOR13,
|
||||
&LINE4_BUFFER_SECTOR14,
|
||||
&LINE4_BUFFER_SECTOR15,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_size,
|
||||
first,
|
||||
second,
|
||||
last);
|
||||
|
||||
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
|
||||
|
||||
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
|
||||
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
|
||||
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
|
||||
|
||||
//Send the Last Produced Row over the AXI Stream Out Interface.
|
||||
send_line(STREAM_OUT,
|
||||
&LINE1_BUFFER_SECTOR0,
|
||||
&LINE1_BUFFER_SECTOR1,
|
||||
&LINE1_BUFFER_SECTOR2,
|
||||
&LINE1_BUFFER_SECTOR3,
|
||||
&LINE1_BUFFER_SECTOR4,
|
||||
&LINE1_BUFFER_SECTOR5,
|
||||
&LINE1_BUFFER_SECTOR6,
|
||||
&LINE1_BUFFER_SECTOR7,
|
||||
&LINE1_BUFFER_SECTOR8,
|
||||
&LINE1_BUFFER_SECTOR9,
|
||||
&LINE1_BUFFER_SECTOR10,
|
||||
&LINE1_BUFFER_SECTOR11,
|
||||
&LINE1_BUFFER_SECTOR12,
|
||||
&LINE1_BUFFER_SECTOR13,
|
||||
&LINE1_BUFFER_SECTOR14,
|
||||
&LINE1_BUFFER_SECTOR15,
|
||||
sector_iter,
|
||||
packet_mode_en,
|
||||
packet_size,
|
||||
&bytes_count
|
||||
);
|
||||
|
||||
/*
|
||||
* The Last Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
|
||||
* So, Send the Last Row of Zero Pixels.
|
||||
*/
|
||||
send_last_line:
|
||||
for (int col=0; col<cols; col++)
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
|
||||
|
||||
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
|
||||
output_pixel.user = 0x1;
|
||||
output_pixel.tdest = 0x1;
|
||||
|
||||
//Since this is the Last Row Check if this is the Last Pixel to Send.
|
||||
if (col==cols-1 )
|
||||
{
|
||||
//Set the TLAST Signal to 1 to Indicate that this will be the Last Transmission of the Data.
|
||||
output_pixel.last = 0x1;
|
||||
}
|
||||
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
|
||||
else if(packet_mode_en == 1)
|
||||
{
|
||||
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packet Size.
|
||||
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
|
||||
}
|
||||
else
|
||||
output_pixel.last = 0x0;
|
||||
|
||||
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
|
||||
output_pixel.data = 0x0;
|
||||
|
||||
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
|
||||
STREAM_OUT[col] = output_pixel;
|
||||
}
|
||||
|
||||
bytes_count = 0; // Byte Counter
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
31
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.h
Normal file
31
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef _SOBEL_H_
|
||||
#define _SOBEL_H_
|
||||
#include "ap_bmp.h"
|
||||
#include "ap_axi_sdata2.h"
|
||||
#include "ap_int.h"
|
||||
#include "ap_utils.h"
|
||||
#include "ap_video.h"
|
||||
|
||||
#define MAX_WIDTH 1920
|
||||
#define MAX_HEIGHT 1080
|
||||
|
||||
#define SECTORS 16
|
||||
|
||||
//Absolute Difference of x and y. Every Argument Use is Parenthesized so that Expression Arguments (e.g. a, b+c) Expand Correctly.
#define ABSDIFF(x,y) (((x)>(y))? (x) - (y) : (y) - (x))
|
||||
//Absolute Value of x. Every Argument Use is Parenthesized so that Expression Arguments (e.g. a-b) Expand Correctly.
#define ABS(x) (((x)>0)? (x) : -(x))
|
||||
#define RGB(r,g,b) ((((word)r)<<16)|(((word)g)<<8)|((word)b))
|
||||
|
||||
typedef ap_rgb <8, 8, 8> RGB;
|
||||
typedef ap_axiu2 <32, 1, 1, 1> AXI_PIXEL;
|
||||
|
||||
typedef ap_linebuffer <unsigned char, 4, (MAX_WIDTH/SECTORS)+2> LINE4_SECTOR_BUFFER;
|
||||
typedef ap_linebuffer <RGB, 1, (MAX_WIDTH/SECTORS)+1> LINE1_SECTOR_BUFFER;
|
||||
|
||||
int sobel_filter(AXI_PIXEL STREAM_IN[MAX_WIDTH],
|
||||
AXI_PIXEL STREAM_OUT[MAX_WIDTH],
|
||||
int rows,
|
||||
int cols,
|
||||
int packet_mode_en,
|
||||
int packet_size);
|
||||
|
||||
#endif
|
||||
1456
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.cpp
Normal file
1456
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.cpp
Normal file
File diff suppressed because it is too large
Load Diff
195
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.h
Normal file
195
Hardware/Vivado_HLS_IPs/Sobel_Filter/sobel_operations.h
Normal file
@@ -0,0 +1,195 @@
|
||||
#ifndef _SOBEL_OPERATIONS_H_
|
||||
#define _SOBEL_OPERATIONS_H_
|
||||
|
||||
unsigned char rgb2y(RGB pix);
|
||||
|
||||
/*
|
||||
* Template of the sobel_operator()
|
||||
*
|
||||
* The sobel_operator() Makes Sobel Computation Using a 3x3 Neighborhood
|
||||
*/
|
||||
template<int dummy_index>
|
||||
RGB sobel_operator(LINE4_SECTOR_BUFFER *window,
|
||||
unsigned int x_index,
|
||||
unsigned int y_first,
|
||||
unsigned int y_second,
|
||||
unsigned int y_last)
|
||||
{
|
||||
#pragma HLS INLINE off
|
||||
#pragma HLS EXPRESSION_BALANCE off
|
||||
|
||||
short x_weight = 0;
|
||||
short y_weight = 0;
|
||||
|
||||
short x_weight_array[9];
|
||||
#pragma HLS ARRAY_PARTITION variable=x_weight_array complete dim=1
|
||||
short y_weight_array[9];
|
||||
#pragma HLS ARRAY_PARTITION variable=y_weight_array complete dim=1
|
||||
|
||||
short edge_weight;
|
||||
unsigned char edge_val;
|
||||
RGB pixel;
|
||||
|
||||
const char x_op[3][3] = { {-1, 0, 1},
|
||||
{-2, 0, 2},
|
||||
{-1, 0, 1}};
|
||||
#pragma HLS ARRAY_PARTITION variable=x_op complete dim=1
|
||||
|
||||
const char y_op[3][3] = { { 1, 2, 1},
|
||||
{ 0, 0, 0},
|
||||
{-1,-2,-1}};
|
||||
#pragma HLS ARRAY_PARTITION variable=y_op complete dim=1
|
||||
|
||||
sobel_mul:
|
||||
{
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the First Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++)
|
||||
{
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[j] = (window->getval(y_first,x_index + j) * x_op[0][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[j] = (window->getval(y_first,x_index + j) * y_op[0][j]);
|
||||
}
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the Second Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++)
|
||||
{
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[3+j] = (window->getval(y_second,x_index + j) * x_op[1][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[3+j] = (window->getval(y_second,x_index + j) * y_op[1][j]);
|
||||
}
|
||||
|
||||
//Compute Approximation of the Gradients in the X-Y Direction for the Third Row of x_op and y_op.
|
||||
for(char j = 0; j < 3; j++){
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight_array[6+j] = (window->getval(y_last,x_index + j) * x_op[2][j]);
|
||||
// Y Direction Gradient
|
||||
y_weight_array[6+j] = (window->getval(y_last,x_index + j) * y_op[2][j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(char j = 0; j < 9; j++) {
|
||||
#pragma HLS UNROLL
|
||||
#pragma HLS PIPELINE II=1
|
||||
|
||||
// X Direction Gradient
|
||||
x_weight += x_weight_array[j];
|
||||
// Y Direction Gradient
|
||||
y_weight += y_weight_array[j];
|
||||
}
|
||||
|
||||
edge_weight = ABS(x_weight) + ABS(y_weight);
|
||||
|
||||
edge_val = (255-(unsigned char)(edge_weight));
|
||||
|
||||
//Edge Thresholding
|
||||
if(edge_val > 200)
|
||||
{
|
||||
edge_val = 255;
|
||||
}
|
||||
else if(edge_val < 100)
|
||||
{
|
||||
edge_val = 0;
|
||||
}
|
||||
|
||||
pixel.R = pixel.G = pixel.B = edge_val;
|
||||
|
||||
return pixel;
|
||||
}
|
||||
|
||||
void start_sobel_operations(
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
|
||||
int sector_size,
|
||||
int first,
|
||||
int second,
|
||||
int last);
|
||||
|
||||
void send_line(
|
||||
AXI_PIXEL *STREAM_OUT,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
|
||||
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
|
||||
int *sector_iter_array,
|
||||
int packet_mode_enable,
|
||||
int packet_size,
|
||||
int *remain_bytes);
|
||||
|
||||
void receive_post_line(
|
||||
AXI_PIXEL *STREAM_IN,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
|
||||
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
|
||||
int row,
|
||||
int *sector_iter_array);
|
||||
|
||||
#endif
|
||||
49
Hardware/create_project.tcl
Normal file
49
Hardware/create_project.tcl
Normal file
@@ -0,0 +1,49 @@
|
||||
##################################################################################
|
||||
# #
|
||||
# This Script #
|
||||
# 1.Creates a New Vivado Project #
|
||||
# 2.Generates the Block Design Described in "pcie_acceleration_vc707_design.tcl" #
|
||||
# 3.Imports the Required Constraint File "constraints.xdc" #
|
||||
# 4.Imports the Required HDL Wrapper File "hdl_wrapper.v" #
|
||||
# #
|
||||
##################################################################################
|
||||
|
||||
set relative_directory [pwd]
|
||||
|
||||
set project_directory $relative_directory/pcie_acceleration_vc707
|
||||
|
||||
set ip_repository $relative_directory/Vivado_HLS_IPs
|
||||
|
||||
set constraints_directory $relative_directory/Constraints
|
||||
|
||||
set hdl_wrapper_directory $relative_directory/HDL_Wrapper
|
||||
|
||||
set block_design_directory $relative_directory/Vivado_Block_Design
|
||||
|
||||
set src_bd_design_directory $relative_directory/pcie_acceleration_vc707/pcie_acceleration_vc707.srcs/sources_1/bd/pcie_acceleration_vc707_design
|
||||
|
||||
#Create a New Project Named "pcie_accel_demo"
|
||||
create_project pcie_accel_demo $project_directory -part xc7vx485tffg1761-2
|
||||
|
||||
#Set the Board Part which is Required for Certain Configurations such as the Uartlite Controller (RS-232)
|
||||
set_property board_part xilinx.com:vc707:part0:1.2 [current_project]
|
||||
|
||||
#Add the HLS IPs before Opening the Block Design
|
||||
set_property ip_repo_paths {Vivado_HLS_IPs/Acceleration_Scheduler_Direct Vivado_HLS_IPs/Acceleration_Scheduler_Indirect Vivado_HLS_IPs/Acceleration_Scheduler_SG_XDMA Vivado_HLS_IPs/DMA_SG_PCIe_Scheduler Vivado_HLS_IPs/Fetch_Scheduler Vivado_HLS_IPs/Interrupt_Manager Vivado_HLS_IPs/Info_Memory_Block Vivado_HLS_IPs/Send_Scheduler Vivado_HLS_IPs/Sobel_Filter} [current_project]
|
||||
update_ip_catalog
|
||||
|
||||
#Add the Block Design
|
||||
source $block_design_directory/pcie_acceleration_vc707_design.tcl
|
||||
|
||||
#Add Constraint Files
|
||||
add_files -fileset constrs_1 -norecurse $constraints_directory/constraints.xdc
|
||||
import_files -fileset constrs_1 $constraints_directory/constraints.xdc
|
||||
|
||||
#Add the HDL Wrapper
|
||||
add_files -norecurse -scan_for_includes $hdl_wrapper_directory/hdl_wrapper.v
|
||||
import_files -norecurse $hdl_wrapper_directory/hdl_wrapper.v
|
||||
update_compile_order -fileset sources_1
|
||||
update_compile_order -fileset sources_1
|
||||
update_compile_order -fileset sim_1
|
||||
|
||||
|
||||
0
Images/.keep
Normal file
0
Images/.keep
Normal file
BIN
Images/system_overview.png
Normal file
BIN
Images/system_overview.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 87 KiB |
112
README.md
Normal file
112
README.md
Normal file
@@ -0,0 +1,112 @@
|
||||
# FPGA Hardware Acceleration over PCIe
|
||||
|
||||
## What This Is
|
||||
|
||||
Multi-threaded Linux application + custom kernel driver + FPGA hardware design that accelerates Sobel edge detection on images. Demonstrates full-stack embedded systems engineering from RTL to application layer.
|
||||
|
||||
![System Overview][system_overview]
|
||||
|
||||
## Project Summary
|
||||
This project implements a **full-stack hardware acceleration platform** that offloads compute-intensive image processing tasks from a multi-threaded Linux application to custom FPGA accelerators connected via PCIe. The system demonstrates advanced concepts in **computer architecture, hardware-software co-design, parallel processing, and driver development**.
|
||||
|
||||
### Key Achievement
|
||||
Successfully designed and implemented a multi-acceleration-group architecture supporting **concurrent hardware acceleration** for up to **16 simultaneous threads**, with intelligent resource scheduling and DMA-based data transfers.
|
||||
|
||||
- **See [Engineering Challenges Solved](#engineering-challenges-solved)**
|
||||
|
||||
## Technical Overview
|
||||
|
||||
**Hardware (Xilinx Virtex-7 FPGA)**
|
||||
- 7 parallel acceleration units supporting up to 16 concurrent threads
|
||||
- Custom IP cores designed in C/C++ (Vivado HLS), synthesized to RTL
|
||||
- PCIe Gen2 x4 interface with DMA engines for high-throughput data transfer
|
||||
- Sobel filter accelerator processing up to 1080p images
|
||||
|
||||
**Software (Linux)**
|
||||
- **Kernel driver**: PCIe device management, MSI interrupts, multi-thread resource scheduling
|
||||
- **User application**: pthreads, memory-mapped I/O, DMA buffer management
|
||||
- **MicroBlaze firmware**: FPGA system initialization
|
||||
|
||||
---
|
||||
## Architecture Highlights
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────┐
|
||||
│ Multi-threaded Application (pthreads) │
|
||||
└──────────────┬──────────────────────────┘
|
||||
│ ioctl(), mmap()
|
||||
┌──────────────▼──────────────────────────┐
|
||||
│ Kernel Driver (Resource Scheduler) │ ← Thread arbitration, DMA setup
|
||||
└──────────────┬──────────────────────────┘
|
||||
│ PCIe, MSI Interrupts
|
||||
┌──────────────▼──────────────────────────┐
|
||||
│ FPGA Hardware (7 Accel Groups) │ ← Parallel processing
|
||||
│ • Fetch/Send Schedulers (DMA) │
|
||||
│ • Sobel Filter Accelerators │
|
||||
│ • Interrupt Manager │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Why 7 acceleration groups?**
|
||||
- 2 Direct-mode (PCIe → BRAM, low latency)
|
||||
- 4 Indirect-mode (PCIe → DDR3, higher throughput)
|
||||
- 1 Scatter-Gather (supports fragmented user memory)
|
||||
|
||||
Each can process different images simultaneously with driver-managed scheduling.
|
||||
|
||||
---
|
||||
|
||||
## Engineering Challenges Solved
|
||||
|
||||
**1. Multi-thread resource arbitration**
|
||||
16 threads competing for 7 hardware units → Implemented two scheduling policies (greedy, best-available) in kernel driver with per-thread state tracking
|
||||
|
||||
**2. PCIe interrupt routing**
|
||||
Designed custom Interrupt Manager IP to map 7 accelerators to MSI vectors, coordinated with GPIO-triggered interrupts
|
||||
|
||||
**3. Zero-copy DMA from userspace**
|
||||
Used `get_user_pages()` + scatter-gather tables for direct DMA to/from application buffers without memcpy overhead
|
||||
|
||||
**4. Hardware-software timing correlation**
|
||||
FPGA global timer accessible via memory-mapped registers for nanosecond-precision performance analysis
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
- **Throughput**: Supports 16 concurrent requests with linear scaling up to 7 threads
|
||||
- **Latency**: ~50-100 μs for VGA images (640x480)
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Generate custom IPs (one-time)
|
||||
cd Hardware/Vivado_HLS_IPs/Sobel_Filter && vivado_hls run_hls.tcl
|
||||
# ... repeat for 8 other IPs
|
||||
|
||||
# Build bitstream
|
||||
cd Hardware && vivado -source create_project.tcl
|
||||
# Flow → Generate Bitstream
|
||||
|
||||
# Load driver & run
|
||||
cd Software/Linux_App_Driver
|
||||
make
|
||||
./make_device
|
||||
insmod xilinx_pci_driver.ko
|
||||
./ui image.bmp 100 16 1 10 # 100 iterations, 16 threads
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Repository Structure
|
||||
|
||||
```
|
||||
Hardware/Vivado_HLS_IPs/ 9 custom IP cores (C++ → RTL)
|
||||
Hardware/Vivado_Block_Design/ System integration (AXI, PCIe, DDR3)
|
||||
Software/Linux_App_Driver/ Kernel driver + test application
|
||||
Software/Microblaze_XSDK/ FPGA firmware
|
||||
```
|
||||
|
||||
[system_overview]: /Images/system_overview.png "System Overview Diagram"
|
||||
0
Software/.keep
Normal file
0
Software/.keep
Normal file
0
Software/Linux_App_Driver/.keep
Normal file
0
Software/Linux_App_Driver/.keep
Normal file
13
Software/Linux_App_Driver/Makefile
Normal file
13
Software/Linux_App_Driver/Makefile
Normal file
@@ -0,0 +1,13 @@
|
||||
obj-m := xilinx_pci_driver.o
|
||||
|
||||
XILINX_PCI_DRIVER_HOME := $(shell pwd)
|
||||
|
||||
XILINX_PCI_DRIVER_KVER := $(shell uname -r)
|
||||
|
||||
all:
|
||||
make -C /lib/modules/$(XILINX_PCI_DRIVER_KVER)/build M=$(XILINX_PCI_DRIVER_HOME) modules
|
||||
g++ ui.cpp -o ui -pthread
|
||||
clean:
|
||||
make -C /lib/modules/$(XILINX_PCI_DRIVER_KVER)/build M=$(XILINX_PCI_DRIVER_HOME) clean
|
||||
|
||||
|
||||
BIN
Software/Linux_App_Driver/Results/hd.bmp
Normal file
BIN
Software/Linux_App_Driver/Results/hd.bmp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.6 MiB |
BIN
Software/Linux_App_Driver/Results/qvga.bmp
Normal file
BIN
Software/Linux_App_Driver/Results/qvga.bmp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 225 KiB |
1
Software/Linux_App_Driver/Results/renamer.txt
Normal file
1
Software/Linux_App_Driver/Results/renamer.txt
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
BIN
Software/Linux_App_Driver/Results/vga.bmp
Normal file
BIN
Software/Linux_App_Driver/Results/vga.bmp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 900 KiB |
7
Software/Linux_App_Driver/make_device
Normal file
7
Software/Linux_App_Driver/make_device
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
mount -t debugfs none /sys/kernel/debug/
|
||||
rm -rf /dev/xilinx_pci_driver
|
||||
mknod /dev/xilinx_pci_driver c 240 1
|
||||
chown root /dev/xilinx_pci_driver
|
||||
chmod 0644 /dev/xilinx_pci_driver
|
||||
ls -al /dev/xilinx_pci_driver
|
||||
3554
Software/Linux_App_Driver/ui.cpp
Normal file
3554
Software/Linux_App_Driver/ui.cpp
Normal file
File diff suppressed because it is too large
Load Diff
5732
Software/Linux_App_Driver/xilinx_pci_driver.c
Normal file
5732
Software/Linux_App_Driver/xilinx_pci_driver.c
Normal file
File diff suppressed because it is too large
Load Diff
551
Software/Linux_App_Driver/xilinx_pci_driver.h
Normal file
551
Software/Linux_App_Driver/xilinx_pci_driver.h
Normal file
@@ -0,0 +1,551 @@
|
||||
/**
|
||||
*
|
||||
* This Header File Contains the Necessary Macros for the Kernel Driver Module and the Userspace Application
|
||||
*
|
||||
*/
|
||||
|
||||
//---Valid Macros To Keep---------------------------------------------//
|
||||
|
||||
#define KC705_PCI_VENDOR_ID 0x10EE
|
||||
#define KC705_PCI_DEVICE_ID 0x7021
|
||||
|
||||
#define VC707_PCI_VENDOR_ID 0x10EE
|
||||
#define VC707_PCI_DEVICE_ID 0x7022
|
||||
|
||||
#define VENDOR_ID VC707_PCI_VENDOR_ID
|
||||
#define DEVICE_ID VC707_PCI_DEVICE_ID
|
||||
|
||||
#define HAVE_REGION 0x01 // I/O Memory region
|
||||
#define HAVE_IRQ 0x02 // Interrupt
|
||||
#define HAVE_KREG 0x04 // Kernel Registration
|
||||
#define HAVE_DEBUGFS 0x08 // Debugfs File Creation
|
||||
|
||||
#define DEFAULT_SIGNAL_0 34
|
||||
#define DEFAULT_SIGNAL_1 35
|
||||
#define DEFAULT_SIGNAL_2 36
|
||||
#define DEFAULT_SIGNAL_3 37
|
||||
#define DEFAULT_SIGNAL_4 38
|
||||
#define DEFAULT_SIGNAL_5 39
|
||||
#define DEFAULT_SIGNAL_6 40
|
||||
#define DEFAULT_SIGNAL_SG 41
|
||||
|
||||
#define OCCUPIED 1
|
||||
#define NOT_OCCUPIED 0
|
||||
|
||||
/**
|
||||
* @note
|
||||
*
|
||||
* The Following Macro Line Works as a Switch.
|
||||
* Remove/Add One of the '*' at the Beginning to Change the State of the Switch.
|
||||
* One '*' (as Currently Written) Enables --> GREEDY.
|
||||
* Two '*' Enable --> BEST_AVAILABLE.
|
||||
* This is Used to Safely Disable/Enable Specific Code Parts of the Driver.
|
||||
*/
|
||||
|
||||
/*/ #define BEST_AVAILABLE /*/ #define GREEDY /**/
|
||||
|
||||
/** @note
|
||||
*
|
||||
* The Macros Below are Used to Enable/Disable Debug Messages.
|
||||
*
|
||||
* The DEBUG_MESSAGES is Used to Print the Driver's Debug Messages to the /var/log/kern.log File.
|
||||
* The DEBUG_MESSAGES_UI is Used to Print the Userspace Application's Debug messages to the Terminal.
|
||||
*
|
||||
* Uncomment to Enable the Messages Debugging.
|
||||
*/
|
||||
//#define DEBUG_MESSAGES
|
||||
//#define DEBUG_MESSAGES_UI
|
||||
|
||||
#define SUCCESS 0
|
||||
#define FAILURE 1
|
||||
|
||||
#define BYTE 1
|
||||
#define KBYTE 1024
|
||||
#define MBYTE 1048576
|
||||
|
||||
#define START 0x1
|
||||
#define ACK 0x1
|
||||
|
||||
|
||||
#define MMAP_ALLOCATION_SIZE (4 * MBYTE) //Parenthesized so that the Macro Expands as a Single Value inside Larger Expressions
|
||||
#define POSIX_ALLOCATED_SIZE (32 * MBYTE) //Parenthesized so that the Macro Expands as a Single Value inside Larger Expressions
|
||||
#define KERNEL_ALLOCATION_SIZE (4 * MBYTE) //Parenthesized so that the Macro Expands as a Single Value inside Larger Expressions
|
||||
|
||||
|
||||
#define OPERATION_START_TIMER 0x18000000
|
||||
|
||||
|
||||
#define BAR0_32BIT 0 //For 32 Bit Addressing
|
||||
#define BAR1_32BIT 1 //For 32 Bit Addressing
|
||||
#define BAR2_32BIT 2 //For 32 Bit Addressing
|
||||
#define BAR3_32BIT 3 //For 32 Bit Addressing
|
||||
#define BAR4_32BIT 4 //For 32 Bit Addressing
|
||||
#define BAR5_32BIT 5 //For 32 Bit Addressing
|
||||
|
||||
#define BAR0_64BIT 0 //For 64 Bit Addressing
|
||||
#define BAR1_64BIT 2 //For 64 Bit Addressing
|
||||
#define BAR2_64BIT 4 //For 64 Bit Addressing
|
||||
|
||||
|
||||
#define ACCELERATOR_DIRECT_0_OCCUPIED 0x01
|
||||
#define ACCELERATOR_DIRECT_1_OCCUPIED 0x02
|
||||
#define ACCELERATOR_INDIRECT_0_OCCUPIED 0x04
|
||||
#define ACCELERATOR_INDIRECT_1_OCCUPIED 0x08
|
||||
#define ACCELERATOR_INDIRECT_2_OCCUPIED 0x10
|
||||
#define ACCELERATOR_INDIRECT_3_OCCUPIED 0x20
|
||||
#define ACCELERATOR_SG_OCCUPIED 0x40
|
||||
#define ACCELERATOR_ALL_OCCUPIED 0x3F
|
||||
#define ACCELERATOR_NO_OCCUPIED 0x00
|
||||
|
||||
|
||||
/* AXI Performance Monitor control masks. */
#define ENABLE_GCC_MC 0x00010001 //Enable Global Clock Counter and Metrics Counter Mask
#define RESET_GCC_MC 0x00020002 //Reset Global Clock Counter and Metrics Counter Mask

/* AXI Performance Monitor register offsets. */
#define APM_CR_OFFSET 0x300 //AXI Performance Monitor Control Register Offset(0x60 for Long Int Offset 0x300 for Byte Offset)
#define APM_GCC_LOWER_OFFSET 0x0004 //Global Clock Counter Lower 32Bits Register
#define APM_GCC_UPPER_OFFSET 0x0000 //Global Clock Counter Upper 32Bits Register

/* Metric selector register offsets. */
#define METRIC_SELECTOR_REGISTER_0_OFFSET 0x0044
#define METRIC_SELECTOR_REGISTER_1_OFFSET 0x0048
#define METRIC_SELECTOR_REGISTER_2_OFFSET 0x004C
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// PCIe BAR0 Address Space -Mapping the FPGA AXI Address Space (HW Peripherals)
//////////////////////////////////////////////////////////////////////////////////////

/*
 * NOTE(review): BAR0_OFFSET_INTERRUPT_CONTROLLER and BAR0_OFFSET_PCIE_CTL
 * carry the same value (0x00020000). Verify against the hardware address
 * map whether the interrupt controller really shares that offset.
 */
#define BAR0_OFFSET_INTERRUPT_CONTROLLER 0x00020000
#define BAR0_OFFSET_UARTLITE 0x00010000
#define BAR0_OFFSET_PCIE_CTL 0x00020000
#define BAR0_OFFSET_GPIO_PCIE_INTERRUPT 0x00030000
#define BAR0_OFFSET_GPIO_MSI 0x00040000
#define BAR0_OFFSET_TIMER 0x00050000
#define BAR0_OFFSET_FETCH_SCHEDULER 0x00060000
#define BAR0_OFFSET_SEND_SCHEDULER 0x00070000

#define BAR0_OFFSET_SCHEDULER_BUFFER_FETCH 0x00080000
#define BAR0_OFFSET_SCHEDULER_BUFFER_SEND 0x00090000

#define BAR0_OFFSET_CDMA_FETCH 0x000A0000
#define BAR0_OFFSET_CDMA_SEND 0x000B0000

/* Acceleration Group Direct 0 */
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_ACCELERATION_SCHEDULER_DIRECT 0x000C0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_APM 0x000D0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_DMA 0x000E0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_SOBEL_FILTER 0x000F0000

/* Acceleration Group Direct 1 */
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_ACCELERATION_SCHEDULER_DIRECT 0x00100000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_APM 0x00110000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_DMA 0x00120000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_SOBEL_FILTER 0x00130000

/* Acceleration Group Indirect 0 */
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_ACCELERATION_SCHEDULER_INDIRECT 0x00140000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_APM 0x00150000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_DMA 0x00160000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_SOBEL_FILTER 0x00170000

/* Acceleration Group Indirect 1 */
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_ACCELERATION_SCHEDULER_INDIRECT 0x00180000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_APM 0x00190000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_DMA 0x001A0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_SOBEL_FILTER 0x001B0000

/* Acceleration Group Indirect 2 */
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_ACCELERATION_SCHEDULER_INDIRECT 0x001C0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_APM 0x001D0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_DMA 0x001E0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_SOBEL_FILTER 0x001F0000

/* Acceleration Group Indirect 3 */
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_ACCELERATION_SCHEDULER_INDIRECT 0x00200000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_APM 0x00210000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_DMA 0x00220000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_SOBEL_FILTER 0x00230000

/* Acceleration Group Scatter/Gather */
#define BAR0_OFFSET_ACCEL_GROUP_SG_ACCELERATION_SCHEDULER_SG 0x00240000
#define BAR0_OFFSET_ACCEL_GROUP_SG_APM 0x00250000
#define BAR0_OFFSET_ACCEL_GROUP_SG_DMA_SG_PCIE_SCHEDULER 0x00260000
#define BAR0_OFFSET_ACCEL_GROUP_SG_SOBEL_FILTER_4K 0x00280000
#define BAR0_OFFSET_ACCEL_GROUP_SG_DMA 0x00290000

#define BAR0_OFFSET_GPIO_MSI_READ 0x00300000
#define BAR0_OFFSET_INTERRUPT_MANAGER 0x00310000
#define BAR0_OFFSET_GPIO_ACK 0x00320000
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler Direct Register Offsets
//////////////////////////////////////////////////////////////////////////////////////

/* Control and interrupt registers. */
#define ACCELERATION_SCHEDULER_DIRECT_CONTROL_REGISTER_OFFSET 0x00
#define ACCELERATION_SCHEDULER_DIRECT_GIE_REGISTER_OFFSET 0x04
#define ACCELERATION_SCHEDULER_DIRECT_IER_REGISTER_OFFSET 0x08
#define ACCELERATION_SCHEDULER_DIRECT_ISR_REGISTER_OFFSET 0x0C

/* Argument registers (spaced 8 bytes apart, starting at 0x18). */
#define ACCELERATION_SCHEDULER_DIRECT_DMA_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x18
#define ACCELERATION_SCHEDULER_DIRECT_SOBEL_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x20
#define ACCELERATION_SCHEDULER_DIRECT_GPIO_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x28
#define ACCELERATION_SCHEDULER_DIRECT_APM_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x30
#define ACCELERATION_SCHEDULER_DIRECT_SHARED_APM_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x38
#define ACCELERATION_SCHEDULER_DIRECT_SHARED_METRICS_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0x40
#define ACCELERATION_SCHEDULER_DIRECT_IMAGE_COLUMNS_REGISTER_OFFSET 0x48
#define ACCELERATION_SCHEDULER_DIRECT_IMAGE_ROWS_REGISTER_OFFSET 0x50
#define ACCELERATION_SCHEDULER_DIRECT_HOST_SOURCE_ADDRESS_REGISTER_OFFSET 0x58
#define ACCELERATION_SCHEDULER_DIRECT_HOST_DESTINATION_ADDRESS_REGISTER_OFFSET 0x60
#define ACCELERATION_SCHEDULER_DIRECT_INITIATOR_GROUP_REGISTER_OFFSET 0x68
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler Indirect Register Offsets
//////////////////////////////////////////////////////////////////////////////////////

/* Control and interrupt registers. */
#define ACCELERATION_SCHEDULER_INDIRECT_CONTROL_REGISTER_OFFSET 0x00
#define ACCELERATION_SCHEDULER_INDIRECT_GIE_REGISTER_OFFSET 0x04
#define ACCELERATION_SCHEDULER_INDIRECT_IER_REGISTER_OFFSET 0x08
#define ACCELERATION_SCHEDULER_INDIRECT_ISR_REGISTER_OFFSET 0x0C

/* Fetch-side argument registers. */
#define ACCELERATION_SCHEDULER_INDIRECT_SCHEDULER_BUFFER_BASE_ADDRESS_FETCH_REGISTER_OFFSET 0x18
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_REG_FETCH_REGISTER_OFFSET 0x20
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_REG_FETCH_REGISTER_OFFSET 0x28
#define ACCELERATION_SCHEDULER_INDIRECT_DATA_SIZE_REG_FETCH_REGISTER_OFFSET 0x30
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_REG__FETCH__REGISTER_OFFSET 0x38
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_FETCH_REGISTER_OFFSET 0x40
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_FETCH_REGISTER_OFFSET 0x48
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_FETCH_REGISTER_OFFSET 0x50

/* Send-side argument registers. */
#define ACCELERATION_SCHEDULER_INDIRECT_SCHEDULER_BUFFER_BASE_ADDRESS_SEND_REGISTER_OFFSET 0x58
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_REG_SEND_REGISTER_OFFSET 0x60
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_REG_SEND_REGISTER_OFFSET 0x68
#define ACCELERATION_SCHEDULER_INDIRECT_DATA_SIZE_REG_SEND_REGISTER_OFFSET 0x70
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_REG__SEND_REGISTER_OFFSET 0x78
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_SEND_REGISTER_OFFSET 0x80
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_SEND_REGISTER_OFFSET 0x88
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_SEND_REGISTER_OFFSET 0x90

/* Device base addresses and image parameters. */
#define ACCELERATION_SCHEDULER_INDIRECT_DMA_BASE_ADDRESS_REGISTER_OFFSET 0x98
#define ACCELERATION_SCHEDULER_INDIRECT_SOBEL_BASE_ADDRESS_REGISTER_OFFSET 0xA0
#define ACCELERATION_SCHEDULER_INDIRECT_IMAGE_COLUMNS_REGISTER_OFFSET 0xA8
#define ACCELERATION_SCHEDULER_INDIRECT_IMAGE_ROWS_REGISTER_OFFSET 0xB0
#define ACCELERATION_SCHEDULER_INDIRECT_ACCEL_GROUP_REGISTER_OFFSET 0xB8
#define ACCELERATION_SCHEDULER_INDIRECT_SHARED_APM_BASE_ADDRESS_REGISTER_OFFSET 0xC0
#define ACCELERATION_SCHEDULER_INDIRECT_SHARED_METRICS_BASE_ADDRESS_REGISTER_OFFSET 0xC8
#define ACCELERATION_SCHEDULER_INDIRECT_APM_BASE_ADDRESS_REGISTER_OFFSET 0xD0
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler SG Register Offsets
//////////////////////////////////////////////////////////////////////////////////////

/* Control, interrupt and return registers. */
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_AP_CTRL 0x00
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_GIE 0x04
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IER 0x08
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_ISR 0x0c
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_AP_RETURN 0x10

/* Argument registers (spaced 8 bytes apart, starting at 0x18). */
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_DMA_SG_PCIE_SCHEDULER_BASE_ADDRESS_DATA 0x18
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SOBEL_DEVICE_ADDRESS_DATA 0x20
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_GPIO_DEVICE_ADDRESS_DATA 0x28
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_APM_DEVICE_ADDRESS_DATA 0x30
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SHARED_APM_DEVICE_ADDRESS_DATA 0x38
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SHARED_METRICS_ADDRESS_DATA 0x40
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IMAGE_COLS_DATA 0x48
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IMAGE_ROWS_DATA 0x50
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_ACCEL_GROUP_DATA 0x58
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// AXI BARs Offsets
//////////////////////////////////////////////////////////////////////////////////////

/* One AXI BAR every 0x10000000, starting at 0x20000000. */
#define AXI_BAR_0_OFFSET 0x20000000
#define AXI_BAR_1_OFFSET 0x30000000
#define AXI_BAR_2_OFFSET 0x40000000
#define AXI_BAR_3_OFFSET 0x50000000
#define AXI_BAR_4_OFFSET 0x60000000
#define AXI_BAR_5_OFFSET 0x70000000
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// AXI BARs Dynamic Address Translation Registers Offsets
//////////////////////////////////////////////////////////////////////////////////////

/* For every BAR the upper-address register precedes the lower-address register by 4 bytes. */
#define AXI_BAR0_LOWER_ADDRESS_OFFSET 0x20C
#define AXI_BAR0_UPPER_ADDRESS_OFFSET 0x208

#define AXI_BAR1_LOWER_ADDRESS_OFFSET 0x214
#define AXI_BAR1_UPPER_ADDRESS_OFFSET 0x210

#define AXI_BAR2_LOWER_ADDRESS_OFFSET 0x21C
#define AXI_BAR2_UPPER_ADDRESS_OFFSET 0x218

#define AXI_BAR3_LOWER_ADDRESS_OFFSET 0x224
#define AXI_BAR3_UPPER_ADDRESS_OFFSET 0x220

#define AXI_BAR4_LOWER_ADDRESS_OFFSET 0x22C
#define AXI_BAR4_UPPER_ADDRESS_OFFSET 0x228

#define AXI_BAR5_LOWER_ADDRESS_OFFSET 0x234
#define AXI_BAR5_UPPER_ADDRESS_OFFSET 0x230
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// IOCtl Commands
//////////////////////////////////////////////////////////////////////////////////////

#define COMMAND_REQUEST_ACCELERATOR_ACCESS 0x0100
#define COMMAND_REQUEST_ACCELERATOR_SG_ACCESS 0x0200
#define COMMAND_SET_PAGES 0x0300
#define COMMAND_UNMAP_PAGES 0x0400
#define COMMAND_RESET_VARIABLES 0x0500
|
||||
//////////////////////////////////////////////////////////////////////////////////////
// Scenarios
//////////////////////////////////////////////////////////////////////////////////////

#define SCENARIO_SCATTER_GATHER 1
#define SCENARIO_WORST_CASE 2
#define SCENARIO_WORST_CASE_CDMA 3
|
||||
/* Dimensions and size of an image. */
struct image_info
{
	uint32_t rows;
	uint32_t columns;
	uint64_t size;
};
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
uint32_t apm_read_transactions; //Offset 0 Bytes
|
||||
uint32_t apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
uint32_t apm_write_transactions; //Offset 8 Bytes
|
||||
uint32_t apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
uint32_t apm_packets; //Offset 16 Bytes
|
||||
uint32_t apm_bytes; //Offset 20 Bytes
|
||||
|
||||
uint32_t apm_gcc_l; //Offset 24 Bytes
|
||||
uint32_t apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
uint32_t cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
uint32_t cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
uint32_t cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
uint32_t cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
uint32_t cdma_send_time_start_l; //Offset 48 Bytes
|
||||
uint32_t cdma_send_time_start_u; //Offset 52 Bytes
|
||||
uint32_t cdma_send_time_end_l; //Offset 56 Bytes
|
||||
uint32_t cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
uint32_t dma_accel_time_start_l; //Offset 64 Bytes
|
||||
uint32_t dma_accel_time_start_u; //Offset 68 Bytes
|
||||
uint32_t dma_accel_time_end_l; //Offset 72 Bytes
|
||||
uint32_t dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
uint64_t total_time_start;
|
||||
uint64_t total_time_end;
|
||||
|
||||
uint64_t sleep_time_start;
|
||||
uint64_t sleep_time_end;
|
||||
|
||||
uint64_t preparation_time_start;
|
||||
uint64_t preparation_time_end;
|
||||
|
||||
uint64_t load_time_start;
|
||||
uint64_t load_time_end;
|
||||
|
||||
uint64_t save_time_start;
|
||||
uint64_t save_time_end;
|
||||
|
||||
};
|
||||
|
||||
struct metrics_per_process
|
||||
{
|
||||
struct metrics agd0;
|
||||
struct metrics agd1;
|
||||
|
||||
struct metrics agi0;
|
||||
struct metrics agi1;
|
||||
struct metrics agi2;
|
||||
struct metrics agi3;
|
||||
|
||||
struct metrics agsg;
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
uint64_t total_time_start;
|
||||
uint64_t total_time_end;
|
||||
|
||||
uint64_t sleep_time_start;
|
||||
uint64_t sleep_time_end;
|
||||
|
||||
uint64_t preparation_time_start;
|
||||
uint64_t preparation_time_end;
|
||||
|
||||
uint64_t load_time_start;
|
||||
uint64_t load_time_end;
|
||||
|
||||
uint64_t save_time_start;
|
||||
uint64_t save_time_end;
|
||||
|
||||
uint64_t set_pages_overhead_time_start;
|
||||
uint64_t set_pages_overhead_time_end;
|
||||
|
||||
uint64_t unmap_pages_overhead_time_start;
|
||||
uint64_t unmap_pages_overhead_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
/*
 * Status words for the acceleration groups: an "occupied pid" word per
 * group, two global words, and a "busy" word per group. All fields are
 * 32 bit, so the structure has no internal padding.
 */
struct status_flags
{
	uint32_t accel_direct_0_occupied_pid;
	uint32_t accel_direct_1_occupied_pid;
	uint32_t accel_indirect_0_occupied_pid;
	uint32_t accel_indirect_1_occupied_pid;
	uint32_t accel_indirect_2_occupied_pid;
	uint32_t accel_indirect_3_occupied_pid;
	uint32_t accel_sg_0_occupied_pid;

	uint32_t accelerator_busy;
	uint32_t open_modules;

	uint32_t agd0_busy;
	uint32_t agd1_busy;
	uint32_t agi0_busy;
	uint32_t agi1_busy;
	uint32_t agi2_busy;
	uint32_t agi3_busy;
	uint32_t agsg_busy;
};
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics unused_shared_metrics;
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
|
||||
struct shared_repository_process
|
||||
{
|
||||
struct metrics_per_process process_metrics;
|
||||
struct image_info shared_image_info;
|
||||
int accel_completed;
|
||||
int accel_occupied;
|
||||
int image_segments;
|
||||
|
||||
};
|
||||
|
||||
/* Two magic bytes of a bitmap file. */
typedef struct {
	uint8_t magic[2];
} bmpfile_magic_t;
|
||||
/* Bitmap file header fields that follow the magic bytes (12 bytes in total). */
typedef struct {
	uint32_t filesz;
	uint16_t creator1;
	uint16_t creator2;
	uint32_t bmp_offset;
} bmpfile_header_t;
|
||||
/* Bitmap info header (40 bytes in total). */
typedef struct {
	uint32_t header_sz;
	int32_t width;
	int32_t height;
	uint16_t nplanes;
	uint16_t bitspp;
	uint32_t compress_type;
	uint32_t bmp_bytesz;
	int32_t hres;
	int32_t vres;
	uint32_t ncolors;
	uint32_t nimpcolors;
} bitmap_info_header_t;
|
||||
|
||||
/* One 4-byte colour entry stored in b, g, r order followed by an unused byte. */
typedef struct {
	uint8_t b;
	uint8_t g;
	uint8_t r;
	uint8_t nothing;
} rgb_t;
|
||||
|
||||
/* A single pixel value stored in one unsigned byte. */
typedef unsigned char pixel_t;
|
||||
|
||||
/*
 * Node of a singly linked list (see next_pid) that records, for one pid,
 * the addresses of its shared repository and pre/post-process buffers
 * together with its source and destination scatter/gather bookkeeping.
 *
 * NOTE(review): the *_physical_address fields are 32 bit wide; confirm
 * that the physical addresses stored here always fit in 32 bits.
 */
struct pid_reserved_memories {

	pid_t pid;

	struct shared_repository_process *shared_repo_virtual_address;
	uint32_t shared_repo_physical_address;

	uint64_t *pre_process_mmap_virtual_address;
	uint32_t pre_process_mmap_physical_address;

	uint64_t *post_process_mmap_virtual_address;
	uint32_t post_process_mmap_physical_address;

	/* Source side scatter/gather state. */
	struct sg_table *dma_sg_table_source;
	struct scatterlist *scatterlist_pointer_source;

	int buffer_dma_buffers_source;

	int buffer_mapped_pages_source;

	/* Destination side scatter/gather state. */
	struct sg_table *dma_sg_table_destination;
	struct scatterlist *scatterlist_pointer_destination;

	int buffer_dma_buffers_destination;

	int buffer_mapped_pages_destination;

	uint64_t *u64_sg_list_source;
	uint64_t *u64_sg_list_destination;

	/* Next node in the list. */
	struct pid_reserved_memories *next_pid;

};
|
||||
/* Source and destination scatter/gather list pointers associated with a pid. */
struct sg_list_addresses
{
	pid_t current_pid;

	uint64_t *sg_list_source_address;
	uint64_t *sg_list_destination_address;
};
|
||||
/*
 * Per-thread bookkeeping: pointers to the shared repository and to the
 * pre/post-process buffers, plus three integers for the matching mmap files.
 */
struct per_thread_info
{
	struct shared_repository_process *shared_repo_kernel_address;
	uint8_t *u8_pre_process_kernel_address;
	uint8_t *u8_post_process_kernel_address;

	int pre_process_mmap_file;
	int post_process_mmap_file;
	int shared_repo_mmap_file;
};
0
Software/Microblaze_XSDK/.keep
Normal file
0
Software/Microblaze_XSDK/.keep
Normal file
221
Software/Microblaze_XSDK/src/lscript.ld
Normal file
221
Software/Microblaze_XSDK/src/lscript.ld
Normal file
@@ -0,0 +1,221 @@
|
||||
/*******************************************************************/
|
||||
/* */
|
||||
/* This file is automatically generated by linker script generator.*/
|
||||
/* */
|
||||
/* Version: */
|
||||
/* */
|
||||
/* Copyright (c) 2010 Xilinx, Inc. All rights reserved. */
|
||||
/* */
|
||||
/* Description : MicroBlaze Linker Script */
|
||||
/* */
|
||||
/*******************************************************************/
|
||||
|
||||
/* Stack and heap default to 0x400 bytes each unless already defined. */
_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x400;
_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x400;

/* Define Memories in the system */

MEMORY
{
   microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr : ORIGIN = 0x50, LENGTH = 0x3FFB0
   pcie : ORIGIN = 0x10020000, LENGTH = 0x10000
   pcie_AXIBAR_0 : ORIGIN = 0x20000000, LENGTH = 0x400000
   pcie_AXIBAR_1 : ORIGIN = 0x30000000, LENGTH = 0x400000
   pcie_AXIBAR_2 : ORIGIN = 0x40000000, LENGTH = 0x400000
   pcie_AXIBAR_3 : ORIGIN = 0x50000000, LENGTH = 0x400000
   pcie_AXIBAR_4 : ORIGIN = 0x60000000, LENGTH = 0x1000
   pcie_AXIBAR_5 : ORIGIN = 0x70000000, LENGTH = 0x1000
   mig : ORIGIN = 0x80000000, LENGTH = 0x20000000
   shared_metrics_bram_controller_S_AXI_BASEADDR : ORIGIN = 0xC0000000, LENGTH = 0x40000
}

/* Specify the default entry point to the program */

ENTRY(_start)

/* Define the sections, and where they are mapped in memory */

SECTIONS
{
/* Vector sections are placed at fixed addresses below the BRAM region origin (0x50). */
.vectors.reset 0x0 : {
   KEEP (*(.vectors.reset))
}

.vectors.sw_exception 0x8 : {
   KEEP (*(.vectors.sw_exception))
}

.vectors.interrupt 0x10 : {
   KEEP (*(.vectors.interrupt))
}

.vectors.hw_exception 0x20 : {
   KEEP (*(.vectors.hw_exception))
}

/* Every remaining output section is assigned to the BRAM region. */
.text : {
   *(.text)
   *(.text.*)
   *(.gnu.linkonce.t.*)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.init : {
   KEEP (*(.init))
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.fini : {
   KEEP (*(.fini))
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.ctors : {
   __CTOR_LIST__ = .;
   ___CTORS_LIST___ = .;
   KEEP (*crtbegin.o(.ctors))
   KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors))
   KEEP (*(SORT(.ctors.*)))
   KEEP (*(.ctors))
   __CTOR_END__ = .;
   ___CTORS_END___ = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.dtors : {
   __DTOR_LIST__ = .;
   ___DTORS_LIST___ = .;
   KEEP (*crtbegin.o(.dtors))
   KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors))
   KEEP (*(SORT(.dtors.*)))
   KEEP (*(.dtors))
   PROVIDE(__DTOR_END__ = .);
   PROVIDE(___DTORS_END___ = .);
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.rodata : {
   __rodata_start = .;
   *(.rodata)
   *(.rodata.*)
   *(.gnu.linkonce.r.*)
   __rodata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.sdata2 : {
   . = ALIGN(8);
   __sdata2_start = .;
   *(.sdata2)
   *(.sdata2.*)
   *(.gnu.linkonce.s2.*)
   . = ALIGN(8);
   __sdata2_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.sbss2 : {
   __sbss2_start = .;
   *(.sbss2)
   *(.sbss2.*)
   *(.gnu.linkonce.sb2.*)
   __sbss2_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.data : {
   . = ALIGN(4);
   __data_start = .;
   *(.data)
   *(.data.*)
   *(.gnu.linkonce.d.*)
   __data_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.got : {
   *(.got)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.got1 : {
   *(.got1)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.got2 : {
   *(.got2)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.eh_frame : {
   *(.eh_frame)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.jcr : {
   *(.jcr)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.gcc_except_table : {
   *(.gcc_except_table)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.sdata : {
   . = ALIGN(8);
   __sdata_start = .;
   *(.sdata)
   *(.sdata.*)
   *(.gnu.linkonce.s.*)
   __sdata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.sbss (NOLOAD) : {
   . = ALIGN(4);
   __sbss_start = .;
   *(.sbss)
   *(.sbss.*)
   *(.gnu.linkonce.sb.*)
   . = ALIGN(8);
   __sbss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.tdata : {
   __tdata_start = .;
   *(.tdata)
   *(.tdata.*)
   *(.gnu.linkonce.td.*)
   __tdata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.tbss : {
   __tbss_start = .;
   *(.tbss)
   *(.tbss.*)
   *(.gnu.linkonce.tb.*)
   __tbss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

.bss (NOLOAD) : {
   . = ALIGN(4);
   __bss_start = .;
   *(.bss)
   *(.bss.*)
   *(.gnu.linkonce.b.*)
   *(COMMON)
   . = ALIGN(4);
   __bss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

/* Small-data base symbols are set to the midpoint of their sdata..sbss ranges. */
_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 );

_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 );

/* Generate Stack and Heap definitions */

.heap (NOLOAD) : {
   . = ALIGN(8);
   _heap = .;
   _heap_start = .;
   . += _HEAP_SIZE;
   _heap_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

/* _stack_end is the lowest stack address; _stack/__stack mark the 8-byte aligned top. */
.stack (NOLOAD) : {
   _stack_end = .;
   . += _STACK_SIZE;
   . = ALIGN(8);
   _stack = .;
   __stack = _stack;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr

_end = .;
}
|
||||
84
Software/Microblaze_XSDK/src/main.c
Normal file
84
Software/Microblaze_XSDK/src/main.c
Normal file
@@ -0,0 +1,84 @@
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "stdlib.h"
|
||||
#include "platform.h"
|
||||
#include "xil_exception.h"
|
||||
#include "xparameters.h"
|
||||
#include "xstatus.h"
|
||||
|
||||
#define KBYTE 1024
|
||||
|
||||
/*
|
||||
* Functions Declaration
|
||||
*/
|
||||
int setup_acceleration_scheduler_sg();
|
||||
int setup_dma_sg_schedulers();
|
||||
int setup_acceleration_schedulers_direct();
|
||||
int setup_acceleration_schedulers_indirect();
|
||||
int setup_fetch_scheduler();
|
||||
int setup_send_scheduler();
|
||||
int setup_scheduler_buffers();
|
||||
int setup_cdmas();
|
||||
int setup_dmas();
|
||||
int setup_apms();
|
||||
int setup_shared_apm();
|
||||
int setup_gpio();
|
||||
int setup_pcie();
|
||||
int setup_sobel_filters();
|
||||
int setup_interrupt_manager();
|
||||
int setup_interrupts();
|
||||
|
||||
//The Base Address of the FPGA's BRAM (256K).
|
||||
int *bram_base_address = (int *)XPAR_SHARED_METRICS_BRAM_CONTROLLER_S_AXI_BASEADDR;
|
||||
|
||||
int main()
|
||||
{
|
||||
int repeat;
|
||||
|
||||
//Clear the Terminal Screen.
|
||||
xil_printf("%c[2J",27);
|
||||
|
||||
//Initialize the Platform.
|
||||
init_platform();
|
||||
|
||||
//Clear the FPGA's BRAM.
|
||||
for(repeat = 0; repeat < (256 * KBYTE) / 4; repeat++)
|
||||
{
|
||||
bram_base_address[repeat] = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup ALL the Peripherals of the FPGA.
|
||||
*/
|
||||
setup_acceleration_schedulers_direct();
|
||||
setup_acceleration_schedulers_indirect();
|
||||
setup_fetch_scheduler();
|
||||
setup_send_scheduler();
|
||||
setup_scheduler_buffers();
|
||||
setup_cdmas();
|
||||
setup_dmas();
|
||||
setup_apms();
|
||||
setup_shared_apm();
|
||||
setup_gpio();
|
||||
setup_pcie();
|
||||
setup_sobel_filters();
|
||||
setup_acceleration_scheduler_sg();
|
||||
setup_dma_sg_schedulers();
|
||||
setup_interrupt_manager();
|
||||
|
||||
//Setup the Interrupt Controller and the Interrupts.
|
||||
setup_interrupts();
|
||||
|
||||
print("\r\n-->System is Ready\r\n");
|
||||
|
||||
|
||||
//Start an Infinite Loop to Keep the System Alive.
|
||||
while(1)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
return XST_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
103
Software/Microblaze_XSDK/src/platform.c
Normal file
103
Software/Microblaze_XSDK/src/platform.c
Normal file
@@ -0,0 +1,103 @@
|
||||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2010 - 2014 Xilinx, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* Use of the Software is limited solely to applications:
|
||||
* (a) running on a Xilinx device, or
|
||||
* (b) that interact with a Xilinx device through a bus or interconnect.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* XILINX CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of the Xilinx shall not be used
|
||||
* in advertising or otherwise to promote the sale, use or other dealings in
|
||||
* this Software without prior written authorization from Xilinx.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "xparameters.h"
|
||||
#include "xil_cache.h"
|
||||
|
||||
#include "platform_config.h"
|
||||
|
||||
/*
|
||||
* Uncomment the following line if ps7 init source files are added in the
|
||||
* source directory for compiling example outside of SDK.
|
||||
*/
|
||||
/*#include "ps7_init.h"*/
|
||||
|
||||
#ifdef STDOUT_IS_16550
|
||||
#include "xuartns550_l.h"
|
||||
|
||||
#define UART_BAUD 9600
|
||||
#endif
|
||||
|
||||
/*
 * Enable the instruction and data caches.
 *
 * On PowerPC builds the cacheable region mask is applied to both caches.
 * On MicroBlaze builds each cache is enabled only when the matching
 * XPAR_MICROBLAZE_USE_ICACHE / XPAR_MICROBLAZE_USE_DCACHE macro is defined.
 * On any other target the function does nothing.
 */
void
enable_caches(void)
{
#ifdef __PPC__
    Xil_ICacheEnableRegion(CACHEABLE_REGION_MASK);
    Xil_DCacheEnableRegion(CACHEABLE_REGION_MASK);
#elif defined(__MICROBLAZE__)
#ifdef XPAR_MICROBLAZE_USE_ICACHE
    Xil_ICacheEnable();
#endif
#ifdef XPAR_MICROBLAZE_USE_DCACHE
    Xil_DCacheEnable();
#endif
#endif
}
|
||||
/* Disable the data cache first and then the instruction cache. */
void
disable_caches(void)
{
    Xil_DCacheDisable();
    Xil_ICacheDisable();
}
|
||||
/*
 * Configure the standard output UART.
 *
 * Only a 16550 UART needs explicit setup here: its baud rate is set to
 * UART_BAUD and its line control register to 8 data bits. For the other
 * STDOUT options no code is emitted.
 */
void
init_uart(void)
{
#ifdef STDOUT_IS_16550
    XUartNs550_SetBaud(STDOUT_BASEADDR, XPAR_XUARTNS550_CLOCK_HZ, UART_BAUD);
    XUartNs550_SetLineControlReg(STDOUT_BASEADDR, XUN_LCR_8_DATA_BITS);
#endif
#ifdef STDOUT_IS_PS7_UART
    /* Bootrom/BSP configures PS7 UART to 115200 bps */
#endif
}
|
||||
/* Bring the platform up: enable the caches, then initialize the UART. */
void
init_platform(void)
{
    /*
     * If you want to run this example outside of SDK,
     * uncomment the following line and also #include "ps7_init.h" at the top.
     * Make sure that the ps7_init.c and ps7_init.h files are included
     * along with this example source files for compilation.
     */
    /* ps7_init();*/
    enable_caches();
    init_uart();
}
|
||||
/* Tear the platform down by disabling the caches. */
void
cleanup_platform(void)
{
    disable_caches();
}
41
Software/Microblaze_XSDK/src/platform.h
Normal file
41
Software/Microblaze_XSDK/src/platform.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* Use of the Software is limited solely to applications:
|
||||
* (a) running on a Xilinx device, or
|
||||
* (b) that interact with a Xilinx device through a bus or interconnect.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* XILINX CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of the Xilinx shall not be used
|
||||
* in advertising or otherwise to promote the sale, use or other dealings in
|
||||
* this Software without prior written authorization from Xilinx.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __PLATFORM_H_
#define __PLATFORM_H_

#include "platform_config.h"

/*
 * Both functions take no arguments.  They are declared with an explicit
 * (void) parameter list so that the declarations are real prototypes: with
 * an empty "()" list a C compiler (before C23) would accept calls that pass
 * arguments without any diagnostic.
 */

/* Enables the caches and configures the stdout UART (see platform.c). */
void init_platform(void);

/* Disables the caches again (see platform.c). */
void cleanup_platform(void);

#endif /* __PLATFORM_H_ */
|
||||
4
Software/Microblaze_XSDK/src/platform_config.h
Normal file
4
Software/Microblaze_XSDK/src/platform_config.h
Normal file
@@ -0,0 +1,4 @@
|
||||
/*
 * Platform configuration header.
 *
 * This header currently defines nothing besides its own include guard.
 * NOTE(review): platform.c tests STDOUT_IS_16550 / STDOUT_IS_PS7_UART; unless
 * those macros are supplied from somewhere else, the UART setup code guarded
 * by them is not compiled -- confirm this is intended.
 */
#ifndef __PLATFORM_CONFIG_H_
#define __PLATFORM_CONFIG_H_

#endif /* __PLATFORM_CONFIG_H_ */
|
||||
2986
Software/Microblaze_XSDK/src/setup_system.c
Normal file
2986
Software/Microblaze_XSDK/src/setup_system.c
Normal file
File diff suppressed because it is too large
Load Diff
159
Software/Microblaze_XSDK/src/structures.h
Normal file
159
Software/Microblaze_XSDK/src/structures.h
Normal file
@@ -0,0 +1,159 @@
|
||||
struct image_info
|
||||
{
|
||||
u32 rows;
|
||||
u32 columns;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
struct metrics
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
u32 apm_read_transactions; //Offset 0 Bytes
|
||||
u32 apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
u32 apm_write_transactions; //Offset 8 Bytes
|
||||
u32 apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
u32 apm_packets; //Offset 16 Bytes
|
||||
u32 apm_bytes; //Offset 20 Bytes
|
||||
|
||||
u32 apm_gcc_l; //Offset 24 Bytes
|
||||
u32 apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
u32 cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
u32 cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
u32 cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
u32 cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
u32 cdma_send_time_start_l; //Offset 48 Bytes
|
||||
u32 cdma_send_time_start_u; //Offset 52 Bytes
|
||||
u32 cdma_send_time_end_l; //Offset 56 Bytes
|
||||
u32 cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
u32 dma_accel_time_start_l; //Offset 64 Bytes
|
||||
u32 dma_accel_time_start_u; //Offset 68 Bytes
|
||||
u32 dma_accel_time_end_l; //Offset 72 Bytes
|
||||
u32 dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
u64 total_time_start;
|
||||
u64 total_time_end;
|
||||
|
||||
u64 sleep_time_start;
|
||||
u64 sleep_time_end;
|
||||
|
||||
u64 preparation_time_start;
|
||||
u64 preparation_time_end;
|
||||
|
||||
u64 load_time_start;
|
||||
u64 load_time_end;
|
||||
|
||||
u64 save_time_start;
|
||||
u64 save_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct metrics_per_process
|
||||
{
|
||||
/*
|
||||
* AXI Performance Monitor Metrics
|
||||
*/
|
||||
u32 apm_read_transactions; //Offset 0 Bytes
|
||||
u32 apm_read_bytes; //Offset 4 Bytes
|
||||
|
||||
u32 apm_write_transactions; //Offset 8 Bytes
|
||||
u32 apm_write_bytes; //Offset 12 Bytes
|
||||
|
||||
u32 apm_packets; //Offset 16 Bytes
|
||||
u32 apm_bytes; //Offset 20 Bytes
|
||||
|
||||
u32 apm_gcc_l; //Offset 24 Bytes
|
||||
u32 apm_gcc_u; //Offset 28 Bytes
|
||||
|
||||
u32 cdma_fetch_time_start_l; //Offset 32 Bytes
|
||||
u32 cdma_fetch_time_start_u; //Offset 36 Bytes
|
||||
u32 cdma_fetch_time_end_l; //Offset 40 Bytes
|
||||
u32 cdma_fetch_time_end_u; //Offset 44 Bytes
|
||||
|
||||
u32 cdma_send_time_start_l; //Offset 48 Bytes
|
||||
u32 cdma_send_time_start_u; //Offset 52 Bytes
|
||||
u32 cdma_send_time_end_l; //Offset 56 Bytes
|
||||
u32 cdma_send_time_end_u; //Offset 60 Bytes
|
||||
|
||||
u32 dma_accel_time_start_l; //Offset 64 Bytes
|
||||
u32 dma_accel_time_start_u; //Offset 68 Bytes
|
||||
u32 dma_accel_time_end_l; //Offset 72 Bytes
|
||||
u32 dma_accel_time_end_u; //Offset 76 Bytes
|
||||
|
||||
struct image_info shared_image_info; // Offset 80 Bytes
|
||||
|
||||
/*
|
||||
* Kernel and Userspace Metrics
|
||||
*/
|
||||
|
||||
u64 total_time_start;
|
||||
u64 total_time_end;
|
||||
|
||||
u64 sleep_time_start;
|
||||
u64 sleep_time_end;
|
||||
|
||||
u64 preparation_time_start;
|
||||
u64 preparation_time_end;
|
||||
|
||||
u64 load_time_start;
|
||||
u64 load_time_end;
|
||||
|
||||
u64 save_time_start;
|
||||
u64 save_time_end;
|
||||
|
||||
u64 set_pages_overhead_time_start;
|
||||
u64 set_pages_overhead_time_end;
|
||||
|
||||
u64 unmap_pages_overhead_time_start;
|
||||
u64 unmap_pages_overhead_time_end;
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct status_flags
|
||||
{
|
||||
u32 accel_direct_0_occupied_pid;
|
||||
u32 accel_direct_1_occupied_pid;
|
||||
|
||||
u32 accel_indirect_0_occupied_pid;
|
||||
u32 accel_indirect_1_occupied_pid;
|
||||
u32 accel_indirect_2_occupied_pid;
|
||||
u32 accel_indirect_3_occupied_pid;
|
||||
|
||||
u32 accel_sg_0_occupied_pid;
|
||||
|
||||
|
||||
u32 accelerator_busy;
|
||||
u32 open_modules;
|
||||
};
|
||||
|
||||
struct shared_repository
|
||||
{
|
||||
struct metrics unused_shared_metrics;
|
||||
|
||||
struct metrics accel_direct_0_shared_metrics;
|
||||
struct metrics accel_direct_1_shared_metrics;
|
||||
|
||||
struct metrics accel_indirect_0_shared_metrics;
|
||||
struct metrics accel_indirect_1_shared_metrics;
|
||||
struct metrics accel_indirect_2_shared_metrics;
|
||||
struct metrics accel_indirect_3_shared_metrics;
|
||||
|
||||
struct metrics accel_sg_0_shared_metrics;
|
||||
|
||||
struct status_flags shared_status_flags;
|
||||
|
||||
};
|
||||
Reference in New Issue
Block a user