Squash commits for public release

This commit is contained in:
2025-10-12 14:37:14 -04:00
commit 2effba13fa
74 changed files with 24002 additions and 0 deletions

0
Hardware/.keep Normal file
View File

View File

View File

@@ -0,0 +1,22 @@
#--------------------------------------------------------------------------------
#Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
#--------------------------------------------------------------------------------
#Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
#Date : 2025
#--------------------------------------------------------------------------------
#Physical Constraints (I/O Standards, Pin/Site Locations and Clock Routing) for the Top Level Ports and Cells of the Design.
#PCIe Reset (perst) Input Port: LVCMOS18 I/O Standard, Pull-Up Enabled, Located at AV35
#NOTE(review): This Port is Placed with LOC while the other Ports Below Use PACKAGE_PIN -- Confirm that this is Intentional.
set_property IOSTANDARD LVCMOS18 [get_ports perst]
set_property PULLUP true [get_ports perst]
set_property LOC AV35 [get_ports perst]
#PCIe Reference Clock (Differential) Ports
set_property PACKAGE_PIN K8 [get_ports REFCLK_p]
set_property PACKAGE_PIN K7 [get_ports REFCLK_n]
#DDR3 Initial Calibration Complete Led Indication Output Port
set_property PACKAGE_PIN AN39 [get_ports init_calib_complete]
set_property IOSTANDARD LVCMOS18 [get_ports init_calib_complete]
#Set the CLOCK_DEDICATED_ROUTE Property of the Input Clock Net (clk_in1) of the Clocking Wizard to BACKBONE
set_property CLOCK_DEDICATED_ROUTE BACKBONE [get_nets pcie_acceleration_vc707_design_i/clocking_wizard/inst/clk_in1_pcie_acceleration_vc707_design_clk_wiz_1_0]
#Fix the Placement of the refclk_ibuf Cell to the IBUFDS_GTE2_X1Y5 Site
set_property LOC IBUFDS_GTE2_X1Y5 [get_cells refclk_ibuf]

View File

View File

@@ -0,0 +1,128 @@
//--------------------------------------------------------------------------------
//Copyright 1986-2015 Xilinx, Inc. All Rights Reserved.
//--------------------------------------------------------------------------------
//Tool Version: Vivado v.2015.4 (lin64) Build 1412921 Wed Nov 18 09:44:32 MST 2015
//Date : 2025
//--------------------------------------------------------------------------------
`timescale 1 ps / 1 ps
// Top level wrapper of the pcie_acceleration_vc707_design block design.
//
// Every port of the wrapper is passed straight through to the block design
// instance, except for the differential PCIe reference clock pair
// (REFCLK_p / REFCLK_n), which is first converted to the single-ended REFCLK
// net by an IBUFDS_GTE2 buffer instantiated in this module.
module pcie_acceleration_vc707_design_wrapper
   (REFCLK_p,
    REFCLK_n,
    ddr3_sdram_addr,
    ddr3_sdram_ba,
    ddr3_sdram_cas_n,
    ddr3_sdram_ck_n,
    ddr3_sdram_ck_p,
    ddr3_sdram_cke,
    ddr3_sdram_cs_n,
    ddr3_sdram_dm,
    ddr3_sdram_dq,
    ddr3_sdram_dqs_n,
    ddr3_sdram_dqs_p,
    ddr3_sdram_odt,
    ddr3_sdram_ras_n,
    ddr3_sdram_reset_n,
    ddr3_sdram_we_n,
    init_calib_complete,
    pcie_7x_mgt_rxn,
    pcie_7x_mgt_rxp,
    pcie_7x_mgt_txn,
    pcie_7x_mgt_txp,
    perst,
    reset,
    rs232_uart_rxd,
    rs232_uart_txd,
    sys_diff_clock_clk_n,
    sys_diff_clock_clk_p);

  // PCIe reference clock (differential pair).
  input REFCLK_p;
  input REFCLK_n;

  // DDR3 SDRAM interface.
  output [13:0]ddr3_sdram_addr;
  output [2:0]ddr3_sdram_ba;
  output ddr3_sdram_cas_n;
  output [0:0]ddr3_sdram_ck_n;
  output [0:0]ddr3_sdram_ck_p;
  output [0:0]ddr3_sdram_cke;
  output [0:0]ddr3_sdram_cs_n;
  output [7:0]ddr3_sdram_dm;
  inout [63:0]ddr3_sdram_dq;
  inout [7:0]ddr3_sdram_dqs_n;
  inout [7:0]ddr3_sdram_dqs_p;
  output [0:0]ddr3_sdram_odt;
  output ddr3_sdram_ras_n;
  output ddr3_sdram_reset_n;
  output ddr3_sdram_we_n;
  output init_calib_complete;

  // PCIe transceiver lanes (4 receive and 4 transmit differential pairs).
  input [3:0]pcie_7x_mgt_rxn;
  input [3:0]pcie_7x_mgt_rxp;
  output [3:0]pcie_7x_mgt_txn;
  output [3:0]pcie_7x_mgt_txp;

  // Resets, UART and the differential system clock.
  input perst;
  input reset;
  input rs232_uart_rxd;
  output rs232_uart_txd;
  input sys_diff_clock_clk_n;
  input sys_diff_clock_clk_p;

  wire REFCLK_p;
  wire REFCLK_n;
  wire [13:0]ddr3_sdram_addr;
  wire [2:0]ddr3_sdram_ba;
  wire ddr3_sdram_cas_n;
  wire [0:0]ddr3_sdram_ck_n;
  wire [0:0]ddr3_sdram_ck_p;
  wire [0:0]ddr3_sdram_cke;
  wire [0:0]ddr3_sdram_cs_n;
  wire [7:0]ddr3_sdram_dm;
  wire [63:0]ddr3_sdram_dq;
  wire [7:0]ddr3_sdram_dqs_n;
  wire [7:0]ddr3_sdram_dqs_p;
  wire [0:0]ddr3_sdram_odt;
  wire ddr3_sdram_ras_n;
  wire ddr3_sdram_reset_n;
  wire ddr3_sdram_we_n;
  wire init_calib_complete;
  wire [3:0]pcie_7x_mgt_rxn;
  wire [3:0]pcie_7x_mgt_rxp;
  wire [3:0]pcie_7x_mgt_txn;
  wire [3:0]pcie_7x_mgt_txp;
  wire perst;
  wire reset;
  wire rs232_uart_rxd;
  wire rs232_uart_txd;
  wire sys_diff_clock_clk_n;
  wire sys_diff_clock_clk_p;

  // Single-ended reference clock between the input buffer and the block design.
  // Declared explicitly rather than relying on an implicit net, so that a typo
  // in the net name cannot silently create a second, unconnected net.
  wire REFCLK;

  // Differential input buffer for the PCIe reference clock.
  // CEB is tied to 1'b0 and the ODIV2 output is left unconnected.
  IBUFDS_GTE2 refclk_ibuf
       (.O(REFCLK),
        .ODIV2(),
        .I(REFCLK_p),
        .CEB(1'b0),
        .IB(REFCLK_n));

  // The block design instance; all the remaining ports are connected by name.
  pcie_acceleration_vc707_design pcie_acceleration_vc707_design_i
       (.REFCLK(REFCLK),
        .ddr3_sdram_addr(ddr3_sdram_addr),
        .ddr3_sdram_ba(ddr3_sdram_ba),
        .ddr3_sdram_cas_n(ddr3_sdram_cas_n),
        .ddr3_sdram_ck_n(ddr3_sdram_ck_n),
        .ddr3_sdram_ck_p(ddr3_sdram_ck_p),
        .ddr3_sdram_cke(ddr3_sdram_cke),
        .ddr3_sdram_cs_n(ddr3_sdram_cs_n),
        .ddr3_sdram_dm(ddr3_sdram_dm),
        .ddr3_sdram_dq(ddr3_sdram_dq),
        .ddr3_sdram_dqs_n(ddr3_sdram_dqs_n),
        .ddr3_sdram_dqs_p(ddr3_sdram_dqs_p),
        .ddr3_sdram_odt(ddr3_sdram_odt),
        .ddr3_sdram_ras_n(ddr3_sdram_ras_n),
        .ddr3_sdram_reset_n(ddr3_sdram_reset_n),
        .ddr3_sdram_we_n(ddr3_sdram_we_n),
        .init_calib_complete(init_calib_complete),
        .pcie_7x_mgt_rxn(pcie_7x_mgt_rxn),
        .pcie_7x_mgt_rxp(pcie_7x_mgt_rxp),
        .pcie_7x_mgt_txn(pcie_7x_mgt_txn),
        .pcie_7x_mgt_txp(pcie_7x_mgt_txp),
        .perst(perst),
        .reset(reset),
        .rs232_uart_rxd(rs232_uart_rxd),
        .rs232_uart_txd(rs232_uart_txd),
        .sys_diff_clock_clk_n(sys_diff_clock_clk_n),
        .sys_diff_clock_clk_p(sys_diff_clock_clk_p));
endmodule

View File

File diff suppressed because it is too large Load Diff

View File

View File

@@ -0,0 +1,518 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "acceleration_scheduler_direct.h"
/*
 * ----------------------------------------------------------------
 * Sobel Filter: Offsets of the Registers Accessed by this Core
 * ----------------------------------------------------------------
 */
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL   0x00
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20

/*
 * ----------------------------------------------------------------
 * DMA: Channel Bases, Register Offsets and Bit Masks
 * ----------------------------------------------------------------
 *
 * A Register of a Channel is Reached by Adding the Channel Base
 * (XAXIDMA_TX_OFFSET or XAXIDMA_RX_OFFSET) to one of the Per-Channel
 * Register Offsets that Follow.
 */

/* Channel Bases. */
#define XAXIDMA_TX_OFFSET       0x00000000 /* Tx Channel Registers Base Offset. */
#define XAXIDMA_RX_OFFSET       0x00000030 /* Rx Channel Registers Base Offset. */

/* Per-Channel Register Offsets (Relative to the Channel Base). */
#define XAXIDMA_CR_OFFSET       0x00000000 /* Control Register. */
#define XAXIDMA_SR_OFFSET       0x00000004 /* Status Register. */
#define XAXIDMA_SRCADDR_OFFSET  0x00000018 /* Source Address Register. */
#define XAXIDMA_DESTADDR_OFFSET 0x00000018 /* Destination Address Register. */
#define XAXIDMA_BUFFLEN_OFFSET  0x00000028 /* Transfer Data Size Register. */

/* Control Register Bits. */
#define XAXIDMA_CR_RUNSTOP_MASK (1 << 0)   /* Start/Stop DMA Channel Mask (0x00000001). */
#define XAXIDMA_CR_RESET_MASK   (1 << 2)   /* Reset DMA Mask (0x00000004). */

/* Interrupt Bits. */
#define XAXIDMA_IRQ_IOC_MASK    (1 << 12)  /* Completion Interrupt Mask (0x00001000). */
#define XAXIDMA_IRQ_DELAY_MASK  (1 << 13)  /* Delay Interrupt Mask (0x00002000). */
#define XAXIDMA_IRQ_ERROR_MASK  (1 << 14)  /* Error Interrupt Mask (0x00004000). */
/* All Interrupts Mask (0x00007000). */
#define XAXIDMA_IRQ_ALL_MASK    (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK | XAXIDMA_IRQ_ERROR_MASK)

/*
 * ----------------------------------------------------------------
 * AXI Performance Monitor Unit (APM): Register Offsets and Bit Masks
 * ----------------------------------------------------------------
 */

/* Control Register Bits. */
#define XAPM_CR_GCC_RESET_MASK    (1 << 17) /* Global Clock Counter (GCC) Reset Mask (0x00020000). */
#define XAPM_CR_GCC_ENABLE_MASK   (1 << 16) /* Global Clock Counter (GCC) Enable Mask (0x00010000). */
#define XAPM_CR_MCNTR_RESET_MASK  (1 << 1)  /* Metrics Counter Reset Mask (0x00000002). */
#define XAPM_CR_MCNTR_ENABLE_MASK (1 << 0)  /* Metrics Counter Enable Mask (0x00000001). */

/* Register Offsets. */
#define XAPM_CTL_OFFSET      0x0300 /* Control Register Offset. */
#define XAPM_GCC_HIGH_OFFSET 0x0000 /* Global Clock Counter 32 to 63 bits (Upper) Register Offset. */
#define XAPM_GCC_LOW_OFFSET  0x0004 /* Global Clock Counter 0 to 31 bits (Lower) Register Offset. */
#define XAPM_MC0_OFFSET      0x0100 /* Metrics Counter 0 Register Offset. */
#define XAPM_MC1_OFFSET      0x0110 /* Metrics Counter 1 Register Offset. */
#define XAPM_MC2_OFFSET      0x0120 /* Metrics Counter 2 Register Offset. */
#define XAPM_MC3_OFFSET      0x0130 /* Metrics Counter 3 Register Offset. */
#define XAPM_MC4_OFFSET      0x0140 /* Metrics Counter 4 Register Offset. */
#define XAPM_MC5_OFFSET      0x0150 /* Metrics Counter 5 Register Offset. */
/*
* acceleration_scheduler_direct()
*
* The Hardware Functionality of the Acceleration Scheduler Direct Core.
*
* The Acceleration Scheduler Direct Core is Part of the Acceleration Group Direct and is Used to Manage the whole Acceleration Procedure.
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Direct as well as the Shared Timer (Shared APM) to Get Time Metrics.
* It, also, Interacts with the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
*
* The Sequential Steps of the Acceleration Procedure are as Follows:
*
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
* c --> Setup and Start the Sobel Filter.
* d --> Setup and Start the S2MM and MM2S DMA Transfers.
* e --> Wait for an Interrupt by the DMA on Completion of the Transfer.
* f --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
* g --> Disable the Counters of the AXI Performance Monitor Unit (APM).
* h --> Acknowledge the DMA Interrupt.
* i --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
* j --> Reset the Counters of the AXI Performance Monitor Unit (APM).
* k --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
* 03 to 13 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*
* Addressing Note:
*
* Every Access to an External Device or Memory is Made by Adding Byte Addresses/Offsets and Dividing the Sum by 4
* before Using it to Index mm2s_ext_cfg, as mm2s_ext_cfg is a Pointer to 32 Bit Words.
* NOTE(review): This Assumes that the Addresses are 4-Byte Aligned and that sizeof(ap_uint<32>) is 4 -- TODO Confirm.
*
* Return Value:
*
* The Function Always Returns 1 (there is no Error Path).
*/
int acceleration_scheduler_direct(/*01*/volatile ap_uint<32> *mm2s_ext_cfg,
/*02*/volatile ap_uint<1> *dma_intr_in,
/*03*/unsigned int dma_device_address,
/*04*/unsigned int sobel_device_address,
/*05*/unsigned int interrupt_manager_register_offset,
/*06*/unsigned int apm_device_address,
/*07*/unsigned int shared_apm_device_address,
/*08*/unsigned int shared_metrics_address,
/*09*/unsigned int image_cols,
/*10*/unsigned int image_rows,
/*11*/unsigned int host_mem_src_data_address,
/*12*/unsigned int host_mem_dst_data_address,
/*13*/unsigned int initiator_group
)
{
/*
* The mm2s_ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=mm2s_ext_cfg
/*
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
*/
#pragma HLS INTERFACE ap_none port=dma_intr_in
/*
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=mm2s_cfg
/*
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
/*
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
/*
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
/*
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
/*
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
* will Need to Access through the mm2s_ext_cfg AXI Master Interface in Order to Write the Metrics Information.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
/*
* The host_mem_src_data_address is a Register to Store the Source Address that the DMA will Use to Read the Initial Image Data.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=host_mem_src_data_address bundle=mm2s_cfg
/*
* The host_mem_dst_data_address is a Register to Store the Destination Address that the DMA will Use to Write the Processed Image Data.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=host_mem_dst_data_address bundle=mm2s_cfg
/*
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
/*
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
/*
* The initiator_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=initiator_group bundle=mm2s_cfg
/*
* The Return Value of the Function is, also, Exposed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> initial_data_register; // Used to Keep the Value Initially Read from the APM Control Register while the APM Counters are being Reset.
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
/*
* -----------------------
* Enable the APM Counters
* -----------------------
*/
//Read the Control Register of the APM.
//The Byte Address (Base Address + Register Offset) is Divided by 4 to Get the Index of the 32 Bit Word in mm2s_ext_cfg.
//The Same Addressing Scheme is Used by Every Access that Follows.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
* ---------------------------------------------------------------------------------------------------------------------
*
* The Metrics of this Acceleration Group are Located in the Metrics Memory at (sizeof(struct metrics) * initiator_group) Bytes
* from the shared_metrics_address.
*
* NOTE(review): The Lower and Upper Halves of the Counter are Read with Two Separate Accesses -- Confirm that a Carry
* from the Lower to the Upper Half between the Two Reads is not a Concern.
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
/*
* --------------------------------
* Setup and Start the Sobel Filter
* --------------------------------
*/
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
data_register = image_cols;
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
data_register = image_rows;
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Read the Control Register of the Sobel Filter.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
//Only Bit 7 (0x80) of the Read Value is Kept and then Bit 0 (0x01) is Set.
//NOTE(review): Presumably Bit 7 is the Auto Restart Bit and Bit 0 is the Start Bit of the Sobel Filter -- Confirm against its Register Map.
data_register = data_register & 0x80;
data_register = data_register | 0x01;
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start Device to DMA Transfer (S2MM)
* ---------------------------------------------
*
* The S2MM (Receive) Channel is Set Up before the MM2S (Transmit) Channel.
* NOTE(review): Presumably so that the DMA is Ready to Receive the Processed Data before any Data is Sent -- Confirm.
*/
//Get from the Internal Register (host_mem_dst_data_address) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
data_register = host_mem_dst_data_address;
//Write the Destination Address to the Destination Register of the DMA.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the S2MM Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
//NOTE(review): The Multiplication by 4 Presumably Stands for 4 Bytes per Pixel -- Confirm. The Product is Calculated with 32 Bit unsigned int Arithmetic.
data_register = (image_cols * image_rows) * 4;
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start DMA to Device Transfer (MM2S)
* ---------------------------------------------
*/
//Get from the Internal Register (host_mem_src_data_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
data_register = host_mem_src_data_address;
//Write the Source Address to the Source Register of the DMA.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the MM2S Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
data_register = (image_cols * image_rows) * 4;
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ------------------------
* Wait for a DMA Interrupt
* ------------------------
*
* This is a Polling Loop without a Timeout: the Core Stays here until the dma_intr_in Input is Read as 1.
*/
//Make an Initial Read of the Current State of the dma_intr_in Input.
dma_intr_in_value = *dma_intr_in;
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
while(dma_intr_in_value != 1)
{
//Keep Reading the Last Value of the dma_intr_in Input.
dma_intr_in_value = *dma_intr_in;
}
//Reset the Reader Variable (the Variable is not Read Again after this Point).
dma_intr_in_value = 0;
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
* ---------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(mm2s_ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
/*
* ------------------------
* Disable the APM Counters
* ------------------------
*/
//Read the Control Register of the APM.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
* IOC Stands for: Interrupt On Complete
* ---------------------------------------------------------------------
*
* Only the Status Register of the S2MM Channel is Read and Acknowledged; the MM2S Status Register is not Accessed.
*/
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
memcpy(&data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
/*
* ------------------------------------
* Acknowledge the Triggered Interrupts
* ------------------------------------
*/
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* --------------------------------------------------------------------------
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
* --------------------------------------------------------------------------
*
* The Metrics Counters 0 to 5 are Stored, in this Order, as: Read Transactions, Read Bytes, Write Transactions, Write Bytes,
* Stream Packets and Stream Bytes.
* NOTE(review): Which Metric each Counter Actually Measures Depends on the APM Configuration that is not Visible here -- Confirm.
*/
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory.
memcpy(&read_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory.
memcpy(&read_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory.
memcpy(&write_transactions, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory.
memcpy(&write_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory.
memcpy(&stream_packets, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory.
memcpy(&stream_bytes, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory.
memcpy(&gcc_lower, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory.
memcpy(&gcc_upper, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * initiator_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
/*
* ----------------------
* Reset the APM Counters
* ----------------------
*
* The Reset is Made with Two Writes: the First Sets the Reset Bits and the Second Clears them.
* Both Values are Derived from the Same Initial Read (initial_data_register) of the Control Register.
*/
//Read the Control Register of the APM.
memcpy(&initial_data_register, (const ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
//Write the new Value Back to the Control Register of the APM to Release the Reset.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ------------------------------------------------------------------------------------
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
* ------------------------------------------------------------------------------------
*
* The interrupt_manager_register_offset is Used on its own as the Byte Address of the Register (no Base Address is Added to it).
*/
//Get from the Internal Register (initiator_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
data_register = initiator_group;
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
memcpy((ap_uint<32> *)(mm2s_ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
//The Core Always Returns 1.
return 1;
}

View File

@@ -0,0 +1,126 @@
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
struct image_info
{
ap_uint<32> rows;
ap_uint<32> columns;
ap_uint<64> size;
};
/*
 * Metrics Record of a Single Acceleration Group.
 *
 * The First 20 Members are 32 Bit Fields whose Byte Offsets are Published through the *_OFFSET Macros of this Header
 * (APM_READ_TRANSACTIONS_OFFSET up to DMA_ACCEL_TIME_END_U_OFFSET). Code that Writes a Field through a Raw Address
 * Relies on those Offsets, so Members Must not be Reordered, Resized or Removed without Updating the Macros.
 * Members Ending in _l/_u Hold the Lower/Upper 32 Bits of a 64 Bit Counter Value.
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*
* No *_OFFSET Macros are Defined in this Header for the Members below.
* NOTE(review): They are Presumably Filled by the Driver/Application rather than by the Hardware Cores -- Confirm.
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
 * Status Words Stored at the End of struct shared_repository.
 *
 * There is One *_occupied_pid Word per Acceleration Group (2 Direct, 4 Indirect, 1 Scatter/Gather),
 * Following the Same Group Order as the Metrics Entries of struct shared_repository.
 * NOTE(review): Judging by the Names, each Word Presumably Holds the PID of the Process that Currently
 * Occupies the Group (and a "Free" Value otherwise) -- Confirm the Encoding Against the Driver Code.
 */
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy; // NOTE(review): Encoding (Flag or Bit Mask per Group) is not Visible Here -- Confirm.
ap_uint<32> open_modules; // NOTE(review): Presumably a Count of Open Driver Instances -- Confirm.
};
/*
 * Layout of the Shared Metrics Memory: One struct metrics Entry per Acceleration Group Followed by the Status Flags.
 *
 * The Order of the Entries is Part of the Memory Map. The Indirect Scheduler Source of this Commit Addresses its
 * Entry as (base + sizeof(struct metrics) * 2 + sizeof(struct metrics) * accel_group), i.e. it Skips the Two
 * Direct Entries that Come First. Reordering or Inserting Entries Would Break that Arithmetic.
 */
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics; // Entry 0
struct metrics accel_direct_1_shared_metrics; // Entry 1
struct metrics accel_indirect_0_shared_metrics; // Entry 2
struct metrics accel_indirect_1_shared_metrics; // Entry 3
struct metrics accel_indirect_2_shared_metrics; // Entry 4
struct metrics accel_indirect_3_shared_metrics; // Entry 5
struct metrics accel_sg_0_shared_metrics; // Entry 6
struct status_flags shared_status_flags; // Located after the Seven Metrics Entries.
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Batch Script: Synthesizes the Acceleration Scheduler Direct Core and Exports it as an IP Catalog Core.
#Open (or Create) the HLS Project.
open_project Acceleration_Scheduler_Direct
#The Top Level Function that Becomes the Hardware Core.
set_top acceleration_scheduler_direct
#The Source File that Contains the Top Level Function.
add_files acceleration_scheduler_direct.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Vivado HLS Reads a Unitless Period as Nanoseconds, i.e. 100 MHz).
create_clock -period 10 -name default
#Run C Synthesis.
csynth_design
#Package the Synthesized Core for the Vivado IP Catalog.
export_design -format ip_catalog -display_name "Acceleration Scheduler Direct" -version "3.5"
exit

View File

@@ -0,0 +1,715 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "acceleration_scheduler_indirect.h"
/*
* -----------------------------
* Registers of the Sobel Filter
* -----------------------------
*/
//All Offsets below are Byte Offsets that are Added to the Base Address of the Corresponding Device.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL 0x00 // Control Register (Bit 0 is Set to Start the Sobel Filter).
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18 // Image Rows Register.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20 // Image Columns Register.
/*
* ------------------------------
* Registers and Masks of the DMA
* ------------------------------
*/
/*
* Tx (MM2S) Channel Registers Base Offset.
*/
#define XAXIDMA_TX_OFFSET 0x00000000
/*
* Rx (S2MM) Channel Registers Base Offset.
*/
#define XAXIDMA_RX_OFFSET 0x00000030
/*
* This Set of Registers is Applicable to both Channels of the DMA.
* Add XAXIDMA_TX_OFFSET to Get to the TX Channel, and XAXIDMA_RX_OFFSET to Get to the RX Channel.
*/
#define XAXIDMA_CR_OFFSET 0x00000000 // Control Register.
#define XAXIDMA_SR_OFFSET 0x00000004 // Status Register.
#define XAXIDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register (Used with XAXIDMA_TX_OFFSET).
#define XAXIDMA_DESTADDR_OFFSET 0x00000018 // Destination Address Register (Used with XAXIDMA_RX_OFFSET, hence the Same Relative Offset as the Source Address Register).
#define XAXIDMA_BUFFLEN_OFFSET 0x00000028 // Transfer Data Size Register (Writing it Starts the Transfer).
#define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 // Start/Stop DMA Channel Mask.
#define XAXIDMA_CR_RESET_MASK 0x00000004 // Reset DMA Mask.
#define XAXIDMA_IRQ_IOC_MASK 0x00001000 // Completion Interrupt Mask.
#define XAXIDMA_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
#define XAXIDMA_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
#define XAXIDMA_IRQ_ALL_MASK 0x00007000 // All Interrupts Mask (IOC | Delay | Error).
/*
* -------------------------------------------------------------
* Registers and Masks of the AXI Performance Monitor Unit (APM)
* -------------------------------------------------------------
*/
#define XAPM_CR_GCC_RESET_MASK 0x00020000 // Global Clock Counter (GCC) Reset Mask.
#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 // Global Clock Counter (GCC) Enable Mask.
#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 // Metrics Counter Reset Mask.
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 // Metrics Counter Enable Mask.
#define XAPM_CTL_OFFSET 0x0300 // Control Register Offset.
#define XAPM_GCC_HIGH_OFFSET 0x0000 // Global Clock Counter 32 to 63 bits (Upper) Register Offset.
#define XAPM_GCC_LOW_OFFSET 0x0004 // Global Clock Counter 0 to 31 bits (Lower) Register Offset.
//The Scheduler Reads Metrics Counters 0 to 5 as: Read Transactions, Read Bytes, Write Transactions, Write Bytes, Stream Packets, Stream Bytes.
#define XAPM_MC0_OFFSET 0x0100 // Metrics Counter 0 Register Offset.
#define XAPM_MC1_OFFSET 0x0110 // Metrics Counter 1 Register Offset.
#define XAPM_MC2_OFFSET 0x0120 // Metrics Counter 2 Register Offset.
#define XAPM_MC3_OFFSET 0x0130 // Metrics Counter 3 Register Offset.
#define XAPM_MC4_OFFSET 0x0140 // Metrics Counter 4 Register Offset.
#define XAPM_MC5_OFFSET 0x0150 // Metrics Counter 5 Register Offset.
/*
* acceleration_scheduler_indirect()
*
* The Hardware Functionality of the Acceleration Scheduler Indirect Core.
*
* The Acceleration Scheduler Indirect Core is Part of the Acceleration Group Indirect and is Used to Manage the whole Acceleration Procedure.
* It Interacts with the DMA, Sobel Filter and APM of the Acceleration Group Indirect as well as the Shared Timer (Shared APM) to Get Time Metrics.
* It, also, Interacts with the CDMA Fetch and CDMA Send Peripherals and the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
*
* The Sequential Steps of the Acceleration Procedure are as Follows:
*
* a --> Set the Scheduler Buffer of the Fetch Scheduler with Info that the Fetch Scheduler will Use to Start the CDMA Fetch Transfer
* from the Host Memory to the FPGA's DDR3.
* b --> Wait for the Fetch Scheduler to Send a Start Signal (start Input) when the CDMA Fetch Has Completed the Transfer.
* c --> Enable the Counters of the AXI Performance Monitor Unit (APM).
* d --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
* e --> Setup and Start the Sobel Filter.
* f --> Setup and Start the S2MM and MM2S DMA Transfers.
* g --> Wait for an Interrupt by the DMA on Completion of the Transfer.
* h --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
* i --> Disable the Counters of the AXI Performance Monitor Unit (APM).
* j --> Acknowledge the DMA Interrupt.
* k --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
* l --> Reset the Counters of the AXI Performance Monitor Unit (APM).
* m --> Set the Scheduler Buffer of the Send Scheduler with Info that the Send Scheduler will Use to Start the CDMA Send Transfer
* from the FPGA's DDR3 to the Host Memory.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Start Signals from the Fetch Scheduler.
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA.
* 04 to 27 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*/
int acceleration_scheduler_indirect(/*01*/volatile ap_uint<32> *ext_cfg,
/*02*/volatile ap_uint<1> *start,
/*03*/volatile ap_uint<1> *dma_intr_in,
/*04*/unsigned int scheduler_buffer_base_address_f,
/*05*/unsigned int src_address_reg_offset_f,
/*06*/unsigned int dst_address_reg_offset_f,
/*07*/unsigned int data_size_reg_offset_f,
/*08*/unsigned int offset_reg_offset_f,
/*09*/unsigned int src_address_f,
/*10*/unsigned int dst_address_f,
/*11*/unsigned int offset_f,
/*12*/unsigned int scheduler_buffer_base_address_s,
/*13*/unsigned int src_address_reg_offset_s,
/*14*/unsigned int dst_address_reg_offset_s,
/*15*/unsigned int data_size_reg_offset_s,
/*16*/unsigned int offset_reg_offset_s,
/*17*/unsigned int src_address_s,
/*18*/unsigned int dst_address_s,
/*19*/unsigned int offset_s,
/*20*/unsigned int dma_base_address,
/*21*/unsigned int sobel_base_address,
/*22*/unsigned int image_cols,
/*23*/unsigned int image_rows,
/*24*/unsigned int accel_group,
/*25*/unsigned int shared_apm_base_address,
/*26*/unsigned int shared_metrics_base_address,
/*27*/unsigned int apm_base_address
)
{
/*
* The ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=ext_cfg
/*
* The start is a Single Bit Input which is Used to Receive External Start Signals from the Fetch Scheduler.
*/
#pragma HLS INTERFACE ap_none port=start
/*
* The dma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA.
*/
#pragma HLS INTERFACE ap_none port=dma_intr_in
/*
* The scheduler_buffer_base_address_f is a Register to Store the Base Address of the Scheduler Buffer of the Fetch Scheduler.
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_f bundle=int_cfg
/*
* The src_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Source Address that the CDMA Fetch will Read the Data from.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_f bundle=int_cfg
/*
* The dst_address_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Destination Address that the CDMA Fetch will Write the Data to.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_f bundle=int_cfg
/*
* The data_size_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Data Size of the CDMA Fetch Transfer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_f bundle=int_cfg
/*
* The offset_reg_offset_f is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Offset from the Source and Destination Base Addresses that the CDMA Fetch will Use to Make the Transfer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_f bundle=int_cfg
/*
* The src_address_f is a Register to Store the Source Address that the CDMA Fetch will Use to Read the Data.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_f bundle=int_cfg
/*
* The dst_address_f is a Register to Store the Destination Address that the CDMA Fetch will Use to Write the Data.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_f bundle=int_cfg
/*
* The offset_f is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_f bundle=int_cfg
/*
* The scheduler_buffer_base_address_s is a Register to Store the Base Address of the Scheduler Buffer of the Send Scheduler.
* This Base Address will be Needed by the ext_cfg AXI Master Interface to Access the Scheduler Buffer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address_s bundle=int_cfg
/*
* The src_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Source Address that the CDMA Send will Read the Data from.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_reg_offset_s bundle=int_cfg
/*
* The dst_address_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Destination Address that the CDMA Send will Write the Data to.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_reg_offset_s bundle=int_cfg
/*
* The data_size_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Data Size of the CDMA Send Transfer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=data_size_reg_offset_s bundle=int_cfg
/*
* The offset_reg_offset_s is a Register to Store the Offset in the Scheduler Buffer where we Should
* Write the Offset from the Source and Destination Base Addresses that the CDMA Send will Use to Make the Transfer.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_reg_offset_s bundle=int_cfg
/*
* The src_address_s is a Register to Store the Source Address that the CDMA Send will Use to Read the Data.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_s bundle=int_cfg
/*
* The dst_address_s is a Register to Store the Destination Address that the CDMA Send will Use to Write the Data.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_s bundle=int_cfg
/*
* The offset_s is a Register to Store the Offset from the Source and Destination Base Addresses where the Image Data Might be Present.
* This Register of the Core Can be Read/Written through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_s bundle=int_cfg
/*
* The dma_base_address is a Register to Store the Base Address of the DMA that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dma_base_address bundle=int_cfg
/*
* The sobel_base_address is a Register to Store the Base Address of the Sobel Filter that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=sobel_base_address bundle=int_cfg
/*
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_cols bundle=int_cfg
/*
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_rows bundle=int_cfg
/*
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=accel_group bundle=int_cfg
/*
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
/*
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
/*
* The apm_base_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=apm_base_address bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> initial_data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<1> start_value; // Used to Read the Last Value of the start Input Port.
ap_uint<1> dma_intr_in_value; // Used to Read the Last Value of the dma_intr_in Input Port.
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
ap_uint<32> write_transactions; // Store the Write Transactions from the APM.
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
/*
* -----------------------------------------------------------------------------------------------------------------------------------------
* Set the Registers of the Scheduler Buffer of the Fetch Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
* The Fetch Scheduler will Use the above to Start the CDMA Fetch Transfer from the Host Memory to the FPGA's DDR3.
* -----------------------------------------------------------------------------------------------------------------------------------------
*/
//Get from the Internal Register (src_address_f) the Source Address for the CDMA Fetch Transfer.
data_register = src_address_f;
//Write the Source Address for the CDMA Fetch Transfer to the Source Address Register in the Scheduler Buffer of the Fetch Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + src_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
//Get from the Internal Register (dst_address_f) the Destination Address for the CDMA Fetch Transfer.
data_register = dst_address_f;
//Write the Destination Address for the CDMA Fetch Transfer to the Destination Address Register in the Scheduler Buffer of the Fetch Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + dst_address_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
//Get from the Internal Register (offset_f) the Offset Value for the CDMA Fetch Transfer.
data_register = offset_f;
//Write the Offset Value for the CDMA Fetch Transfer to the Offset Register in the Scheduler Buffer of the Fetch Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + offset_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Fetch Transfer.
data_register = (image_cols * image_rows * 4);
//Write the Data Size for the CDMA Fetch Transfer to the Data Size Register in the Scheduler Buffer of the Fetch Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_f + data_size_reg_offset_f) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ----------------------------------------------
* Wait for Start Signal from the Fetch Scheduler
* ----------------------------------------------
*/
//Make an Initial Read of the Current State of the start Input.
start_value = *start;
//Keep Looping for as long as the start Input Does not Reach a Logic 1 Value.
while(start_value != 1)
{
//Keep Reading the Last Value of the start Input.
start_value = *start;
}
//Reset the Reader Variable.
start_value = 0;
/*
* -----------------------
* Enable the APM Counters
* -----------------------
*/
//Read the Control Register of the APM.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
* ---------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
/*
* --------------------------------
* Setup and Start the Sobel Filter
* --------------------------------
*/
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
data_register = image_cols;
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
data_register = image_rows;
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Read the Control Register of the Sobel Filter.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
//Set the Appropriate Masks According to the Recently Read Value that Will be Needed to Start the Sobel Filter.
data_register = data_register & 0x80;
data_register = data_register | 0x01;
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_base_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start Device to DMA Transfer (S2MM)
* ---------------------------------------------
*/
//Get from the Internal Register (src_address_s) of the Core the Destination Address that the DMA will Use to Write the Processed Image Data.
//NOTE that the Destination Address of the DMA S2MM Transfer is the Source Address of the CDMA Send Transfer.
data_register = src_address_s;
//Write the Destination Address to the Destination Register of the DMA.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the S2MM Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
data_register = (image_cols * image_rows * 4);
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start DMA to Device Transfer (MM2S)
* ---------------------------------------------
*/
//Get from the Internal Register (dst_address_f) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
//NOTE that the Destination Address of the CDMA Fetch Transfer is the Source Address of the DMA MM2S Transfer.
data_register = dst_address_f;
//Write the Source Address to the Source Register of the DMA.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the MM2S Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
data_register = (image_cols * image_rows * 4);
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ------------------------
* Wait for a DMA Interrupt
* ------------------------
*/
//Make an Initial Read of the Current State of the dma_intr_in Input.
dma_intr_in_value = *dma_intr_in;
//Keep Looping for as long as the dma_intr_in Input Does not Reach a Logic 1 Value.
while(dma_intr_in_value != 1)
{
//Keep Reading the Last Value of the dma_intr_in Input.
dma_intr_in_value = *dma_intr_in;
}
//Reset the Reader Variable.
dma_intr_in_value = 0;
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
* ---------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
/*
* ------------------------
* Disable the APM Counters
* ------------------------
*/
//Read the Control Register of the APM.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------
* Read the DMA S2MM Status Register to Get the IRQs (IOC, Delay, Error)
* IOC Stands for: Interrupt On Complete
* ---------------------------------------------------------------------
*/
//Read the S2MM Status Register of the DMA which among others Includes the Status of the DMA's IRQs.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
/*
* ------------------------------------
* Acknowledge the Triggered Interrupts
* ------------------------------------
*/
//Write the new Value Back to the Status Register of the DMA which Acknowledges the Triggered Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (dma_base_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* --------------------------------------------------------------------------
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
* --------------------------------------------------------------------------
*/
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * 2) + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
/*
* ----------------------
* Reset the APM Counters
* ----------------------
*/
//Read the Control Register of the APM.
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
//Write the new Value Back to the Control Register of the APM to Release the Reset.
memcpy((ap_uint<32> *)(ext_cfg + (apm_base_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -----------------------------------------------------------------------------------------------------------------------------------------
* Set the Registers of the Scheduler Buffer of the Send Scheduler with the Source and Destination Addresses, the Offset and the Data Size.
* The Send Scheduler will Use the above to Start the CDMA Send Transfer from the Host Memory to the FPGA's DDR3.
* -----------------------------------------------------------------------------------------------------------------------------------------
*/
//Get from the Internal Register (src_address_s) the Source Address for the CDMA Transfer.
data_register = src_address_s;
//Write the Source Address for the CDMA Send Transfer to the Source Address Register in the Scheduler Buffer of the Send Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + src_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
//Get from the Internal Register (dst_address_s) the Destination Address for the CDMA Send Transfer.
data_register = dst_address_s;
//Write the Destination Address for the CDMA Send Transfer to the Destination Address Register in the Scheduler Buffer of the Send Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + dst_address_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
//Get from the Internal Register (offset_s) the Offset Value for the CDMA Send Transfer.
data_register = offset_s;
//Write the Offset Value for the CDMA Send Transfer to the Offset Register in the Scheduler Buffer of the Send Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + offset_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
//Calculate from the Internal Registers (image_cols, image_rows) the Data Size for the CDMA Send Transfer.
data_register = (image_cols * image_rows * 4);
//Write the Data Size for the CDMA Send Transfer to the Data Size Register in the Scheduler Buffer of the Send Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address_s + data_size_reg_offset_s) / 4), &data_register, sizeof(ap_uint<32>));
return 1;
}

View File

@@ -0,0 +1,126 @@
/*
* Byte Offsets of the 32-bit Hardware Metrics Fields Inside a struct metrics Record.
*
* Each Value Matches the "Offset N Bytes" Position of the Corresponding Field of struct metrics (Defined Below).
* The _L/_U Pairs are the Lower/Upper 32-bit Halves of a 64-bit Counter Value.
*/
//AXI Performance Monitor (APM) Metrics
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
//CDMA Fetch Start/End Timestamps
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
//CDMA Send Start/End Timestamps
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
//DMA Acceleration Start/End Timestamps
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
/*
* struct image_info
*
* Describes an Image; it is Embedded in struct metrics as the shared_image_info Field.
*/
struct image_info
{
ap_uint<32> rows; // Number of Image Rows.
ap_uint<32> columns; // Number of Image Columns.
ap_uint<64> size; // Image Size. NOTE(review): Presumably in Bytes - Confirm Against the Code that Fills it.
};
/*
* struct metrics
*
* One Record of Metrics for a Single Acceleration Group.
*
* The First Twenty 32-bit Fields (Offsets 0 to 76) are Written by the Hardware Scheduler Cores
* Using the *_OFFSET Macros Defined Above, so the Order and the Size of these Fields Must Stay in Sync with those Macros.
* The Scheduler Cores also Use sizeof(struct metrics) to Step from One Record to the Next,
* so Adding, Removing or Resizing Fields Changes the Address of Every Following Record.
*/
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
/*
* Timestamps Stored as Lower (_l) and Upper (_u) 32-bit Halves
*/
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*
* 64-bit Start/End Pairs. They are not Written by the Scheduler Code Visible in this File;
* NOTE(review): Presumably Filled by the Driver and the Userspace Application - Confirm.
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
* struct status_flags
*
* Status Words Stored After the Per-Group Metrics Records in struct shared_repository.
* There is One *_occupied_pid Word for Each of the Seven Acceleration Groups (2 Direct, 4 Indirect, 1 Scatter/Gather).
* NOTE(review): The Field Names Suggest that Each *_occupied_pid Word Holds the PID of the Process
* that Currently Occupies the Corresponding Acceleration Group - Confirm Against the Driver.
*/
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy; // NOTE(review): Encoding (Flag or Bit Mask) is not Visible Here - Confirm.
ap_uint<32> open_modules; // NOTE(review): Presumably a Count of Open Driver Instances - Confirm.
};
/*
* struct shared_repository
*
* Layout of the Shared Metrics Memory: Seven Consecutive struct metrics Records (One per Acceleration Group)
* Followed by the Status Flags.
*
* The Scheduler Cores Locate a Record as (Base Address + sizeof(struct metrics) * Record Index),
* so the Order of the Records Below Defines the Record Index:
* 0-1 for the Direct Groups, 2-5 for the Indirect Groups and 6 for the Scatter/Gather Group.
*/
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Build Script for the Acceleration Scheduler Indirect Core.
#It Synthesizes the C++ Source and Exports the Result as an IP Catalog Core.

#Open the HLS Project and Select the Top Level Function to Synthesize
open_project Acceleration_Scheduler_Indirect
set_top acceleration_scheduler_indirect
#Add the Source File that Contains the Top Level Function
add_files acceleration_scheduler_indirect.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds by Default in Vivado HLS, i.e. 100 MHz)
create_clock -period 10 -name default
#Run the C Synthesis
csynth_design
#Package the Synthesized Core for the Vivado IP Catalog
export_design -format ip_catalog -display_name "Acceleration Scheduler Indirect" -version "2.0"
exit

View File

@@ -0,0 +1,476 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "acceleration_scheduler_sg_xdma.h"
/*
* -----------------------------
* Registers of the Sobel Filter
* -----------------------------
*
* Byte Offsets Relative to the Base Address of the Sobel Filter (sobel_device_address).
*/
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL 0x00 // Control Register Offset.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA 0x18 // Image Rows Register Offset.
#define XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA 0x20 // Image Columns Register Offset.
/*
* -------------------------------------------------------------
* Registers and Masks of the AXI Performance Monitor Unit (APM)
* -------------------------------------------------------------
*
* The Masks Apply to the Control Register (XAPM_CTL_OFFSET).
* The Offsets are Byte Offsets Relative to the Base Address of the APM.
*/
#define XAPM_CR_GCC_RESET_MASK 0x00020000 // Global Clock Counter (GCC) Reset Mask.
#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 // Global Clock Counter (GCC) Enable Mask.
#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 // Metrics Counter Reset Mask.
#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 // Metrics Counter Enable Mask.
#define XAPM_CTL_OFFSET 0x0300 // Control Register Offset.
#define XAPM_GCC_HIGH_OFFSET 0x0000 // Global Clock Counter 32 to 63 bits (Upper) Register Offset.
#define XAPM_GCC_LOW_OFFSET 0x0004 // Global Clock Counter 0 to 31 bits (Lower) Register Offset.
#define XAPM_MC0_OFFSET 0x0100 // Metrics Counter 0 Register Offset.
#define XAPM_MC1_OFFSET 0x0110 // Metrics Counter 1 Register Offset.
#define XAPM_MC2_OFFSET 0x0120 // Metrics Counter 2 Register Offset.
#define XAPM_MC3_OFFSET 0x0130 // Metrics Counter 3 Register Offset.
#define XAPM_MC4_OFFSET 0x0140 // Metrics Counter 4 Register Offset.
#define XAPM_MC5_OFFSET 0x0150 // Metrics Counter 5 Register Offset.
/*
* --------------------------------------
* Registers of the DMA SG PCIe Scheduler
* --------------------------------------
*
* Byte Offsets Relative to the Base Address of the DMA SG PCIe Scheduler (dma_sg_pcie_scheduler_base_address).
*/
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL 0x00 // Control Register Offset.
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE 0x04 // Global Interrupt Enable Register Offset.
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER 0x08 // Interrupt Enable Register Offset.
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR 0x0C // Interrupt Status Register Offset.
#define XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA 0x20 // Data Size Register for the Scatter/Gather Transfer.
/*
* acceleration_scheduler_sg_xdma()
*
* The Hardware Functionality of the Acceleration Scheduler Scatter/Gather Core.
*
* The Acceleration Scheduler Scatter/Gather Core is Part of the Acceleration Group Scatter/Gather and is Used to Manage the whole Acceleration Procedure.
* It Interacts with the DMA SG PCIe Scheduler, Sobel Filter and APM of its Acceleration Group as well as the Shared Timer (Shared APM) to Get Time Metrics.
* NOTE(review): The Original Text Said "Acceleration Group Direct" Here, which Looks Like a Copy-Paste from the Direct Core - Confirm.
* It, also, Interacts with the Interrupt Manager to Signalize the Completion of the Acceleration Procedure.
*
* The Sequential Steps of the Acceleration Procedure are as Follows:
*
* a --> Enable the Counters of the AXI Performance Monitor Unit (APM).
* b --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Started.
* c --> Setup and Start the Sobel Filter.
* d --> Enable the Interrupts of the DMA SG PCIe Scheduler.
* e --> Setup and Start the DMA SG PCIe Scheduler.
* f --> Wait for an Interrupt by the DMA SG PCIe Scheduler on Completion of the Acceleration.
* g --> Read the Current Value of the Shared Timer to Get the Time that the Acceleration Ended.
* h --> Disable the Counters of the AXI Performance Monitor Unit (APM).
* i --> Clear and Re-Enable the Interrupts of the DMA SG PCIe Scheduler.
* j --> Collect the Metrics from the Counters of the AXI Performance Monitor Unit (APM).
* k --> Reset the Counters of the AXI Performance Monitor Unit (APM).
* l --> Inform the Interrupt Manager About the Completion of the Acceleration Procedure.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
* 03 to 11 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*
* Addressing Convention:
*
* The ext_cfg Pointer Addresses 32-bit Words, so Every Byte Address (Base Address + Register Offset)
* is Divided by 4 to Get the Word Index that is Added to ext_cfg.
* The Metrics Record of this Core is Located at shared_metrics_address + sizeof(struct metrics) * accel_group.
*
* Return Value:
*
* Always 1. The Return Value is Exposed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
int acceleration_scheduler_sg_xdma(/*01*/volatile ap_uint<32> *ext_cfg,
/*02*/volatile ap_uint<1> *scheduler_intr_in,
/*03*/unsigned int dma_sg_pcie_scheduler_base_address,
/*04*/unsigned int sobel_device_address,
/*05*/unsigned int interrupt_manager_register_offset,
/*06*/unsigned int apm_device_address,
/*07*/unsigned int shared_apm_device_address,
/*08*/unsigned int shared_metrics_address,
/*09*/unsigned int image_cols,
/*10*/unsigned int image_rows,
/*11*/unsigned int accel_group
)
{
/*
* The ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=ext_cfg
/*
* The scheduler_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA SG PCIe Scheduler.
*/
#pragma HLS INTERFACE ap_none port=scheduler_intr_in
/*
* The dma_sg_pcie_scheduler_base_address is a Register to Store the Base Address of the DMA SG PCIe Scheduler that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dma_sg_pcie_scheduler_base_address bundle=mm2s_cfg
/*
* The sobel_device_address is a Register to Store the Base Address of the Sobel Filter that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=sobel_device_address bundle=mm2s_cfg
/*
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=mm2s_cfg
/*
* The apm_device_address is a Register to Store the Base Address of the AXI Performance Monitor Unit (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=apm_device_address bundle=mm2s_cfg
/*
* The shared_apm_device_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_apm_device_address bundle=mm2s_cfg
/*
* The shared_metrics_address is a Register to Store the Base Address of the Memory that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_metrics_address bundle=mm2s_cfg
/*
* The image_cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_cols bundle=mm2s_cfg
/*
* The image_rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=image_rows bundle=mm2s_cfg
/*
* The accel_group is a Register to Store the Acceleration Group Number (0-6) that this Core Belongs to.
* This Register is Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=accel_group bundle=mm2s_cfg
/*
* The Return Value of the Function is, also, Accessed through the AXI Slave Lite Interface (mm2s_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=return bundle=mm2s_cfg
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> initial_data_register; // Keeps the Value of the APM Control Register as it was Read Before the Reset Sequence.
ap_uint<32> read_transactions; // Store the Read Transactions from the APM.
ap_uint<32> read_bytes; // Store the Read Bytes from the APM.
ap_uint<32> write_transactions; // Store the Write Transactions from the APM
ap_uint<32> write_bytes; // Store the Write Bytes from the APM.
ap_uint<32> stream_packets; // Store the Stream Packets from the APM.
ap_uint<32> stream_bytes; // Store the Stream Bytes from the APM.
ap_uint<32> gcc_lower; // Store the Global Clock Counter Lower Register from the APM.
ap_uint<32> gcc_upper; // Store the Global Clock Counter Upper Register from the APM.
ap_uint<32> dma_accel_time_start_gcc_l; // Store the Acceleration Start Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_start_gcc_u; // Store the Acceleration Start Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_l; // Store the Acceleration End Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> dma_accel_time_end_gcc_u; // Store the Acceleration End Time Upper Register from the Shared Timer (Shared APM).
ap_uint<1> scheduler_intr_in_value; // Used to Read the Last Value of the scheduler_intr_in Input Port.
/*
* -----------------------
* Enable the APM Counters
* -----------------------
*/
//Read the Control Register of the APM.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the GCC and Metrics Counters.
data_register = data_register | XAPM_CR_GCC_ENABLE_MASK | XAPM_CR_MCNTR_ENABLE_MASK;
//Write the new Value Back to the Control Register of the APM to Enable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration Start Time
* ---------------------------------------------------------------------------------------------------------------------
*
* NOTE(review): The Lower and Upper Halves are Read with Two Separate Accesses (Lower First).
* Whether the Pair Stays Consistent when the Lower Half Wraps Between the Two Reads Depends on the APM - Confirm.
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_L_OFFSET) / 4), &dma_accel_time_start_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration Start Time.
memcpy(&dma_accel_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_START_U_OFFSET) / 4), &dma_accel_time_start_gcc_u, sizeof(ap_uint<32>));
/*
* --------------------------------
* Setup and Start the Sobel Filter
* --------------------------------
*/
//Get the Sobel Filter Columns from the Internal Register (image_cols) of the Core.
data_register = image_cols;
//Write the Sobel Filter Columns to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_COLS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Get the Sobel Filter Rows from the Internal Register (image_rows) of the Core.
data_register = image_rows;
//Write the Sobel Filter Rows to a Specific Offset of the Sobel Filter Device.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_ROWS_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Read the Control Register of the Sobel Filter.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
//Keep Only Bit 7 of the Recently Read Value and Set Bit 0 in Order to Start the Sobel Filter.
//NOTE(review): Presumably Bit 7 is the Auto Restart Bit and Bit 0 is the Start Bit of a Vivado HLS Generated Control Register - Confirm.
data_register = data_register & 0x80;
data_register = data_register | 0x01;
//Write the new Value Back to the Control Register of the Sobel Filter so that the Sobel Filter Gets Started.
memcpy((ap_uint<32> *)(ext_cfg + (sobel_device_address + XSOBEL_FILTER_S_AXI4_LITE_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------
* Enable the Interrupts for the DMA SG PCIe Scheduler
* ---------------------------------------------------
*/
//Read the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
//The OR with 0xFFFFFFFF Always Yields 0xFFFFFFFF, so the Value that was Read Does not Affect the Result.
data_register = data_register | 0xFFFFFFFF;
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
data_register = 0x1;
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -----------------------------------------
* Setup and Start the DMA SG PCIe Scheduler
* -----------------------------------------
*/
//Calculate the Image/Transfer Size According to the Internal Registers (image_cols, image_rows) of the Core.
//The Product is Multiplied by 4. NOTE(review): Presumably 4 Bytes per Pixel - Confirm. The Result is Truncated to 32 Bits.
data_register = image_rows * image_cols * 4;
//Write the Transfer Size to the Requested Data Size Register of the DMA SG PCIe Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_REQUESTED_DATA_SIZE_DATA) / 4), &data_register, sizeof(ap_uint<32>));
//Read the Control Register of the DMA SG PCIe Scheduler.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), sizeof(ap_uint<32>));
//Keep Only Bit 7 of the Recently Read Value and Set Bit 0 in Order to Start the DMA SG PCIe Scheduler.
data_register = data_register & 0x80;
data_register = data_register | 0x01;
//Write the new Value Back to the Control Register of the DMA SG PCIe Scheduler so that the DMA SG PCIe Scheduler Gets Started.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_AP_CTRL) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ------------------------------------------
* Wait for a DMA SG PCIe Scheduler Interrupt
* ------------------------------------------
*
* This is a Busy Wait without a Timeout: the Core Stays in the Loop Until the Input Becomes 1.
*/
//Make an Initial Read of the Current State of the scheduler_intr_in Input.
scheduler_intr_in_value = *scheduler_intr_in;
//Keep Looping for as long as the scheduler_intr_in Input Does not Reach a Logic 1 Value.
while(scheduler_intr_in_value != 1)
{
//Keep Reading the Last Value of the scheduler_intr_in Input.
scheduler_intr_in_value = *scheduler_intr_in;
}
//Reset the Reader Variable.
scheduler_intr_in_value = 0;
/*
* ---------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get DMA Acceleration End Time
* ---------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_L_OFFSET) / 4), &dma_accel_time_end_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the Acceleration End Time.
memcpy(&dma_accel_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the Acceleration End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + DMA_ACCEL_TIME_END_U_OFFSET) / 4), &dma_accel_time_end_gcc_u, sizeof(ap_uint<32>));
/*
* ------------------------
* Disable the APM Counters
* ------------------------
*/
//Read the Control Register of the APM.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Disable the GCC and Metrics Counters.
data_register = data_register & ~(XAPM_CR_GCC_ENABLE_MASK) & ~(XAPM_CR_MCNTR_ENABLE_MASK);
//Write the new Value Back to the Control Register of the APM to Disable the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -------------------------------------------------------------
* Clear and then Re-Enable the DMA SG PCIe Scheduler Interrupts
* -------------------------------------------------------------
*/
//Set a Mask to Clear the Interrupt Status Register of the DMA SG PCIe Scheduler.
//The data_register Still Holds the APM Control Value at this Point, but the OR with 0xFFFFFFFF Always Yields 0xFFFFFFFF.
data_register = data_register | 0xFFFFFFFF;
//Clear the Interrupt Status Register of the DMA SG PCIe Scheduler According to the Previous Mask.
//NOTE(review): If the Status Bits of the Scheduler Toggle on Write, Writing 1 to a Bit that is Currently 0 would Set it - Confirm the ISR Semantics.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_ISR) / 4), &data_register, sizeof(ap_uint<32>));
//Read the Interrupt Enable Register of the DMA SG PCIe Scheduler
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with a Mask to Configure the IER that all the Available IRQs Should be Enabled.
data_register = data_register | 0xFFFFFFFF;
//Write the new Value Back to the Interrupt Enable Register (IER) Register of the DMA SG PCIe Scheduler.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_IER) / 4), &data_register, sizeof(ap_uint<32>));
data_register = 0x1;
//Write the data_register Value to the Global Interrupt Enable Register (GIE) of the DMA SG PCIe Scheduler to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (dma_sg_pcie_scheduler_base_address + XDMA_SG_PCIE_SCHEDULER_CFG_ADDR_GIE) / 4), &data_register, sizeof(ap_uint<32>));
/*
* --------------------------------------------------------------------------
* Read the APM Metrics Counters and Store their Values to the Metrics Memory
* --------------------------------------------------------------------------
*
* Each Counter is Copied to the Field with the Matching *_OFFSET Inside the Metrics Record of this Acceleration Group.
*/
//Get the Read Transactions from the APM and Write it to the Shared Metrics Memory
memcpy(&read_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC0_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_TRANSACTIONS_OFFSET) / 4), &read_transactions, sizeof(ap_uint<32>));
//Get the Read Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&read_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC1_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_READ_BYTES_OFFSET) / 4), &read_bytes, sizeof(ap_uint<32>));
//Get the Write Transactions from the APM and Write it to the Shared Metrics Memory
memcpy(&write_transactions, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC2_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_TRANSACTIONS_OFFSET) / 4), &write_transactions, sizeof(ap_uint<32>));
//Get the Write Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&write_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC3_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_WRITE_BYTES_OFFSET) / 4), &write_bytes, sizeof(ap_uint<32>));
//Get the Stream Packets from the APM and Write it to the Shared Metrics Memory
memcpy(&stream_packets, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC4_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_PACKETS_OFFSET) / 4), &stream_packets, sizeof(ap_uint<32>));
//Get the Stream Bytes from the APM and Write it to the Shared Metrics Memory
memcpy(&stream_bytes, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_MC5_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_BYTES_OFFSET) / 4), &stream_bytes, sizeof(ap_uint<32>));
//Get the GCC Lower Register from the APM and Write it to the Shared Metrics Memory
memcpy(&gcc_lower, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_L_OFFSET) / 4), &gcc_lower, sizeof(ap_uint<32>));
//Get the GCC Upper Register from the APM and Write it to the Shared Metrics Memory
memcpy(&gcc_upper, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_address + (sizeof(struct metrics) * accel_group) + APM_GCC_U_OFFSET) / 4), &gcc_upper, sizeof(ap_uint<32>));
/*
* ----------------------
* Reset the APM Counters
* ----------------------
*
* The Reset is a Pulse: the Reset Bits are First Written as 1 and then Written Back as 0.
* Both Writes are Derived from the Same Initially Read Control Value (initial_data_register).
*/
//Read the Control Register of the APM.
memcpy(&initial_data_register, (const ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Accordingly to Reset the GCC and Metrics Counters.
data_register = initial_data_register | XAPM_CR_GCC_RESET_MASK | XAPM_CR_MCNTR_RESET_MASK;
//Write the new Value Back to the Control Register of the APM to Reset the GCC and Metrics Counters.
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Now Reverse the Value of the Previous Masks in order to Release the Reset.
data_register = initial_data_register & ~(XAPM_CR_GCC_RESET_MASK) & ~(XAPM_CR_MCNTR_RESET_MASK);
//Write the new Value Back to the Control Register of the APM to Release the Reset.
memcpy((ap_uint<32> *)(ext_cfg + (apm_device_address + XAPM_CTL_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ------------------------------------------------------------------------------------
* Inform the Interrupt Manager that this Core Has Completed the Acceleration Procedure
* ------------------------------------------------------------------------------------
*/
//Get from the Internal Register (accel_group) of the Core the Current Acceleration Group Number that this Core Belongs to.
data_register = accel_group;
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
//The interrupt_manager_register_offset is Used Directly as the Byte Address of that Register (no Base Address is Added Here).
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset) / 4), &data_register, sizeof(ap_uint<32>));
//The Core Always Reports 1 as its Return Value.
return 1;
}

View File

@@ -0,0 +1,126 @@
/*
 * Byte Offsets of the 32 Bit Fields at the Beginning of struct metrics.
 *
 * Every Macro Equals the Byte Position of the Field with the Matching Name in struct metrics (Declared Below).
 * The Schedulers Add an Offset to the Base Address of a Group's Metrics Record in order to Read or Write
 * a Single Field through their AXI Master Interface.
 */
//AXI Performance Monitor (APM) Counters.
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
//Lower (L) and Upper (U) 32 Bits of the APM Global Clock Counter (GCC).
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
//NOTE(review): The Macros below Look Like Lower (L) / Upper (U) Halves of 64 Bit Start/End Time Stamps
//for the CDMA Fetch, CDMA Send and DMA Acceleration Phases -- Confirm with the Code that Writes them.
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
/*
 * Description of an Image.
 *
 * It is Embedded in struct metrics as the shared_image_info Field.
 */
struct image_info
{
ap_uint<32> rows; // NOTE(review): Presumably the Number of Rows (Height) of the Image -- Confirm with the Code that Fills it.
ap_uint<32> columns; // NOTE(review): Presumably the Number of Columns (Width) of the Image -- Confirm with the Code that Fills it.
ap_uint<64> size; // NOTE(review): Presumably the Total Size of the Image Data; the Unit is not Visible Here -- TODO Confirm.
};
/*
 * Metrics Record of a Single Acceleration Group.
 *
 * The First 80 Bytes are Twenty 32 Bit Fields whose Byte Offsets are Mirrored by the *_OFFSET Macros of this File,
 * so the Order and the Width of those Fields Must not Change without Updating the Macros as well.
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
//NOTE(review): The cdma_* and dma_accel_* Fields Look Like Lower (_l) / Upper (_u) Halves of 64 Bit
//Start/End Time Stamps rather than APM Counters -- Confirm with the Code that Writes them.
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*
* No Offset Macros Exist in this File for the Fields below.
* NOTE(review): The Names Suggest Start/End Time Stamps per Phase; the Time Unit is not Visible Here -- TODO Confirm.
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
 * Status Words Shared Along with the Metrics (See struct shared_repository).
 *
 * There is One *_occupied_pid Word per Acceleration Group: Two Direct, Four Indirect and One Scatter/Gather.
 * NOTE(review): The Names Suggest that Each Word Holds the PID of the Process that Currently Occupies the Group
 * and that accelerator_busy / open_modules are Bookkeeping Values of the Driver -- Confirm with the Driver Code.
 */
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy;
ap_uint<32> open_modules;
};
/*
 * Layout of the Whole Shared Repository.
 *
 * It Consists of Seven Consecutive struct metrics Records (One per Acceleration Group) Followed by the Status Flags.
 * NOTE(review): The Schedulers Address a Group's Record as Base + sizeof(struct metrics) * accel_group, which Presumably
 * Relies on the Order of the Records below Matching the Acceleration Group Numbers -- Confirm before Reordering.
 */
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Build Script: Synthesizes the Acceleration Scheduler SG XDMA Core and Exports it as an IP.
#Create (or Open) the HLS Project.
open_project Acceleration_Scheduler_SG_XDMA
#Select the Top Level Function that will be Synthesized.
set_top acceleration_scheduler_sg_xdma
#Add the C++ Source File of the Core to the Project.
add_files acceleration_scheduler_sg_xdma.cpp
#Create (or Open) the Solution that Holds the Synthesis Settings and Results.
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds is the Default Unit of Vivado HLS, i.e. 100 MHz).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Package the Synthesized Design for the Vivado IP Catalog with the Given Display Name and Version.
export_design -format ip_catalog -display_name "Acceleration Scheduler SG XDMA" -version "3.5"
#Leave the Tool when the Export is Done.
exit

View File

@@ -0,0 +1,698 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "dma_sg_pcie_scheduler.h"
/*
* ------------------------------
* Registers and Masks of the DMA
* ------------------------------
*
* All the Offsets below are Byte Offsets.
* The Functions of this File Divide the Final Byte Address by 4 because the cfg AXI Master Interface
* is a Pointer to 32 Bit Words.
*/
/*
* Tx Channel Registers Base Offset (Used by this File for the MM2S Channel).
*/
#define XAXIDMA_TX_OFFSET 0x00000000
/*
* Rx Channel Registers Base Offset (Used by this File for the S2MM Channel).
*/
#define XAXIDMA_RX_OFFSET 0x00000030
/*
* This Set of Registers is Applicable to both Channels of the DMA.
* Add XAXIDMA_TX_OFFSET to Get to the TX Channel, and XAXIDMA_RX_OFFSET to Get to the RX Channel.
* The Source and Destination Address Registers Share the Same Offset Relative to their Channel's Base.
*/
#define XAXIDMA_CR_OFFSET 0x00000000 // Control Register.
#define XAXIDMA_SR_OFFSET 0x00000004 // Status Register.
#define XAXIDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register.
#define XAXIDMA_DESTADDR_OFFSET 0x00000018 // Destination Address Register.
#define XAXIDMA_BUFFLEN_OFFSET 0x00000028 // Transfer Data Size Register.
#define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 // Start/Stop DMA Channel Mask.
#define XAXIDMA_CR_RESET_MASK 0x00000004 // Reset DMA Mask.
#define XAXIDMA_IRQ_IOC_MASK 0x00001000 // Completion Interrupt Mask.
#define XAXIDMA_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
#define XAXIDMA_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
#define XAXIDMA_IRQ_ALL_MASK 0x00007000 // All Interrupts Mask (IOC | Delay | Error).
/*
* serve_mm2s_transfer()
*
* Invoked from the dma_sg_pcie_scheduler() Top Function.
*
* The Usability of this Function is as Follows:
*
* a --> Get from the Scatter/Gather List of the Source Memory the Physical Address of the Current Page to Transfer.
* b --> Set the Address Translation Register of the PCIe Bridge's Source AXI BAR with the Physical Address of the Current Page to Transfer.
* c --> Setup and Start the DMA.
*
* The Function Parameters are:
*
* 01 --> The AXI Master Interface of the Core (cfg).
* 02 --> The Base Address of the DMA.
* 03 --> The Data Size of the MM2S Transfer.
* 04 --> The Address of the Scatter/Gather List of the Source Memory.
* 05 --> The Address of the BCIe Bridge's Source AXI BAR.
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Source AXI BAR.
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Source Scatter/Gather List.
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
*/
int serve_mm2s_transfer(/*01*/volatile ap_uint<32> *cfg,
/*02*/unsigned int dma_device_address,
/*03*/unsigned int src_data_size,
/*04*/unsigned int sgl_address,
/*05*/unsigned int axi_bar_src_address,
/*06*/unsigned int axi_bar_src_cfg_address,
/*07*/int page_counter,
/*08*/ap_uint<32>current_transfer_size
)
{
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
/*
* ---------------------------------------------------------------------------------------------------------------------
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Source AXI BAR of the PCIe Bridge
* ---------------------------------------------------------------------------------------------------------------------
*/
//Get the 64 Bit Physical Address of the Current Page from the Source Scatter/Gather List.
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
data_register = data_register_array[0];
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Source AXI BAR.
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
data_register = data_register_array[1];
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Source AXI BAR.
memcpy((ap_uint<32> *)(cfg + (axi_bar_src_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start DMA to Device Transfer (MM2S)
* ---------------------------------------------
*/
//Get from the Internal Register (axi_bar_src_address) of the Core the Source Address that the DMA will Use to Read the Initial Image Data.
//The Source Address of the DMA MM2S Channel will be the Source AXI BAR which Corresponds to the Physical Address of the Current Page.
data_register = axi_bar_src_address;
//Write the Source Address to the Source Register of the DMA.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SRCADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the MM2S Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the MM2S DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the MM2S Channel.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Write the Transfer Size to the MM2S Length Register of the DMA which Starts the MM2S Transfer.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &current_transfer_size, sizeof(ap_uint<32>));
return 1;
}
/*
* serve_s2mm_transfer()
*
* Invoked from the dma_sg_pcie_scheduler() Top Function.
*
* The Usability of this Function is as Follows:
*
* a --> Get from the Scatter/Gather List of the Destination Memory the Physical Address of the Current Page to Transfer.
* b --> Set the Address Translation Register of the PCIe Bridge's Destination AXI BAR with the Physical Address of the Current Page to Transfer.
* c --> Setup and Start the DMA.
*
* The Function Parameters are:
*
* 01 --> The AXI Master Interface of the Core (cfg).
* 02 --> The Base Address of the DMA.
* 03 --> The Data Size of the S2MM Transfer.
* 04 --> The Address of the Scatter/Gather List of the Destination Memory.
* 05 --> The Address of the BCIe Bridge's Destination AXI BAR.
* 06 --> The Offset in the PCIe Bridge of the Address Translation Register that Refers to the Source AXI BAR.
* 07 --> The Current Value of the Page Counter in order to Know which Physical Address to Extract from the Source Scatter/Gather List.
* 08 --> The Transfer Size for the Current Page which Might be Less than the Page Size.
*/
int serve_s2mm_transfer(/*01*/volatile ap_uint<32> *cfg,
/*02*/unsigned int dma_device_address,
/*03*/unsigned int src_data_size,
/*04*/unsigned int sgl_address,
/*05*/unsigned int axi_bar_dst_address,
/*06*/unsigned int axi_bar_dst_cfg_address,
/*07*/int page_counter,
/*08*/ap_uint<32>current_transfer_size)
{
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
/*
* ---------------------------------------------------------------------------------------------------------------------
* Get the Physical Address of the Current Page of the Scatter/Gather List and Set the Destination AXI BAR of the PCIe Bridge
* ---------------------------------------------------------------------------------------------------------------------
*/
//Get the 64 Bit Physical Address of the Current Page from the Destination Scatter/Gather List.
//The data_register_array[0] Holds the 32 LSBs of the Physical Address.
//The data_register_array[1] Holds the 32 MSBs of the Physical Address.
memcpy(data_register_array, (const ap_uint<32> *)(cfg + ((sgl_address + (page_counter * sizeof(ap_uint<64>))) / 4)), sizeof(ap_uint<64>));
data_register = data_register_array[0];
//Write the 32 LSBs of the Physical Address of the Current Page to the Lower Register of the Destination AXI BAR.
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address) / 4), &data_register, sizeof(ap_uint<32>));
data_register = data_register_array[1];
//Write the 32 MSBs of the Physical Address of the Current Page to the Upper Register of the Destination AXI BAR.
memcpy((ap_uint<32> *)(cfg + (axi_bar_dst_cfg_address - 4) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ---------------------------------------------
* Setup and Start Device to DMA Transfer (S2MM)
* ---------------------------------------------
*/
//Get from the Internal Register (axi_bar_dst_address) of the Core the Destination Address that the DMA will Use to Read the Initial Image Data.
//The Destination Address of the DMA S2MM Channel will be the Destination AXI BAR which Corresponds to the Physical Address of the Current Page.
data_register = axi_bar_dst_address;
//Write the Destination Address to the Source Register of the DMA.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_DESTADDR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Read the S2MM Control Register of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Mask Required to Enable the S2MM DMA Channel.
data_register = data_register | XAXIDMA_CR_RUNSTOP_MASK;
//Write the new Value Back to the Control Register of the DMA in Order to Enable the S2MM Channel.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Write the Transfer Size to the S2MM Length Register of the DMA which Starts the S2MM Transfer.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET) / 4), &current_transfer_size, sizeof(ap_uint<32>));
return 1;
}
/*
* serve_mm2s_interrupt()
*
* Invoked from the dma_sg_pcie_scheduler() Top Function.
*
* The Usability of this Function is to Acknowledge Triggered Interrupts on the MM2S Channel of the DMA.
*
* The Function Parameters are:
*
* 01 --> The AXI Master Interface of the Core (cfg).
* 02 --> The Base Address of the DMA.
*/
int serve_mm2s_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
{
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
//Read the DMA MM2S Status Register of the DMA to Get the IRQs.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
//Write the new Value Back to the MM2S Status Register of the DMA which Acknowledges the Triggered Interrupts on the MM2S Channel.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
return 1;
}
/*
* serve_s2mm_interrupt()
*
* Invoked from the dma_sg_pcie_scheduler() Top Function.
*
* The Usability of this Function is to Acknowledge Triggered Interrupts on the S2MM Channel of the DMA.
*
* The Function Parameters are:
*
* 01 --> The AXI Master Interface of the Core (cfg).
* 02 --> The Base Address of the DMA.
*/
int serve_s2mm_interrupt(volatile ap_uint<32> *cfg, unsigned int dma_device_address)
{
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
//Read the DMA S2MM Status Register of the DMA to Get the IRQs.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXIDMA_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
data_register = data_register & XAXIDMA_IRQ_ALL_MASK;
//Write the new Value Back to the S2MM Status Register of the DMA which Acknowledges the Triggered Interrupts on the S2MM Channel.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
return 1;
}
/*
* dma_sg_pcie_scheduler() Top Function
*
* The Hardware Funtionality of the DMA SG PCIe Scheduler Core.
*
* The DMA SG PCIe Scheduler Core is Part of the Acceleration Group Scatter/Gather.
* It is Used to Manage the the MM2S and S2MM Channels of the DMA when a Scatter/Gather List is Required to Transfer the Image Data.
* The DMA SG PCIe Scheduler Interacts with the DMA of the Acceleration Group Scatter/Gather and the Configuration AXI Interface of the PCIe Bridge.
*
* The Sequential Steps of the Core's Functionality are as Follows:
*
* a --> Calculate the Number of Pages to Transfer for the MM2S and S2MM Channels of the DMA.
* b --> Enable the DMA MM2S Interrupts.
* c --> Enable the DMA S2MM Interrupts.
* d --> Start a Page Transfer over the MM2S Channel (See the serve_mm2s_transfer() Function for Details).
* e --> Start a Page Transfer over the S2MM Channel (See the serve_s2mm_transfer() Function for Details).
* f --> Loop for as long as Both Channels Require to Complete the Transfer of all the Pages (both_done).
* g --> In Every Loop Check if Either the MM2S or the S2MM Channels Have Triggered an Interrupt on Completion of the Page Transfer.
* h --> If any of the Channels Triggers an Interrupt then Clear the Channel's Interrupt
* (See the serve_mm2s_interrupt() and serve_s2MM_interrupt Functions for Details)
* and Start the Channel's next Page Transfer.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Interrupts from the DMA MM2S Channel.
* 03 --------> Single Bit Input Used to Receive External Interrupts from the DMA S2MM Channel.
* 04 to 12 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*/
int dma_sg_pcie_scheduler(/*01*/volatile ap_uint<32> *cfg,
/*02*/volatile ap_uint<1> *mm2s_intr_in,
/*03*/volatile ap_uint<1> *s2mm_intr_in,
/*04*/unsigned int dma_device_address,
/*05*/unsigned int requested_data_size,
/*06*/unsigned int page_size,
/*07*/unsigned int mm2s_sgl_address,
/*08*/unsigned int axi_bar_src_address,
/*09*/unsigned int axi_bar_src_cfg_address,
/*10*/unsigned int s2mm_sgl_address,
/*11*/unsigned int axi_bar_dst_address,
/*12*/unsigned int axi_bar_dst_cfg_address
)
{
/*
* The cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=cfg
/*
* The mm2s_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA MM2S Channel.
*/
#pragma HLS INTERFACE ap_none port=mm2s_intr_in
/*
* The s2mm_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the DMA S2MM Channel.
*/
#pragma HLS INTERFACE ap_none port=s2mm_intr_in
/*
* The dma_device_address is a Register to Store the Base Address of the DMA that this Core
* will Need to Access through the cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dma_device_address bundle=cfg
/*
* The requested_data_size is a Register to Store the Size of the Data that will be Transferred.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=requested_data_size bundle=cfg
/*
* The page_size is a Register to Store the Size of each Page(Usually 4K in Linux) that will be Transferred.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=page_size bundle=cfg
/*
* The mm2s_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Source Data.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=mm2s_sgl_address bundle=cfg
/*
* The axi_bar_src_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Read the Source Data.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_src_address bundle=cfg
/*
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_src_cfg_address bundle=cfg
/*
* The s2mm_sgl_address is a Register to Store the Address of the Scatter/Gather List of the Destination Data.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=s2mm_sgl_address bundle=cfg
/*
* The axi_bar_dst_address is a Register to Store the Address of the AXI BAR that the DMA will Use to Write the Destination Data.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_address bundle=cfg
/*
* The axi_bar_src_cfg_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_dst_cfg_address bundle=cfg
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
ap_uint<32> data_register_array[2]; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> mm2s_data_size; // The Data Size to Transfer for a Page of the MM2S Channel (The Last Page may not be Full).
ap_uint<32> s2mm_data_size; // The Data Size to Transfer for a Page of the S2MM Channel (The Last Page may not be Full).
ap_uint<1> dma_mm2s_intr_in_value; // Used to Read the Last Value of the dma_mm2s_intr_in_value Input Port.
ap_uint<1> dma_s2mm_intr_in_value; // Used to Read the Last Value of the dma_s2mm_intr_in_value Input Port.
int mm2s_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the MM2S Channel.
int s2mm_pages_counter = 0; // Used to Count the Number of Tranferred Pages for the S2MM Channel.
int mm2s_pages_number; // The Number of Pages to Transfer for the MM2S Channel.
int s2mm_pages_number; // The Number of Pages to Transfer for the S2MM Channel.
int mm2s_remaining_bytes; // Used to Count the Remaining Bytes of the MM2S Transfer.
int s2mm_remaining_bytes; // Used to Count the Remaining Bytes of the S2MM Transfer.
ap_uint<32> current_transfer_size;
int both_done = 0; // Flag to Know When Both Channels (MM2S/S2MM) are Done.
//Divide the Size of the Data to Transfer by the Page Size to Get the Number of Pages to Transfer over the MM2S and S2mm Channels.
mm2s_pages_number = requested_data_size / page_size;
s2mm_pages_number = requested_data_size / page_size;
/*
* If the Value of the MM2S Pages Number Multiplied by the Page Size is Less than the Initial Data Size
* then there is One More Page with Less Data than the Page Size.
*
* So, Increment mm2s_pages_number Variable by 1.
*/
if((mm2s_pages_number * page_size) < requested_data_size)
{
mm2s_pages_number = mm2s_pages_number + 1;
}
//Initialize the Remaining Bytes for the MM2S Channel to be Equal to the Data Transfer Size.
mm2s_remaining_bytes = requested_data_size;
/*
* If the Value of the S2MM Pages Number Multiplied by the Page Size is Less than the Initial Data Size
* then there is One More Page with Less Data than the Page Size.
*
* So, Increment s2mm_pages_number Variable by 1.
*/
if((s2mm_pages_number * page_size) < requested_data_size)
{
s2mm_pages_number = s2mm_pages_number + 1;
}
//Initialize the Remaining Bytes for the S2MM Channel to be Equal to the Data Transfer Size.
s2mm_remaining_bytes = requested_data_size;
/*
* ----------------------------------------------
* Enable the DMA MM2S Interrupts (DMA to Device)
* ----------------------------------------------
*/
//Read the Control Register of the MM2S Channel of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the DMA to Enable the MM2S Interrupts.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* ----------------------------------------------
* Enable the DMA S2MM Interrupts (Device to DMA)
* ----------------------------------------------
*/
//Read the Control Register of the S2MM Channel of the DMA.
memcpy(&data_register, (const ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXIDMA_IRQ_ERROR_MASK | XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the DMA to Enable the S2MM Interrupts.
memcpy((ap_uint<32> *)(cfg + (dma_device_address + XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
*/
if(mm2s_remaining_bytes >= page_size)
{
mm2s_data_size = page_size;
}
/*
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
*/
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
{
mm2s_data_size = mm2s_remaining_bytes;
}
current_transfer_size = mm2s_data_size;
//Start a DMA Page Transfer over the MM2S Channel
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
serve_mm2s_transfer(cfg,
dma_device_address,
requested_data_size,
mm2s_sgl_address,
axi_bar_src_address,
axi_bar_src_cfg_address,
0,
current_transfer_size);
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
/*
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
*/
if(s2mm_remaining_bytes >= page_size)
{
s2mm_data_size = page_size;
}
/*
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
*/
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
{
s2mm_data_size = s2mm_remaining_bytes;
}
current_transfer_size = s2mm_data_size;
//Start a DMA Page Transfer over the S2MM Channel
//The Transfer Can be of Size Equal to a whole Page Size or Just the Remaining Bytes According to the current_transfer_size Variable.
serve_s2mm_transfer(cfg,
dma_device_address,
requested_data_size,
s2mm_sgl_address,
axi_bar_dst_address,
axi_bar_dst_cfg_address,
0,
current_transfer_size);
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
//Start Looping for as Long as the Rest of the Pages for the MM2S and S2MM Channels are Being Transferred.
while(both_done < 2)
{
//Read the Current State of the mm2s_intr_in Input.
dma_mm2s_intr_in_value = *mm2s_intr_in;
//Read the Current State of the s2mm_intr_in Input.
dma_s2mm_intr_in_value = *s2mm_intr_in;
/*
* If we Have an Interrupt from the MM2S Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
*/
if(dma_mm2s_intr_in_value == 1)
{
//Acknowledge the Triggered Interrupt of the DMA MM2S Channel.
serve_mm2s_interrupt(cfg, dma_device_address);
//If the MM2S Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
if(mm2s_pages_counter < (mm2s_pages_number - 1))
{
/*
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the MM2S Channel.
*/
if(mm2s_remaining_bytes >= page_size)
{
mm2s_data_size = page_size;
}
/*
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the MM2S Channel.
*/
if((mm2s_remaining_bytes > 0) && (mm2s_remaining_bytes < page_size))
{
mm2s_data_size = mm2s_remaining_bytes;
}
current_transfer_size = mm2s_data_size;
//Start a DMA Page Transfer over the MM2S Channel According to the current_transfer_size Variable.
serve_mm2s_transfer(cfg,
dma_device_address,
requested_data_size,
mm2s_sgl_address,
axi_bar_src_address,
axi_bar_src_cfg_address,
mm2s_pages_counter + 1,
current_transfer_size);
//Decrement the MM2S Channel's Remaining Bytes According to the mm2s_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
mm2s_remaining_bytes = mm2s_remaining_bytes - mm2s_data_size;
}
//Increment the MM2S Pages Counter to Keep Track of the Remaining MM2S Pages to Transfer.
mm2s_pages_counter++;
}
/*
* If we Have an Interrupt from the S2MM Channel than we Should Clear the Interrupt and Start the Next Page Transfer.
*/
if(dma_s2mm_intr_in_value == 1)
{
//Acknowledge the Triggered Interrupt of the DMA S2MM Channel.
serve_s2mm_interrupt(cfg, dma_device_address);
//If the S2MM Pages Counter of the Current Page Has Not Reached the Total Number of Pages then Proceed to Start the Next Page Transfer.
if(s2mm_pages_counter < (s2mm_pages_number - 1))
{
/*
* If the Value of the Remaining Bytes is Larger that a Page Size then we Can Set the DMA to Transfer a whole Page over the S2MM Channel.
*/
if(s2mm_remaining_bytes >= page_size)
{
s2mm_data_size = page_size;
}
/*
* If the Value of the Remaining Bytes is Less that a Page Size then we Can Set the DMA to Transfer the Remaining Bytes over the S2MM Channel.
*/
if((s2mm_remaining_bytes > 0) && (s2mm_remaining_bytes < page_size))
{
s2mm_data_size = s2mm_remaining_bytes;
}
current_transfer_size = s2mm_data_size;
//Start a DMA Page Transfer over the S2MM Channel According to the current_transfer_size Variable.
serve_s2mm_transfer(cfg,
dma_device_address,
requested_data_size,
s2mm_sgl_address,
axi_bar_dst_address,
axi_bar_dst_cfg_address,
s2mm_pages_counter + 1,
current_transfer_size);
//Decrement the S2MM Channel's Remaining Bytes According to the s2mm_data_size Variable in order to Know Hom Many Bytes are left to be Transferred.
s2mm_remaining_bytes = s2mm_remaining_bytes - s2mm_data_size;
}
//Increment the S2MM Pages Counter to Keep Track of the Remaining S2MM Pages to Transfer.
s2mm_pages_counter++;
}
//If the MM2S Pages Counter Has Reached the Total Number of Pages then the MM2S Channel Has Finished the Data Transfer.
if(mm2s_pages_counter == (mm2s_pages_number))
{
//Increment the mm2s_pages_counter Variable so that will not Enter the Current if Condition Again.
mm2s_pages_counter++;
//Increment the both_done Variable on Behalf of the MM2S Channel.
//The both_done Variable will ONLY be Incremented Once on Behalf of the MM2S Channel because we will not Enter this if Condition Again.
//When the S2MM Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
both_done++;
}
//If the S2MM Pages Counter Has Reached the Total Number of Pages then the S2MM Channel Has Finished the Data Transfer.
if(s2mm_pages_counter == (s2mm_pages_number))
{
//Increment the s2mm_pages_counter Variable so that will not Enter the Current if Condition Again.
s2mm_pages_counter++;
//Increment the both_done Variable on Behalf of the S2MM Channel.
//The both_done Variable will ONLY be Incremented Once on Behalf of the S2MM Channel because we will not Enter this if Condition Again.
//When the MM2S Channel, also, Increments the both_done Variable the Data Transfer is Completed (both_done =2).
both_done++;
}
}
//Reset the Variables.
dma_mm2s_intr_in_value = 0;
dma_s2mm_intr_in_value = 0;
both_done = 0;
return 1;
}

View File

@@ -0,0 +1,126 @@
/*
 * Byte Offsets of the 32-Bit Fields Located at the Start of struct metrics (Defined Right After these Macros).
 * Each Value Matches the "Offset N Bytes" Comment of the Corresponding Field of struct metrics,
 * so the Macros and the Field Order of the Structure Must be Kept in Sync.
 */
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
/*
 * struct image_info
 *
 * Describes an Image with Two 32-Bit Fields and One 64-Bit Field.
 * It is Embedded in struct metrics as the shared_image_info Field.
 */
struct image_info
{
ap_uint<32> rows; // 32-Bit rows Field (Presumably the Number of Image Rows - Confirm with the Code that Fills it).
ap_uint<32> columns; // 32-Bit columns Field (Presumably the Number of Image Columns - Confirm with the Code that Fills it).
ap_uint<64> size; // 64-Bit size Field (Presumably the Image Size; the Unit is not Visible Here - Confirm).
};
/*
 * struct metrics
 *
 * One Record of Metrics.
 * The First 80 Bytes are 32-Bit Fields whose Byte Offsets are Given by the Trailing Comments
 * and by the *_OFFSET Macros Defined Right Before the Structures.
 * The Fields are Followed by an Embedded struct image_info and by 64-Bit Time Fields.
 *
 * NOTE the Field Order Must not Change, Otherwise the Documented Offsets (and the *_OFFSET Macros) Become Wrong.
 * NOTE the _l and _u Suffixes Presumably Stand for the Lower and Upper 32 Bits of a 64-Bit Counter Value - Confirm.
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
* (64-Bit Start/End Pairs; No *_OFFSET Macros are Defined for these Fields.)
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
 * struct status_flags
 *
 * Nine 32-Bit Status Words: One *_occupied_pid Word for each of the Seven Accelerators
 * (2 Direct, 4 Indirect, 1 Scatter/Gather as Suggested by the Names) plus Two Global Words.
 *
 * NOTE(review) the Names Suggest that each *_occupied_pid Word Holds the PID of the Process
 * that Currently Occupies the Accelerator - Confirm with the Code that Writes these Fields.
 */
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy; // Presumably a Global Busy Indication - the Encoding is not Visible Here, Confirm.
ap_uint<32> open_modules; // Presumably a Count or Mask of Open Modules - the Encoding is not Visible Here, Confirm.
};
/*
 * struct shared_repository
 *
 * Aggregates Seven struct metrics Records (One per Accelerator: 2 Direct, 4 Indirect, 1 Scatter/Gather)
 * Followed by One struct status_flags Record.
 *
 * NOTE(review) the Seven Metrics Members are Laid Out Back to Back in Declaration Order, so Code that
 * Addresses them as base + (sizeof(struct metrics) * index) Depends on this Order - Confirm before Reordering.
 */
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Build Script: Synthesizes the dma_sg_pcie_scheduler Function and Exports it as an IP Catalog Core.
#Open the HLS Project Named DMA_SG_PCIe_Scheduler.
open_project DMA_SG_PCIe_Scheduler
#Select the dma_sg_pcie_scheduler Function as the Top Level Function for Synthesis.
set_top dma_sg_pcie_scheduler
#Add the Source File of the Core to the Project.
add_files dma_sg_pcie_scheduler.cpp
#Open the Solution Named solution1 inside the Project.
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Set the Target Clock Period to 10 (No Unit is Given; Vivado HLS Presumably Treats it as Nanoseconds, i.e. 100 MHz - Confirm).
create_clock -period 10 -name default
#Run the C Synthesis of the Design.
csynth_design
#Export the Synthesized Design in IP Catalog Format with the Given Display Name and Version.
export_design -format ip_catalog -display_name "DMA SG PCIe Scheduler" -version "1.0"
#Leave the Tool when the Export is Done.
exit

View File

@@ -0,0 +1,513 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "fetch_scheduler.h"
/*
* ------------------------------
* Registers and Masks of the DMA
* ------------------------------
*/
//NOTE the *_OFFSET Values Below are Byte Offsets that are Added to the Base Address of the CDMA (cdma_base_address).
#define XAXICDMA_CR_OFFSET 0x00000000 // Control Register.
#define XAXICDMA_SR_OFFSET 0x00000004 // Status Register.
#define XAXICDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET 0x00000020 // Destination Address Register.
#define XAXICDMA_BTT_OFFSET 0x00000028 // Bytes to Transfer Register.
#define XAXICDMA_CR_RESET_MASK 0x00000004 // Reset CDMA Mask.
#define XAXICDMA_XR_IRQ_IOC_MASK 0x00001000 // Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
#define XAXICDMA_XR_IRQ_ALL_MASK 0x00007000 // All Interrupt Mask (Bitwise OR of the IOC, Delay and Error Masks).
/*
* --------------------------------------------------------------------------
* Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
* --------------------------------------------------------------------------
*/
//NOTE the Upper Half of the Global Clock Counter is Located at the Lower Offset (0x0000) and the Lower Half at Offset 0x0004.
//The Offsets are Byte Offsets that are Added to the Base Address of the Shared Timer (shared_apm_base_address).
#define XAPM_GCC_HIGH_OFFSET 0x0000 //Global Clock Counter 32 to 63 Bits (Upper).
#define XAPM_GCC_LOW_OFFSET 0x0004 //Global Clock Counter 0 to 31 Bits (Lower).
/*
* fetch_scheduler()
*
* The Hardware Functionality of the Fetch Scheduler Core.
*
* The Fetch Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
* The Responsibility of this Core is to Manage the Procedure of Fetching Data to the DDR3 Memory that will be Processed by the AGIs.
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
* If it Finds Information for new Transfer it Starts the CDMA Fetch Core to Transfer Image Data from the Host's Memory to the FPGA's DDR3.
* The Corresponding AGIs will be then Signaled by the Fetch Scheduler to Process the Image Data.
*
* When an AGI wants to Request Image Data from the Fetch Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Fetch Scheduler.
*
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
* Each Set Corresponds to One of the 4 AGIs.
*
* When an AGI Writes the Above Information to the Scheduler Buffer, the Fetch Scheduler Starts a CDMA Transfer Accordingly
* to Fetch the Image Data in the FPGA's DDR3.
*
* The Sequential Steps of the Acceleration Procedure are as Follows:
*
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
*
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Fetch.
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
* b --> Enable the Interrupts on the CDMA Fetch Core.
* c --> Setup the CDMA with the Source and Destination Addresses.
* If the Source Data Should be Fetched through the PCIe Bridge then Get the Source Address from the Scheduler Buffer and Set the
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
* Then Set the Source Address Register of the CDMA Fetch Core to be the Corresponding AXI BAR.
* If the Source Data Should not be Fetched through the PCIe Bridge then Just Set the Source Address Register of the CDMA Fetch Core
* with the Source Address of the Scheduler Buffer.
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Started.
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Fetch Transfer.
* f --> Wait for an Interrupt by the CDMA Fetch on Completion of the Transfer.
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Fetch Transfer Ended.
* h --> Acknowledge the CDMA Fetch Interrupt.
* i --> Reset the CDMA Fetch Core.
* j --> Re-Enable the Interrupts on the CDMA Fetch Core.
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
* l --> Send a Start Signal to the Acceleration Scheduler Indirect of the Corresponding AGI to Initiate the Acceleration Procedure.
*
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
*
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Fetch Core.
* 03 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
* 04 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
* 05 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
* 06 --------> Single Bit Output Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
* 07 to 19 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
*/
int fetch_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
/*02*/volatile ap_uint<1> *cdma_intr_in,
/*03*/volatile ap_uint<1> *start_0,
/*04*/volatile ap_uint<1> *start_1,
/*05*/volatile ap_uint<1> *start_2,
/*06*/volatile ap_uint<1> *start_3,
/*07*/unsigned int cdma_base_address,
/*08*/unsigned int scheduler_buffer_base_address,
/*09*/unsigned int src_address_first_reg_offset,
/*10*/unsigned int dst_address_first_reg_offset,
/*11*/unsigned int data_size_first_reg_offset,
/*12*/unsigned int offset_first_reg_offset,
/*13*/unsigned int step_offset,
/*14*/unsigned int shared_apm_base_address,
/*15*/unsigned int shared_metrics_base_address,
/*16*/unsigned int axi_bar_base_address,
/*17*/unsigned int pcie_ctl_datr_address,
/*18*/unsigned int pcie_mode,
/*19*/unsigned int accel_group_jump
)
{
/*
* The ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=ext_cfg
/*
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Fetch Core.
*/
#pragma HLS INTERFACE ap_none port=cdma_intr_in
/*
* The start_0 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI0.
*/
#pragma HLS INTERFACE ap_ovld port=start_0
/*
* The start_1 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI1.
*/
#pragma HLS INTERFACE ap_ovld port=start_1
/*
* The start_2 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI2.
*/
#pragma HLS INTERFACE ap_ovld port=start_2
/*
* The start_3 is a Single Bit Output which is Used to Send Start Signals to the Acceleration Scheduler Indirect of the AGI3.
*/
#pragma HLS INTERFACE ap_ovld port=start_3
/*
* The cdma_base_address is a Register to Store the Base Address of the CDMA Fetch that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
/*
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
/*
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
/*
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
/*
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
/*
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
/*
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
* in order to Locate the Next Set of Registers.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
/*
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
/*
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
/*
* The axi_bar_base_address is a Register to Store the Base Address of the Source AXI BAR of the PCIe Bridge that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Read the Image Data over the PCIe Bus.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
/*
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Source AXI BAR.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
/*
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Source Image Data through the PCIe Bridge or not.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
/*
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
* to Store the Time Metrics that Refer to the Current AGI.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
/*
* The return Port of the Function is, also, Mapped to the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
int repeat; // Index (0 to 3) of the Set of Registers of the Scheduler Buffer that is Currently Checked.
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> irq; // Used to Temporarily Store the IRQ Mask.
ap_uint<32> source_address_register; // Used to Temporarily Store the Value of the Source Address Register of the Scheduler Buffer.
ap_uint<32> destination_address_register; // Used to Temporarily Store the Value of the Destination Address Register of the Scheduler Buffer.
ap_uint<32> data_size_register; // Used to Temporarily Store the Value of the Data Size Register of the Scheduler Buffer.
ap_uint<32> offset_register; // Used to Temporarily Store the Value of the Offset Register of the Scheduler Buffer.
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in Input Port.
ap_uint<32> cdma_fetch_time_start_gcc_l; // Store the CDMA Fetch Transfer Start Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_fetch_time_start_gcc_u; // Store the CDMA Fetch Transfer Start Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_fetch_time_end_gcc_l; // Store the CDMA Fetch Transfer End Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_fetch_time_end_gcc_u; // Store the CDMA Fetch Transfer End Time Upper Register from the Shared Timer (Shared APM).
/*
* Start an Infinite Loop.
* NOTE the Loop Has no break Statement so the Core Keeps Polling the Scheduler Buffer for as long as it Runs.
*/
while(1)
{
/*
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Fetch Transfer Request
* by the AGI that Refers to the Current Set of Registers.
*/
for(repeat = 0; repeat < 4; repeat++)
{
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
//NOTE the ext_cfg Points to 32-Bit Words, so every Byte Address is Divided by 4 in order to Get the Word Index.
//The Division Truncates, so the Byte Addresses are Expected to be Multiples of 4.
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//If the Data Size Register is not Empty then we Have a New CDMA Fetch Transfer Request.
//Else the Fetch Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
if(data_size_register != 0)
{
/*
* --------------------------------------------
* Enable the Interrupts on the CDMA Fetch Core
* --------------------------------------------
*/
//Read the Control Register of the CDMA Fetch Core.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -------------------------------------------------------------------------
* Setup the Source and Destination Address Registers of the CDMA Fetch Core
* -------------------------------------------------------------------------
*/
//If the PCIe Mode is Enabled then the Source Data Should be Read through the PCIe Bridge.
//This Mode Requires to Set the Address Translation Register of the Source AXI BAR of the PCI Bridge.
if(pcie_mode == 1)
{
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Address Translation Register of the Source AXI BAR of the PCI Bridge with the Source Physical Address.
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &source_address_register, sizeof(ap_uint<32>));
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Source Address Register of the CDMA Fetch Core to be the Specified Source AXI BAR along with a Possible Offset.
address = axi_bar_base_address + offset_register;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
}
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
//NOTE any Value of pcie_mode Other than 1 Takes this Path.
else
{
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Source Address Register of the CDMA Fetch Core with the Source Address along with the Offset Read from the Scheduler Buffer.
address = source_address_register + offset_register;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
}
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Destination Address Register of the CDMA Fetch Core with the Destination Address.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &destination_address_register, sizeof(ap_uint<32>));
/*
* ----------------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Transfer Start Time
* ----------------------------------------------------------------------------------------------------------------------------
*/
//NOTE the Metrics Structure that is Written is the one with Index (accel_group_jump + repeat) Counting from shared_metrics_base_address,
//since both Values are Multiplied by sizeof(struct metrics) and Added to the Base Address.
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Transfer Start Time.
memcpy(&cdma_fetch_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_L_OFFSET) / 4), &cdma_fetch_time_start_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Transfer Start Time.
memcpy(&cdma_fetch_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the CDMA Fetch Transfer Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_START_U_OFFSET) / 4), &cdma_fetch_time_start_gcc_u, sizeof(ap_uint<32>));
/*
* ---------------------------------------------------------------------------------------------
* Setup the Bytes To Transfer (BTT) Register of the CDMA Fetch Core which Triggers the Transfer
* ---------------------------------------------------------------------------------------------
*/
//Set the Bytes To Transfer Register of the CDMA Fetch Core with the Transfer Size in Bytes.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
/*
* -------------------------
* Wait for a CDMA Interrupt
* -------------------------
*/
//NOTE there is no Timeout: the Core Stays in the Loop Below until the cdma_intr_in Input Reads 1.
//Make an Initial Read of the Current State of the cdma_intr_in Input.
cdma_intr_in_value = *cdma_intr_in;
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
while(cdma_intr_in_value != 1)
{
//Keep Reading the Last Value of the cdma_intr_in Input.
cdma_intr_in_value = *cdma_intr_in;
}
//Reset the Reader Variable.
cdma_intr_in_value = 0;
/*
* -------------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Fetch Transfer End Time
* -------------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Fetch Transfer End Time.
memcpy(&cdma_fetch_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the CDMA Fetch Transfer End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_L_OFFSET) / 4), &cdma_fetch_time_end_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Fetch Transfer End Time.
memcpy(&cdma_fetch_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the CDMA Fetch Transfer End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_FETCH_TIME_END_U_OFFSET) / 4), &cdma_fetch_time_end_gcc_u, sizeof(ap_uint<32>));
/*
* ------------------------------------
* Acknowledge the CDMA Fetch Interrupt
* ------------------------------------
*/
//Read the Status Register of the CDMA Fetch Core which among others Includes the Status of the DMA's IRQs.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXICDMA_XR_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
//Write the new Value Back to the Status Register of the CDMA Fetch Core which Acknowledges the Triggered Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
/*
* -------------------------
* Reset the CDMA Fetch Core
* -------------------------
*/
//Write the Reset Mask to the Control Register of the CDMA Fetch Core in order to Reset the Core.
//NOTE ONLY the Reset Mask is Written, so every other Bit of the Control Register is Written as Zero.
data_register = XAXICDMA_CR_RESET_MASK;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -----------------------------------------------
* Re-Enable the Interrupts on the CDMA Fetch Core
* -----------------------------------------------
*/
//NOTE(review) the Control Register is Read Back Right After the Reset Write without Polling for the Reset to Complete.
//Confirm against the CDMA Documentation that no Wait is Required Here.
//Read the Control Register of the CDMA Fetch Core.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the CDMA Fetch Core to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
* NOTE Clearing the Data Size Register Prevents the Same Request from being Served Again in the Next Round.
*/
data_register = 0;
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
/*
* Each Iteration Refers to a Specific AGI.
* Check the Current Iteration Value and Start the Acceleration Scheduler Indirect of the Correct AGI.
*/
if(repeat == 0)
{
//Trigger the start_0 Signal for one Clock Cycle.
*start_0 = 0;
*start_0 = 1;
}
if(repeat == 1)
{
//Trigger the start_1 Signal for one Clock Cycle.
*start_1 = 0;
*start_1 = 1;
}
if(repeat == 2)
{
//Trigger the start_2 Signal for one Clock Cycle.
*start_2 = 0;
*start_2 = 1;
}
if(repeat == 3)
{
//Trigger the start_3 Signal for one Clock Cycle.
*start_3 = 0;
*start_3 = 1;
}
}
}
}
//NOTE this Statement is Never Reached because the while(1) Loop Above Has no Exit.
return 1;
}

View File

@@ -0,0 +1,126 @@
/*
 * Byte Offsets of the 32-Bit Fields Located at the Start of struct metrics (Defined Right After these Macros).
 * Each Value Matches the "Offset N Bytes" Comment of the Corresponding Field of struct metrics,
 * so the Macros and the Field Order of the Structure Must be Kept in Sync.
 */
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
/*
 * struct image_info
 *
 * Describes an Image with Two 32-Bit Fields and One 64-Bit Field.
 * It is Embedded in struct metrics as the shared_image_info Field.
 */
struct image_info
{
ap_uint<32> rows; // 32-Bit rows Field (Presumably the Number of Image Rows - Confirm with the Code that Fills it).
ap_uint<32> columns; // 32-Bit columns Field (Presumably the Number of Image Columns - Confirm with the Code that Fills it).
ap_uint<64> size; // 64-Bit size Field (Presumably the Image Size; the Unit is not Visible Here - Confirm).
};
/*
* metrics
*
* The Set of Metrics that is Kept for one Acceleration Group.
*
* The First 80 Bytes are 32-Bit Fields whose Byte Offsets are Annotated on each Line.
* They are Followed by the Image Information (Offset 80 Bytes) and by 64-Bit Kernel/Userspace Time Fields.
* The Order and Width of the Fields Define the Memory Layout, so they Must not be Rearranged.
*/
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
/*
* Start/End Time Fields of the CDMA Fetch, CDMA Send and DMA Acceleration Phases.
* Each Time is Split in a _l and a _u 32-Bit Field.
*/
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
/*
* Rows, Columns and Size of the Image that the Metrics Refer to.
*/
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
* status_flags
*
* Status Fields that are Shared for all the Acceleration Groups.
*
* There is one *_occupied_pid Field for each of the 7 Acceleration Groups (2 Direct, 4 Indirect and 1 SG).
* NOTE(review): Each *_occupied_pid presumably Holds the PID of the Process that Currently Occupies the Group -- Confirm Against the Driver.
* NOTE(review): The Exact Encoding of accelerator_busy and open_modules is not Visible Here.
*/
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy;
ap_uint<32> open_modules;
};
/*
* shared_repository
*
* The Complete Shared Structure: one metrics Structure for each of the 7 Acceleration Groups
* (Direct 0-1, Indirect 0-3 and SG 0) Followed by the Common status_flags Structure.
* The Order of the Members Defines where each Group's Metrics are Located, so it Must not be Changed.
*/
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Batch Script that Synthesizes the Fetch Scheduler Core and Exports it as an IP.
#Open (or Create) the HLS Project.
open_project Fetch_Scheduler
#The Top-Level Function that will be Synthesized.
set_top fetch_scheduler
#The Source File that Contains the Top-Level Function.
add_files fetch_scheduler.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds is the Tool's Default Unit, i.e. 100 MHz).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Package the Generated RTL for the IP Catalog.
export_design -format ip_catalog -display_name "Fetch Scheduler" -version "1.0"
exit

View File

@@ -0,0 +1,102 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "info_memory_block.h"
/*
* info_memory_block()
*
* The Hardware Functionality of the Info Memory Block Core.
*
* The Info Memory Block Core is Used to Aid the Acceleration Procedure of the Acceleration Groups Indirect (AGIs).
* It is Accessed by the Acceleration Scheduler Indirect Cores of the AGIs as well as the Fetch and Send Schedulers.
*
* It Could be Considered as a Block of 16 Registers.
* The Registers are Categorized in 4 Groups/Sets with 4 Registers in each Group/Set.
*
* Every Set of Registers Refers to one of the 4 AGIs.
*
* Set 0 Refers to AGI0.
* Set 1 Refers to AGI1.
* Set 2 Refers to AGI2.
* Set 3 Refers to AGI3.
*
* The 4 Registers of Each Set Carry the Following Information:
*
* Register 0: Source Address.
* Register 1: Destination Address.
* Register 2: Data Size (Transfer Size).
* Register 3: Address Offset.
*
* If an Acceleration Scheduler Indirect Requests a CDMA Transfer it Writes the Information Above to its own Set of Registers inside the Info Memory Block.
* The Fetch or Send Scheduler Reads the Above Information from the Info Memory Block and Starts a CDMA Transfer Accordingly.
*
* The Function Parameters are the Input Ports/Interfaces of the Core:
*
* 01 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*/
int info_memory_block(/*01*/unsigned int src_address_0,
/*02*/unsigned int dst_address_0,
/*03*/unsigned int data_size_0,
/*04*/unsigned int offset_0,
/*05*/unsigned int src_address_1,
/*06*/unsigned int dst_address_1,
/*07*/unsigned int data_size_1,
/*08*/unsigned int offset_1,
/*09*/unsigned int src_address_2,
/*10*/unsigned int dst_address_2,
/*11*/unsigned int data_size_2,
/*12*/unsigned int offset_2,
/*13*/unsigned int src_address_3,
/*14*/unsigned int dst_address_3,
/*15*/unsigned int data_size_3,
/*16*/unsigned int offset_3
)
{
/*
* NOTE the Body Contains no Logic: None of the 16 Arguments is Read and the Function Always Returns 1.
* The Only Content of the Core is the Set of s_axilite Interface Directives below.
* Every Argument (and the Return Value) is Placed in the Same AXI Slave Lite Bundle (int_cfg).
*/
/*
* Source Address, Destination Address, Data Size and Address Offset Registers of the First Group/Set
*/
#pragma HLS INTERFACE s_axilite port=src_address_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_0 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_0 bundle=int_cfg
/*
* Source Address, Destination Address, Data Size and Address Offset Registers of the Second Group/Set
*/
#pragma HLS INTERFACE s_axilite port=src_address_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_1 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_1 bundle=int_cfg
/*
* Source Address, Destination Address, Data Size and Address Offset Registers of the Third Group/Set
*/
#pragma HLS INTERFACE s_axilite port=src_address_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_2 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_2 bundle=int_cfg
/*
* Source Address, Destination Address, Data Size and Address Offset Registers of the Fourth Group/Set
*/
#pragma HLS INTERFACE s_axilite port=src_address_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=dst_address_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=data_size_3 bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=offset_3 bundle=int_cfg
/*
* The Return Value (and the Block-Level Control of the Core) is Exposed through the Same int_cfg Bundle.
*/
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
return 1;
}

View File

@@ -0,0 +1,95 @@
/*
* image_info
*
* Describes an Image by its Number of Rows, its Number of Columns and its Total Size.
* NOTE(review): The Unit of the size Field (presumably Bytes) is not Visible Here -- Confirm Against the Code that Fills it.
*/
struct image_info
{
ap_uint<32> rows; // Number of Rows of the Image.
ap_uint<32> columns; // Number of Columns of the Image.
ap_uint<64> size; // Total Size of the Image (64 Bits Wide).
};
/*
* metrics
*
* The Set of Metrics that is Kept for one Acceleration Group.
*
* The First 80 Bytes are 32-Bit Fields whose Byte Offsets are Annotated on each Line.
* They are Followed by the Image Information (Offset 80 Bytes) and by 64-Bit Kernel/Userspace Time Fields.
* The Order and Width of the Fields Define the Memory Layout, so they Must not be Rearranged.
*/
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
/*
* Start/End Time Fields of the CDMA Fetch, CDMA Send and DMA Acceleration Phases.
* Each Time is Split in a _l and a _u 32-Bit Field.
*/
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
/*
* Rows, Columns and Size of the Image that the Metrics Refer to.
*/
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
* status_flags
*
* Status Fields that are Shared for all the Acceleration Groups.
*
* There is one *_occupied_pid Field for each of the 7 Acceleration Groups (2 Direct, 4 Indirect and 1 SG).
* NOTE(review): Each *_occupied_pid presumably Holds the PID of the Process that Currently Occupies the Group -- Confirm Against the Driver.
* NOTE(review): The Exact Encoding of accelerator_busy and open_modules is not Visible Here.
*/
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy;
ap_uint<32> open_modules;
};
/*
* shared_repository
*
* The Complete Shared Structure: one metrics Structure for each of the 7 Acceleration Groups
* (Direct 0-1, Indirect 0-3 and SG 0) Followed by the Common status_flags Structure.
* The Order of the Members Defines where each Group's Metrics are Located, so it Must not be Changed.
*/
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,17 @@
#Vivado HLS Batch Script that Synthesizes the Info Memory Block Core and Exports it as an IP.
#Open (or Create) the HLS Project.
open_project Info_Memory_Block
#The Top-Level Function that will be Synthesized.
set_top info_memory_block
#The Source File that Contains the Top-Level Function.
add_files info_memory_block.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds is the Tool's Default Unit, i.e. 100 MHz).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Package the Generated RTL for the IP Catalog.
export_design -format ip_catalog -display_name "Info Memory Block" -version "1.0"
exit

View File

@@ -0,0 +1,206 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "interrupt_manager.h"
/*
* interrupt_manager()
*
* The Hardware Functionality of the Interrupt Manager Core.
*
* The Interrupt Manager Core is Developed to Handle and Forward the Completion Interrupts from the 7 Acceleration Groups.
*
* The Goal of each Acceleration Group is to Inform the Linux Kernel Driver for the Completion of the Acceleration Procedure.
* The Communication of the FPGA with the Host System is Achieved through a PCIe Bus, thus, the Way to Signal the Driver is to Send MSI Interrupts.
*
* Sending a MSI is a Responsibility of the FPGA's PCIe Bridge.
* The PCIe Bridge Carries a 5-Bit Input to Set the Vector Number of the MSI and a 1-Bit Input which is Used to Trigger the MSI According to the Vector Number.
* In the Current Block Design the 2 Inputs of the PCIe Bridge are Connected with the Two Channels of a GPIO Peripheral.
* This GPIO from now on will be Recognized as GPIO-MSI.
* Writing Values in the Data Registers of the 2 Channels of the GPIO-MSI Leads to Triggering a MSI Interrupt.
*
* In Older Approaches the Acceleration Scheduler of each Acceleration Group would Simply Access the GPIO-MSI to Send MSI Interrupts on Completion of an Image Process.
* This Approach was Proved to be Unreliable Since the Concurrent Access to the GPIO-MSI by Multiple Acceleration Groups
* Could Lead to Possible Loss of Interrupts that were NEVER Transmitted.
*
* The new Approach to Ensure Zero Loss of Interrupts was to Develop the Current Interrupt Manager.
* The Interrupt Manager Includes an Array of 7 Registers where each Register Refers to each of the 7 Acceleration Groups.
*
* Register_Array[0] Refers to AGD0
* Register_Array[1] Refers to AGD1
* Register_Array[2] Refers to AGI0
* Register_Array[3] Refers to AGI1
* Register_Array[4] Refers to AGI2
* Register_Array[5] Refers to AGI3
* Register_Array[6] Refers to AGSG
*
* When an Acceleration Scheduler of any of the Acceleration Groups Requires to Send an MSI Interrupt for the Completion of its Acceleration Procedure
* it Simply Writes a Vector Number Value to the Corresponding Field of the Register Array of the Interrupt Manager as a MSI Request.
* The Kernel Driver Identifies the Acceleration Group that "Sent" the MSI by the Vector Number.
*
* Vector Number:0 --> AGD0
* Vector Number:1 --> AGD1
* Vector Number:2 --> AGI0
* Vector Number:3 --> AGI1
* Vector Number:4 --> AGI2
* Vector Number:5 --> AGI3
* Vector Number:6 --> AGSG
*
* The Interrupt Manager Checks in a Round Robin Manner the Fields of the Register Array for a Non-Zero Value which Indicates a new MSI Request.
* This Makes it Obvious that the Acceleration Schedulers Write to the Register Array of the Interrupt Manager the Vector Number Incremented by 1.
* This is Done to Avoid Zero Values that are not Identified by the Interrupt Manager as MSI Requests.
*
* If the Interrupt Manager Finds a Field of the Register Array with Non-Zero Value then it Decreases this Value by 1 in order to Produce
* the Correct Vector Number and Writes this Value to the GPIO-MSI Peripheral to Trigger the MSI Interrupt.
*
* The Interrupt Manager, then, Waits until it Receives an Acknowledgment Signal from the Kernel Driver before Checking for another MSI Request.
* The Kernel Driver, actually, Writes a Logic 1 Value to another GPIO Peripheral whose 1-Bit Output Signals the Interrupt Manager.
* This GPIO Peripheral from now on will be Recognized as GPIO-ACK.
*
*
* The Sequential Steps of the Interrupt Management are as Follows:
*
* Start a for Loop with 7 Iterations where each Iteration is to Check for a MSI Request by the Corresponding Acceleration Group.
* NOTE Enabling the Auto Restart Mode of the Current Core will Lead to Starting Over the for Loop.
*
* a --> Check if the Current Field of the Register Array Has a Non-Zero Value.
* If this is the Case Proceed to Send a MSI Interrupt.
* b --> Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
* c --> Write the Vector Number to the GPIO-MSI Peripheral that is Connected with the PCIe Bridge to Trigger an MSI Interrupt.
* d --> Wait for an Acknowledgment Signal from the Driver through the GPIO-ACK Peripheral.
* e --> Self-Clear to Zero the Current Field of the Register Array of the Interrupt Manager.
* The Next Time we Find a Non-Zero Value in this Field we Know that an Acceleration Group Has Made a Valid MSI Request.
* f --> Clear the Data Register of the GPIO-ACK Peripheral.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Acknowledgements from the Linux Kernel Driver.
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*/
int interrupt_manager(/*01*/volatile ap_uint<32> *ext_cfg,
/*02*/volatile ap_uint<1> *intr_ack,
/*03*/unsigned int gpio_msi_device_address,
/*04*/unsigned int gpio_ack_device_address,
/*05*/unsigned int self_msi_request_offset,
/*06*/unsigned int msi_request[7]
)
{
/*
* The ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=ext_cfg
/*
* The gpio_msi_device_address is a Register to Store the Base Address of the GPIO-MSI that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=gpio_msi_device_address bundle=cfg
/*
* The gpio_ack_device_address is a Register to Store the Base Address of the GPIO-ACK that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=gpio_ack_device_address bundle=cfg
/*
* The self_msi_request_offset is a Register to Store the Address Offset where the Register Array (msi_request) is Located.
* This Address Offset Actually Leads the Interrupt Manager to Access its Own Configuration Registers through its AXI Slave Lite (cfg) Interface.
*/
#pragma HLS INTERFACE s_axilite port=self_msi_request_offset bundle=cfg
/*
* The msi_request is the Register Array of 7 Fields (one for each Acceleration Group) where the MSI Requests are Written.
* This Array is Accessed through the AXI Slave Lite Interface (cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=msi_request bundle=cfg
/*
* The intr_ack is a Single Bit Input which is Used to Receive External Acknowledgements from the Linux Kernel Driver.
*/
#pragma HLS INTERFACE ap_none port=intr_ack
#pragma HLS INTERFACE s_axilite port=return bundle=cfg
/*
* NOTE the ext_cfg Pointer is Indexed in 32-Bit Words.
* This is why every Byte Address/Offset below is Divided by 4 before it is Added to ext_cfg.
*/
ap_uint<32> data_register; // Used to Temporarily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<1> intr_ack_value; // Used to Read the Last Value of the intr_ack Input Port.
//A Single Pass over the 7 Fields of the Register Array. The Function Returns after the Last Field is Checked.
for(int repeat = 0; repeat < 7; repeat++)
{
//If the Current Field of the Register Array (msi_request) Has a Non-Zero Value then we Have a Valid MSI Request by the Corresponding Acceleration Group.
if(msi_request[repeat] != 0)
{
/*
* ---------------------------------------------------------
* Send a MSI Interrupt by Writing to the GPIO-MSI Registers
* ---------------------------------------------------------
*/
//Decrease the Value of the Current Field of the Register Array to Get the Correct Vector Number.
data_register = msi_request[repeat] - 1;
//Write the Vector Number to the Data Register of the Second Channel of the GPIO-MSI.
//NOTE the Vector Number is Written before the Trigger so that it is Already in Place when the Trigger is Raised.
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Write a Logic 1 Value to the Data Register of the First Channel of the GPIO-MSI to Trigger the MSI Interrupt.
data_register = 0x1;
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
//Set the Data Register of the First Channel of the GPIO-MSI back to Zero.
data_register = 0x0;
memcpy((ap_uint<32> *)(ext_cfg + (gpio_msi_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -------------------------------------
* Wait for an Interrupt Acknowledgement
* -------------------------------------
*/
//Make an Initial Read of the Current State of the intr_ack Input.
intr_ack_value = *intr_ack;
//Keep Looping for as long as the intr_ack Input Does not Reach a Logic 1 Value.
//NOTE there is no Timeout: the Loop Ends only when intr_ack is Read as 1.
while(intr_ack_value != 1)
{
//Keep Reading the Last Value of the intr_ack Input.
intr_ack_value = *intr_ack;
}
//Reset the Reader Variable.
intr_ack_value = 0;
//The Zero Value below is Used by both of the Clearing Writes that Follow.
data_register = 0x0;
/* ---------------------------------------------------------------------------
* Self-Clear the Current Field of the Register Array of the Interrupt Manager
* ---------------------------------------------------------------------------
*/
//Write a Zero Value to the Current Field of the Register Array of the Interrupt Manager to Clear the Field.
//NOTE the Interrupt Manager Herein Uses its AXI Master Interface to Write to its own AXI Slave Lite Interface.
//Each Field is 4 Bytes Away from the Previous one, hence the (repeat * 4).
memcpy((ap_uint<32> *)(ext_cfg + (self_msi_request_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -----------------------------
* Clear the GPIO-ACK Peripheral
* -----------------------------
*/
//Clear the GPIO-ACK by Writing a Zero Value to its Data Register.
memcpy((ap_uint<32> *)(ext_cfg + (gpio_ack_device_address + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
}
}
return 1;
}

View File

@@ -0,0 +1,16 @@
/*
* ---------------------------------------------------
* Registers and Offsets of the Xilinx GPIO Peripheral
* ---------------------------------------------------
*
* All the Values are Byte Offsets that are Added to the Base Address of a GPIO Peripheral.
*/
#define XGPIO_CHANNEL_1_OFFSET 0x0 // GPIO Channel 1 Base Offset.
#define XGPIO_CHANNEL_2_OFFSET 0x8 // GPIO Channel 2 Base Offset.
/*
* Offset of the Data Register Relative to the Base Offset of a GPIO Channel.
*
* The Data Register of GPIO Channel 1 is XGPIO_DATA_OFFSET + XGPIO_CHANNEL_1_OFFSET (0x0).
* The Data Register of GPIO Channel 2 is XGPIO_DATA_OFFSET + XGPIO_CHANNEL_2_OFFSET (0x8).
*/
#define XGPIO_DATA_OFFSET 0x0

View File

@@ -0,0 +1,17 @@
#Vivado HLS Batch Script that Synthesizes the Interrupt Manager Core and Exports it as an IP.
#Open (or Create) the HLS Project.
open_project Interrupt_Manager
#The Top-Level Function that will be Synthesized.
set_top interrupt_manager
#The Source File that Contains the Top-Level Function.
add_files interrupt_manager.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds is the Tool's Default Unit, i.e. 100 MHz).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Package the Generated RTL for the IP Catalog.
export_design -format ip_catalog -display_name "Interrupt Manager" -version "3.5"
exit

View File

@@ -0,0 +1,17 @@
#Vivado HLS Batch Script that Synthesizes the Send Scheduler Core and Exports it as an IP.
#Open (or Create) the HLS Project.
open_project Send_Scheduler
#The Top-Level Function that will be Synthesized.
set_top send_scheduler
#The Source File that Contains the Top-Level Function.
add_files send_scheduler.cpp
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Target Clock Period of 10 (Nanoseconds is the Tool's Default Unit, i.e. 100 MHz).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Package the Generated RTL for the IP Catalog.
export_design -format ip_catalog -display_name "Send Scheduler" -version "3.0"
exit

View File

@@ -0,0 +1,476 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_cint.h"
#include "ap_utils.h"
#include "ap_int.h"
#include "send_scheduler.h"
/*
* -----------------------------------
* Registers and Masks of the AXI CDMA
* -----------------------------------
*
* The *_OFFSET Values are Byte Offsets from the Base Address of the CDMA.
* The *_MASK Values are Bit Masks.
* NOTE(review): The XR in the IRQ Masks presumably Means that they Apply to both the Control (CR) and the Status (SR) Register -- Confirm Against the CDMA Documentation.
*/
#define XAXICDMA_CR_OFFSET 0x00000000 // Control Register.
#define XAXICDMA_SR_OFFSET 0x00000004 // Status Register.
#define XAXICDMA_SRCADDR_OFFSET 0x00000018 // Source Address Register.
#define XAXICDMA_DSTADDR_OFFSET 0x00000020 // Destination Address Register.
#define XAXICDMA_BTT_OFFSET 0x00000028 // Bytes to Transfer Register.
#define XAXICDMA_CR_RESET_MASK 0x00000004 // Reset CDMA Mask.
#define XAXICDMA_XR_IRQ_IOC_MASK 0x00001000 // Interrupt On Completion (IOC) Mask.
#define XAXICDMA_XR_IRQ_DELAY_MASK 0x00002000 // Delay Interrupt Mask.
#define XAXICDMA_XR_IRQ_ERROR_MASK 0x00004000 // Error Interrupt Mask.
#define XAXICDMA_XR_IRQ_ALL_MASK 0x00007000 // All Interrupt Mask.
/*
* --------------------------------------------------------------------------
* Registers and Masks of the AXI Performance Monitor Unit (APM/Shared Timer)
* --------------------------------------------------------------------------
*
* NOTE the Upper Half of the Global Clock Counter is Located at the Lower Offset (0x0000) and the Lower Half at Offset 0x0004.
*/
#define XAPM_GCC_HIGH_OFFSET 0x0000 //Global Clock Counter 32 to 63 Bits (Upper).
#define XAPM_GCC_LOW_OFFSET 0x0004 //Global Clock Counter 0 to 31 Bits (Lower).
/*
* send_scheduler()
*
* The Hardware Functionality of the Send Scheduler Core.
*
* The Send Scheduler Core Does not Belong to Any Particular Acceleration Group but it is Used by ALL(4) the Acceleration Groups Indirect (AGIs).
* The Responsibility of this Core is to Manage the Procedure of Sending the Processed Data by the AGIs back to the Host's Memory.
* It Checks its Scheduler Buffer in Round Robin for new Transfer Requests by any of the AGIs.
* If it Finds Information for new Transfer it Starts the CDMA Send Core to Transfer Processed Image Data from the FPGA's DDR3 to the Host's Memory.
* The Interrupt Manager will then be Informed about the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure of the Corresponding AGI.
*
* When an AGI wants to Request a Transfer of Processed Image Data from the Send Scheduler it Has to Write the Source and Destination Addresses as well as the Transfer Size
* and, if Required, an Address Offset to the Scheduler Buffer that Belongs to the Send Scheduler.
*
* The Scheduler Buffer Has 4 Sets of Registers with 4 Registers for each Set.
* The 4 Registers are Used to Store the Source Address, the Destination Address, the Transfer Size and an Address Offset (If Required) Respectively.
* Each Set Corresponds to One of the 4 AGIs.
*
* When an AGI Writes the Above Information to the Scheduler Buffer, the Send Scheduler Starts a CDMA Transfer Accordingly
* to Send the Processed Image Data back to the Host's Memory.
*
* The Sequential Steps of the Acceleration Procedure are as Follows:
*
* Start a for Loop of 4 Iterations where in each Iteration we Check for new CDMA Transfer Requests by each of the 4 AGIs Respectively.
*
* a --> Read the Data Size Register from the Current Set of Registers of the Scheduler Buffer.
* If there is a Non-Zero Value then we Know that the Corresponding AGI Has Written the Required
* Info (Source/Destination Address, Transfer Size, Address Offset) in Order to Request a Transfer by the CDMA Send.
* If there is a Zero Value then we Check the Data Size Register of the Next Set for a Transfer Request by the Next AGI.
* b --> Enable the Interrupts on the CDMA Send Core.
* c --> Setup the CDMA with the Source and Destination Addresses.
* If the Destination Data Should be Sent through the PCIe Bridge then Get the Destination Address from the Scheduler Buffer and Set the
* Address Translation Register of the Corresponding AXI BAR of the PCIe Bridge with this Address.
* Then Set the Destination Address Register of the CDMA Send Core to be the Corresponding AXI BAR.
* If the Destination Data Should not be Sent through the PCIe Bridge then Just Set the Destination Address Register of the CDMA Send Core
* with the Destination Address of the Scheduler Buffer.
* d --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Started.
* e --> Setup the Bytes to Transfer Register with the Transfer Size which Triggers the CDMA Send Transfer.
* f --> Wait for an Interrupt by the CDMA Send on Completion of the Transfer.
* g --> Read the Current Value of the Shared Timer to Get the Time that the CDMA Send Transfer Ended.
* h --> Acknowledge the CDMA Send Interrupt.
* i --> Reset the CDMA Send Core.
* j --> Re-Enable the Interrupts on the CDMA Send Core.
* k --> Clear the Set of Registers of the Scheduler Buffer that Refer to the Current AGI.
* l --> Inform the Interrupt Manager About the Completion of the CDMA Send Transfer which is, also, the Completion of the Acceleration Procedure.
*
* Repeat the Above Steps (a to l) for the Next Set of Registers of the Scheduler Buffer.
*
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Master Interface of the Core Used to Access External Devices and Memories.
* 02 --------> Single Bit Input Used to Receive External Interrupts from the CDMA Send Core.
* 03 to 16 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*
* NOTE datr in pcie_ctl_datr_address Stands for Dynamic Address Translator Register.
*/
int send_scheduler(/*01*/volatile ap_uint<32> *ext_cfg,
/*02*/volatile ap_uint<1> *cdma_intr_in,
/*03*/unsigned int cdma_base_address,
/*04*/unsigned int scheduler_buffer_base_address,
/*05*/unsigned int src_address_first_reg_offset,
/*06*/unsigned int dst_address_first_reg_offset,
/*07*/unsigned int data_size_first_reg_offset,
/*08*/unsigned int offset_first_reg_offset,
/*09*/unsigned int step_offset,
/*10*/unsigned int shared_apm_base_address,
/*11*/unsigned int shared_metrics_base_address,
/*12*/unsigned int axi_bar_base_address,
/*13*/unsigned int pcie_ctl_datr_address,
/*14*/unsigned int pcie_mode,
/*15*/unsigned int interrupt_manager_register_offset,
/*16*/unsigned int accel_group_jump
)
{
/*
* The ext_cfg is the AXI Master Interface of the Core.
*/
#pragma HLS INTERFACE m_axi port=ext_cfg
/*
* The cdma_intr_in is a Single Bit Input which is Used to Receive External Interrupts from the CDMA Send Core.
*/
#pragma HLS INTERFACE ap_none port=cdma_intr_in
/*
* The cdma_base_address is a Register to Store the Base Address of the CDMA Send that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=cdma_base_address bundle=int_cfg
/*
* The scheduler_buffer_base_address is a Register to Store the Base Address of the Scheduler Buffer that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=scheduler_buffer_base_address bundle=int_cfg
/*
* The src_address_first_reg_offset is a Register to Store the Address Offset where the Source Address Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=src_address_first_reg_offset bundle=int_cfg
/*
* The dst_address_first_reg_offset is a Register to Store the Address Offset where the Destination Address Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=dst_address_first_reg_offset bundle=int_cfg
/*
* The data_size_first_reg_offset is a Register to Store the Address Offset where the Transfer Size Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=data_size_first_reg_offset bundle=int_cfg
/*
* The offset_first_reg_offset is a Register to Store the Address Offset where the Offset Register
* of the First Set of Registers inside the Scheduler Buffer is Located.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=offset_first_reg_offset bundle=int_cfg
/*
* The step_offset is a Register to Store the Number of Bytes to Jump inside the Scheduler Buffer
* in order to Locate the Next Set of Registers.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=step_offset bundle=int_cfg
/*
* The shared_apm_base_address is a Register to Store the Base Address of the Shared Timer (APM) that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_apm_base_address bundle=int_cfg
/*
* The shared_metrics_base_address is a Register to Store the Base Address of the Memory that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Metrics Information.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=shared_metrics_base_address bundle=int_cfg
/*
* The axi_bar_base_address is a Register to Store the Base Address of the Destination AXI BAR of the PCIe Bridge that this Core
* will Need to Access through the ext_cfg AXI Master Interface in Order to Write the Processed Image Data over the PCIe Bus.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=axi_bar_base_address bundle=int_cfg
/*
* The pcie_ctl_datr_address is a Register to Store the Address/Offset of the PCIe Bridge's Address Translation Register that Refers to the Destination AXI BAR.
* This Register is Accessed through the AXI Slave Lite Interface (s_axilite_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=pcie_ctl_datr_address bundle=int_cfg
/*
* The pcie_mode is a Register to Store a Value (0 or 1) that Indicates whether we Access the Destination Image Data through the PCIe Bridge or not.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=pcie_mode bundle=int_cfg
/*
* The interrupt_manager_register_offset is a Register to Store the Offset of a Specific Register of the Interrupt Manager that this Core
* will Need to Access through the ext_cfg AXI Master Interface.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=interrupt_manager_register_offset bundle=int_cfg
/*
* The accel_group_jump is a Register to Store a Value that Helps to Access the Correct Metrics Structure in the Metrics Memory in order
* to Store the Time Metrics that Refer to the Current AGI.
* This Register is Accessed through the AXI Slave Lite Interface (int_cfg) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=accel_group_jump bundle=int_cfg
#pragma HLS INTERFACE s_axilite port=return bundle=int_cfg
int repeat;
ap_uint<1> cdma_intr_in_value; // Used to Read the Last Value of the cdma_intr_in_value Input Port.
ap_uint<32> data_register; // Used to Temporalily Store Values when Reading or Writing from/to Registers of External Devices.
ap_uint<32> irq; // Used to Temporalily Store the IRQ Mask.
ap_uint<32> source_address_register; // Used to Temporalily Store the Value of the Source Address Register of the Scheduler Buffer.
ap_uint<32> destination_address_register; // Used to Temporalily Store the Value of the Destination Address Register of the Scheduler Buffer.
ap_uint<32> data_size_register; // Used to Temporalily Store the Value of the Data Size Register of the Scheduler Buffer.
ap_uint<32> offset_register; // Used to Temporalily Store the Value of the Offset Register of the Scheduler Buffer.
ap_uint<32> address; // Used to Calculate an Address along with an Offset.
ap_uint<32> cdma_send_time_start_gcc_l; // Store the CDMA Send Transfer Start Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_send_time_start_gcc_u; // Store the CDMA Send Transfer Start Time Upper Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_send_time_end_gcc_l; // Store the CDMA Send Transfer End Time Lower Register from the Shared Timer (Shared APM).
ap_uint<32> cdma_send_time_end_gcc_u; // Store the CDMA Send Transfer End Time Upper Register from the Shared Timer (Shared APM).
/*
* Start an Infinite Loop.
*/
while(1)
{
/*
* Make 4 Iterations and each Time Check the Current Set of Registers of the Scheduler Buffer for a New CDMA Send Transfer Request
* by the AGI that Refers to the Current Set of Registers.
*/
for(repeat = 0; repeat < 4; repeat++)
{
//Read the Data Size Register of the Current Set of Registers of the Scheduler Buffer.
memcpy(&data_size_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//If the Data Size Register is not Empty then we Have a New CDMA Send Transfer Request.
//Else the Send Scheduler will Check the Data Size Register of the Next Set in the Next Iteration.
if(data_size_register != 0)
{
/*
* --------------------------------------------
* Enable the Interrupts on the CDMA Send Core
* --------------------------------------------
*/
//Read the Control Register of the CDMA Send Core.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -------------------------------------------------------------------------
* Setup the Source and Destination Address Registers of the CDMA Send Core
* -------------------------------------------------------------------------
*/
//Read the Source Physical Address from the Source Address Register from the Current Set of the Scheduler Buffer.
memcpy(&source_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Source Address Register of the CDMA Send Core with the Source Address.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SRCADDR_OFFSET) / 4), &source_address_register, sizeof(ap_uint<32>));
//If the PCIe Mode is Enabled then the Destination Data Should be Written through the PCIe Bridge.
//This Mode Requires Setting the Address Translation Register of the Destination AXI BAR of the PCIe Bridge.
if(pcie_mode == 1)
{
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Address Translation Register of the Destination AXI BAR of the PCIe Bridge with the Destination Physical Address.
memcpy((ap_uint<32> *)(ext_cfg + (pcie_ctl_datr_address) / 4), &destination_address_register, sizeof(ap_uint<32>));
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Destination Address Register of the CDMA Send Core to be the Specified Destination AXI BAR along with a Possible Offset.
address = axi_bar_base_address + offset_register;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
}
//If the PCIe Mode is Disabled there is no Need to Set the Address Translation Registers of the PCIe Bridge.
else
{
//Read the Destination Physical Address from the Destination Address Register from the Current Set of the Scheduler Buffer.
memcpy(&destination_address_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Read the Address Offset from the Offset Register from the Current Set of the Scheduler Buffer.
//NOTE it is Possible that this Register Has a Zero Value if there is no Offset Required to Access the Data.
memcpy(&offset_register, (const ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), sizeof(ap_uint<32>));
//Set the Destination Address Register of the CDMA Send Core with the Destination Address along with the Offset Read from the Scheduler Buffer.
address = destination_address_register + offset_register;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_DSTADDR_OFFSET) / 4), &address, sizeof(ap_uint<32>));
}
/*
* ---------------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Transfer Start Time
* ---------------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer Start Time.
memcpy(&cdma_send_time_start_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_L_OFFSET) / 4), &cdma_send_time_start_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer Start Time.
memcpy(&cdma_send_time_start_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the CDMA Send Transfer Start Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_START_U_OFFSET) / 4), &cdma_send_time_start_gcc_u, sizeof(ap_uint<32>));
/*
* --------------------------------------------------------------------------------------------
* Setup the Bytes To Transfer (BTT) Register of the CDMA Send Core which Triggers the Transfer
* --------------------------------------------------------------------------------------------
*/
//Set the Bytes To Transfer Register of the CDMA Send Core with the Transfer Size in Bytes.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_BTT_OFFSET) / 4), &data_size_register, sizeof(ap_uint<32>));
/*
* -------------------------
* Wait for a CDMA Interrupt
* -------------------------
*/
//Make an Initial Read of the Current State of the cdma_intr_in Input.
cdma_intr_in_value = *cdma_intr_in;
//Keep Looping for as long as the cdma_intr_in Input Does not Reach a Logic 1 Value.
while(cdma_intr_in_value != 1)
{
//Keep Reading the Last Value of the cdma_intr_in Input.
cdma_intr_in_value = *cdma_intr_in;
}
//Reset the Reader Variable.
cdma_intr_in_value = 0;
/*
* -------------------------------------------------------------------------------------------------------------------------
* Read the Upper and Lower Registers of the Global Clock Counter of the Shared Timer to Get the CDMA Send Transfer End Time
* -------------------------------------------------------------------------------------------------------------------------
*/
//Read the Lower Register of the GCC of the Shared Timer to Get the 32 LSBs of the CDMA Send Transfer End Time.
memcpy(&cdma_send_time_end_gcc_l, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_LOW_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 LSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_L_OFFSET) / 4), &cdma_send_time_end_gcc_l, sizeof(ap_uint<32>));
//Read the Upper Register of the GCC of the Shared Timer to Get the 32 MSBs of the CDMA Send Transfer End Time.
memcpy(&cdma_send_time_end_gcc_u, (const ap_uint<32> *)(ext_cfg + (shared_apm_base_address + XAPM_GCC_HIGH_OFFSET) / 4), sizeof(ap_uint<32>));
//Store the 32 MSBs of the CDMA Send Transfer End Time to a Specific Offset of the Metrics Memory.
memcpy((ap_uint<32> *)(ext_cfg + (shared_metrics_base_address + (sizeof(struct metrics) * accel_group_jump) + (sizeof(struct metrics) * repeat) + CDMA_SEND_TIME_END_U_OFFSET) / 4), &cdma_send_time_end_gcc_u, sizeof(ap_uint<32>));
/*
* ------------------------------------
* Acknowledge the CDMA Send Interrupt
* ------------------------------------
*/
//Read the Status Register of the CDMA Send Core which among others Includes the Status of the DMA's IRQs.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), sizeof(ap_uint<32>));
//Filter the Recently Read Value with the XAXICDMA_XR_IRQ_ALL_MASK so as to Keep ONLY the IRQs that were Triggered.
irq = data_register & XAXICDMA_XR_IRQ_ALL_MASK;
//Write the new Value Back to the Status Register of the CDMA Send Core which Acknowledges the Triggered Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_SR_OFFSET) / 4), &irq, sizeof(ap_uint<32>));
/*
* -------------------------
* Reset the CDMA Send Core
* -------------------------
*/
//Write the Reset Mask to the Control Register of the CDMA Send Core in order to Reset the Core.
data_register = XAXICDMA_CR_RESET_MASK;
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* -----------------------------------------------
* Re-Enable the Interrupts on the CDMA Send Core
* -----------------------------------------------
*/
//Read the Control Register of the CDMA Send Core.
memcpy(&data_register, (const ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), sizeof(ap_uint<32>));
//Set the Recently Read Value with the Masks Required to Enable the IOC, Delay and Error IRQs.
//NOTE that IOC Stands for Interrupt On Complete.
data_register = data_register | (XAXICDMA_XR_IRQ_ERROR_MASK | XAXICDMA_XR_IRQ_IOC_MASK | XAXICDMA_XR_IRQ_DELAY_MASK);
//Write the new Value Back to the Control Register of the CDMA Send Core to Enable the Interrupts.
memcpy((ap_uint<32> *)(ext_cfg + (cdma_base_address + XAXICDMA_CR_OFFSET) / 4), &data_register, sizeof(ap_uint<32>));
/*
* Reset to Zero the 4 Registers of the Current Set of Registers of the Scheduler Buffer
*/
data_register = 0;
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + src_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + dst_address_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + data_size_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
memcpy((ap_uint<32> *)(ext_cfg + (scheduler_buffer_base_address + offset_first_reg_offset + (repeat * step_offset)) / 4), &data_register, sizeof(ap_uint<32>));
/*
* If the PCIe Mode is Enabled then we Have to Inform the Interrupt Manager to Send a MSI Interrupt Since the CDMA Send Transfer is Complete
* which, also, Means that the Acceleration Procedure is Complete.
*/
if(pcie_mode == 1)
{
//The Current Iteration Value along with the accel_group_jump Value Indicate the Acceleration Group Number of the Acceleration Group Indirect which Can be 2, 3, 4 or 5.
//2 is AGI0.
//3 is AGI1.
//4 is AGI2.
//5 is AGI3.
data_register = repeat + accel_group_jump;
//Write the Current Acceleration Group Number to a Specific Register of the Interrupt Manager to Let It Know which Acceleration Group Has Completed.
memcpy((ap_uint<32> *)(ext_cfg + (interrupt_manager_register_offset + (repeat * 4)) / 4), &data_register, sizeof(ap_uint<32>));
}
}
}
}
return 1;
}

View File

@@ -0,0 +1,126 @@
/*
 * Byte Offsets of the 32-Bit Fields at the Start of struct metrics.
 *
 * Each Value Matches the "Offset N Bytes" Annotation of the Corresponding Member of struct metrics.
 * A Core Adds one of these Offsets to the Address of a Metrics Structure in order to Read or Write a Single Field.
 * The _L and _U Suffixes Denote the Lower and the Upper 32 Bits of a 64-Bit Counter Value.
 *
 * NOTE: These Values Must be Kept in Sync with the Member Order of struct metrics.
 */
#define APM_READ_TRANSACTIONS_OFFSET 0
#define APM_READ_BYTES_OFFSET 4
#define APM_WRITE_TRANSACTIONS_OFFSET 8
#define APM_WRITE_BYTES_OFFSET 12
#define APM_PACKETS_OFFSET 16
#define APM_BYTES_OFFSET 20
#define APM_GCC_L_OFFSET 24
#define APM_GCC_U_OFFSET 28
#define CDMA_FETCH_TIME_START_L_OFFSET 32
#define CDMA_FETCH_TIME_START_U_OFFSET 36
#define CDMA_FETCH_TIME_END_L_OFFSET 40
#define CDMA_FETCH_TIME_END_U_OFFSET 44
#define CDMA_SEND_TIME_START_L_OFFSET 48
#define CDMA_SEND_TIME_START_U_OFFSET 52
#define CDMA_SEND_TIME_END_L_OFFSET 56
#define CDMA_SEND_TIME_END_U_OFFSET 60
#define DMA_ACCEL_TIME_START_L_OFFSET 64
#define DMA_ACCEL_TIME_START_U_OFFSET 68
#define DMA_ACCEL_TIME_END_L_OFFSET 72
#define DMA_ACCEL_TIME_END_U_OFFSET 76
/*
 * struct image_info
 *
 * Describes an Image by its Dimensions and its Size.
 * It is Embedded in struct metrics as the shared_image_info Member.
 */
struct image_info
{
ap_uint<32> rows; // Number of Rows of the Image.
ap_uint<32> columns; // Number of Columns of the Image.
ap_uint<64> size; // Size of the Image. NOTE(review): Presumably the Total Size in Bytes - Confirm against the Code that Fills this Field.
};
/*
 * struct metrics
 *
 * The Metrics Record that is Kept for one Acceleration Group.
 *
 * The First 20 Members are 32-Bit Fields whose Byte Offsets are Published as the *_OFFSET Macros
 * (APM_READ_TRANSACTIONS_OFFSET up to DMA_ACCEL_TIME_END_U_OFFSET).
 * Those Fields are Accessed by Address Arithmetic (Structure Address + Offset) and not by Member Name,
 * so the Order and the Width of these Members Must not Change without Updating the Macros.
 *
 * Time Values are Split in a Lower (_l) and an Upper (_u) 32-Bit Half.
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
ap_uint<32> apm_read_transactions; //Offset 0 Bytes
ap_uint<32> apm_read_bytes; //Offset 4 Bytes
ap_uint<32> apm_write_transactions; //Offset 8 Bytes
ap_uint<32> apm_write_bytes; //Offset 12 Bytes
ap_uint<32> apm_packets; //Offset 16 Bytes
ap_uint<32> apm_bytes; //Offset 20 Bytes
ap_uint<32> apm_gcc_l; //Offset 24 Bytes
ap_uint<32> apm_gcc_u; //Offset 28 Bytes
/*
* Start and End Times of the CDMA Fetch Transfer, the CDMA Send Transfer and the DMA Acceleration
*/
ap_uint<32> cdma_fetch_time_start_l; //Offset 32 Bytes
ap_uint<32> cdma_fetch_time_start_u; //Offset 36 Bytes
ap_uint<32> cdma_fetch_time_end_l; //Offset 40 Bytes
ap_uint<32> cdma_fetch_time_end_u; //Offset 44 Bytes
ap_uint<32> cdma_send_time_start_l; //Offset 48 Bytes
ap_uint<32> cdma_send_time_start_u; //Offset 52 Bytes
ap_uint<32> cdma_send_time_end_l; //Offset 56 Bytes
ap_uint<32> cdma_send_time_end_u; //Offset 60 Bytes
ap_uint<32> dma_accel_time_start_l; //Offset 64 Bytes
ap_uint<32> dma_accel_time_start_u; //Offset 68 Bytes
ap_uint<32> dma_accel_time_end_l; //Offset 72 Bytes
ap_uint<32> dma_accel_time_end_u; //Offset 76 Bytes
/*
* Dimensions and Size of the Image that the Metrics Refer to
*/
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
* NOTE(review): No *_OFFSET Macros Exist for the Members below, they are Presumably Filled by Software only - Confirm.
*/
ap_uint<64> total_time_start;
ap_uint<64> total_time_end;
ap_uint<64> sleep_time_start;
ap_uint<64> sleep_time_end;
ap_uint<64> preparation_time_start;
ap_uint<64> preparation_time_end;
ap_uint<64> load_time_start;
ap_uint<64> load_time_end;
ap_uint<64> save_time_start;
ap_uint<64> save_time_end;
};
/*
 * struct status_flags
 *
 * A Set of 32-Bit Status Words, one per Acceleration Group plus two Global Words.
 * It is Stored as the Last Member of struct shared_repository.
 *
 * NOTE(review): Judging by the Member Names, each *_occupied_pid Word Presumably Holds the PID of the Process
 * that Currently Occupies the Corresponding Acceleration Group - Confirm against the Driver Code.
 */
struct status_flags
{
ap_uint<32> accel_direct_0_occupied_pid;
ap_uint<32> accel_direct_1_occupied_pid;
ap_uint<32> accel_indirect_0_occupied_pid;
ap_uint<32> accel_indirect_1_occupied_pid;
ap_uint<32> accel_indirect_2_occupied_pid;
ap_uint<32> accel_indirect_3_occupied_pid;
ap_uint<32> accel_sg_0_occupied_pid;
ap_uint<32> accelerator_busy;
ap_uint<32> open_modules;
};
/*
 * struct shared_repository
 *
 * The Complete Layout of the Shared Metrics Memory: Seven Consecutive struct metrics Records
 * (one per Acceleration Group) Followed by the Status Flags.
 *
 * The Order of the Metrics Records is Significant.
 * The Hardware Schedulers Locate a Record as Base Address + (sizeof(struct metrics) * Group Index),
 * so the Position of a Member in this Structure is the Index of its Acceleration Group:
 * 0 and 1 are the Direct Groups, 2 to 5 are the Indirect Groups (AGI0 to AGI3) and 6 is the Scatter/Gather Group.
 */
struct shared_repository
{
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};

View File

@@ -0,0 +1,74 @@
/*******************************************************************************
Vendor: Xilinx
Associated Filename: ap_axi_sdata.h
Purpose: AXI data type for AutoESL
Revision History: February 13, 2012 - initial release
*******************************************************************************
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
This file contains confidential and proprietary information of Xilinx, Inc. and
is protected under U.S. and international copyright and other intellectual
property laws.
DISCLAIMER
This disclaimer is not a license and does not grant any rights to the materials
distributed herewith. Except as otherwise provided in a valid license issued to
you by Xilinx, and to the maximum extent permitted by applicable law:
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
in contract or tort, including negligence, or under any other theory of
liability) for any loss or damage of any kind or nature related to, arising under
or in connection with these materials, including for any direct, or any indirect,
special, incidental, or consequential loss or damage (including loss of data,
profits, goodwill, or any type of loss or damage suffered as a result of any
action brought by a third party) even if such damage or loss was reasonably
foreseeable or Xilinx had been advised of the possibility of the same.
CRITICAL APPLICATIONS
Xilinx products are not designed or intended to be fail-safe, or for use in any
application requiring fail-safe performance, such as life-support or safety
devices or systems, Class III medical devices, nuclear facilities, applications
related to the deployment of airbags, or any other applications that could lead
to death, personal injury, or severe property or environmental damage
(individually and collectively, "Critical Applications"). Customer assumes the
sole risk and liability of any use of Xilinx products in Critical Applications,
subject only to applicable laws and regulations governing limitations on product
liability.
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
ALL TIMES.
*******************************************************************************/
#ifndef __AP__AXI_SDATA__
#define __AP__AXI_SDATA__
#define AP_INT_MAX_W 4096
#include "ap_int.h"
/*
 * ap_axis2<D, U, TI, TD>
 *
 * Signed variant of the stream word structure: every member is an ap_int.
 *
 *   D  - width in bits of the data member; strb gets one bit per data byte, (D+7)/8.
 *   U  - width in bits of the user member.
 *   TI - unused: the tid member it would size is commented out below.
 *   TD - width in bits of the tdest member.
 *
 * NOTE(review): the member names follow the AXI4-Stream side-band signals
 * (TSTRB, TUSER, TLAST, TDEST); the keep and id members are not present here -
 * confirm that the connected IP does not require them.
 */
template<int D,int U,int TI,int TD>
struct ap_axis2{
ap_int<D> data;
ap_int<(D+7)/8> strb;
ap_int<U> user;
ap_int<1> last;
// ap_int<TI> tid;
ap_int<TD> tdest;
};
/*
 * ap_axiu2<D, U, TI, TD>
 *
 * Unsigned variant of the stream word structure: every member is an ap_uint.
 *
 *   D  - width in bits of the data member; strb gets one bit per data byte, (D+7)/8.
 *   U  - width in bits of the user member.
 *   TI - unused: the tid member it would size is commented out below.
 *   TD - width in bits of the tdest member.
 */
template<int D,int U,int TI,int TD>
struct ap_axiu2{
ap_uint<D> data;
ap_uint<(D+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
//ap_uint<TI> tid;
ap_uint<TD> tdest;
};
//typedef ap_axis<int D, int U, int TI, int TD> ap_axis_unsigned<int D, int U, int TI, int TD>;
#endif

View File

@@ -0,0 +1,99 @@
/*******************************************************************************
Vendor: Xilinx
Associated Filename: ap_bmp.h
Purpose: BMP image reader and writer header file for AutoESL
Revision History: February 13, 2012 - initial release
*******************************************************************************
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
This file contains confidential and proprietary information of Xilinx, Inc. and
is protected under U.S. and international copyright and other intellectual
property laws.
DISCLAIMER
This disclaimer is not a license and does not grant any rights to the materials
distributed herewith. Except as otherwise provided in a valid license issued to
you by Xilinx, and to the maximum extent permitted by applicable law:
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
in contract or tort, including negligence, or under any other theory of
liability) for any loss or damage of any kind or nature related to, arising under
or in connection with these materials, including for any direct, or any indirect,
special, incidental, or consequential loss or damage (including loss of data,
profits, goodwill, or any type of loss or damage suffered as a result of any
action brought by a third party) even if such damage or loss was reasonably
foreseeable or Xilinx had been advised of the possibility of the same.
CRITICAL APPLICATIONS
Xilinx products are not designed or intended to be fail-safe, or for use in any
application requiring fail-safe performance, such as life-support or safety
devices or systems, Class III medical devices, nuclear facilities, applications
related to the deployment of airbags, or any other applications that could lead
to death, personal injury, or severe property or environmental damage
(individually and collectively, "Critical Applications"). Customer assumes the
sole risk and liability of any use of Xilinx products in Critical Applications,
subject only to applicable laws and regulations governing limitations on product
liability.
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
ALL TIMES.
*******************************************************************************/
#ifndef __XLNX__BITMAP__
#define __XLNX__BITMAP__
// Basic color definitions: the minimum and maximum value of an 8-bit channel
#define BLACK 0
#define WHITE 255
// Maximum image size: these two values size every pixel plane of BMPImage
#define MAX_ROWS 1080
#define MAX_COLS 1920
//File Information Header
// Holds the fields of the header found at the start of a BMP file.
// NOTE(review): the struct is declared without any packing directive and Offset
// is a 16-bit field, whereas the standard BMP file header stores the pixel data
// offset as a 32-bit value in a packed 14-byte layout. Presumably the reader and
// writer fill these members field by field - confirm against their implementation.
typedef struct{
unsigned short FileType;
unsigned int FileSize;
unsigned short Reserved1;
unsigned short Reserved2;
unsigned short Offset;
}BMPHeader;
// Image Information Header
// Holds the fields that describe the bitmap itself: the size of this header,
// the image dimensions, the pixel format, the compression and the palette usage.
// NOTE(review): the member list mirrors the standard 40-byte BMP info header,
// where width and height are signed; here they are unsigned - confirm that
// bottom-up/top-down detection via a negative height is not needed.
typedef struct{
unsigned int Size;
unsigned int Width;
unsigned int Height;
unsigned short Planes;
unsigned short BitsPerPixel;
unsigned int Compression;
unsigned int SizeOfBitmap;
unsigned int HorzResolution;
unsigned int VertResolution;
unsigned int ColorsUsed;
unsigned int ColorsImportant;
}BMPImageHeader;
// In-memory image
// Groups pointers to the two headers, the color table and the raw pixel data
// with six full-size MAX_ROWS x MAX_COLS planes: R, G, B and Y are unsigned
// 8-bit planes, U and V are plain char planes.
// Each plane is 1080 * 1920 bytes, so one BMPImage is roughly 12 MB: allocate
// it statically or on the heap, not on the stack.
// The struct does not own the pointed-to memory in any way visible here;
// whoever allocates file_header, image_header, colors and data must release them.
typedef struct{
BMPHeader *file_header;
BMPImageHeader *image_header;
unsigned int *colors;
unsigned char *data;
unsigned char R[MAX_ROWS][MAX_COLS];
unsigned char G[MAX_ROWS][MAX_COLS];
unsigned char B[MAX_ROWS][MAX_COLS];
unsigned char Y[MAX_ROWS][MAX_COLS];
char U[MAX_ROWS][MAX_COLS];
char V[MAX_ROWS][MAX_COLS];
}BMPImage;
//Read Function
// file is the path of the BMP file, row and col are image dimensions and
// R, G, B point to the caller's buffers for the three color channels.
// NOTE(review): the implementation is not part of this header. Presumably each
// buffer must hold at least row * col bytes and the return value reports
// success or failure - confirm both against the definition.
int BMP_Read(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
//Write Function
// Takes the same arguments as BMP_Read; R, G, B are presumably the source
// channel buffers written out to file - confirm against the definition.
int BMP_Write(char *file, int row, int col, unsigned char *R, unsigned char *G, unsigned char *B);
#endif

View File

@@ -0,0 +1,341 @@
/*******************************************************************************
Vendor: Xilinx
Associated Filename: ap_video.h
Purpose: Video datatype header file for AutoESL
Revision History: February 13, 2012 - initial release
January 28, 2015 - Caes-lab TEI Crete revised
*******************************************************************************
© Copyright 2008 - 2012 Xilinx, Inc. All rights reserved.
This file contains confidential and proprietary information of Xilinx, Inc. and
is protected under U.S. and international copyright and other intellectual
property laws.
DISCLAIMER
This disclaimer is not a license and does not grant any rights to the materials
distributed herewith. Except as otherwise provided in a valid license issued to
you by Xilinx, and to the maximum extent permitted by applicable law:
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
in contract or tort, including negligence, or under any other theory of
liability) for any loss or damage of any kind or nature related to, arising under
or in connection with these materials, including for any direct, or any indirect,
special, incidental, or consequential loss or damage (including loss of data,
profits, goodwill, or any type of loss or damage suffered as a result of any
action brought by a third party) even if such damage or loss was reasonably
foreseeable or Xilinx had been advised of the possibility of the same.
CRITICAL APPLICATIONS
Xilinx products are not designed or intended to be fail-safe, or for use in any
application requiring fail-safe performance, such as life-support or safety
devices or systems, Class III medical devices, nuclear facilities, applications
related to the deployment of airbags, or any other applications that could lead
to death, personal injury, or severe property or environmental damage
(individually and collectively, "Critical Applications"). Customer assumes the
sole risk and liability of any use of Xilinx products in Critical Applications,
subject only to applicable laws and regulations governing limitations on product
liability.
THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
ALL TIMES.
*******************************************************************************/
#ifndef ___AP__VIDEO__
#define ___AP__VIDEO__
#include "ap_int.h"
/* Parametrized RGB structure
 * A pixel made of three unsigned channels, declared in B, G, R order.
 * A is the bit width of B, D the bit width of G and C the bit width of R.
 * The second parameter is named D rather than B because B is the name of a member.
 */
template <int A, int D, int C>
struct ap_rgb{
ap_uint<A> B;
ap_uint<D> G;
ap_uint<C> R;
};
/* Parametrized YUV structure
 * A pixel made of an unsigned Y channel of A bits and two signed channels:
 * U of B bits and V of C bits.
 */
template <int A, int B, int C>
struct ap_yuv{
ap_uint<A> Y;
ap_int<B> U;
ap_int<C> V;
};
/* Line buffer class definition
 *
 * ap_linebuffer<T, LROW, LCOL> stores LROW rows of LCOL elements of type T in
 * the public array M and offers per-column shift and insert operations.
 * None of the member functions checks its row or column arguments: the caller
 * must keep them inside [0, LROW) and [0, LCOL).
 */
template <typename T, int LROW, int LCOL>
class ap_linebuffer{
public:
/* Storage, indexed as M[row][column]. */
T M[LROW][LCOL];
/* The constructor has no statements; it only carries the directive that
 * requests complete partitioning of M along dimension 1 (the row index). */
ap_linebuffer(){
#pragma AP ARRAY_PARTITION variable=M dim=1 complete
//#pragma AP data_pack variable=M
//#pragma AP dependence variable=M intra false
//#pragma AP dependence variable=M inter false
};
~ap_linebuffer(){};
/* Column col: every row takes the value of the row below it (row i <- row i-1). */
void shift_up(int col);
/* Column col: every row takes the value of the row above it (row i <- row i+1). */
void shift_down(int col);
/* Same movement as shift_down(), applied to every column. */
void shift_down_all();
/* Write value to M[row][col]. */
void insert(T value, int row, int col);
/* Write value to row LROW-1 of column col. */
void insert_top(T value, int col);
/* Write value to row 0 of column col. */
void insert_bottom(T value, int col);
/* Debug dump of columns StartCol (inclusive) to EndCol (exclusive) with printf. */
void print(int StartCol, int EndCol);
/* Return a copy of M[RowIndex][ColIndex]. */
T getval(int RowIndex,int ColIndex);
};
/* Line buffer print
 * Debug helper that dumps the buffer to stdout through printf.
 * Rows are written from row LROW-1 down to row 0, one text line per row,
 * each prefixed with "Line <row>:". Only the columns from StartCol (inclusive)
 * to EndCol (exclusive) are written, separated by tabs.
 * An empty line terminates the dump.
 * Every element is handed to printf under the "%d" conversion.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::print(int StartCol, int EndCol)
{
    for (int row = LROW - 1; row >= 0; --row) {
        printf("Line %d:\t", row);
        for (int column = StartCol; column < EndCol; ++column) {
            printf("%d\t", M[row][column]);
        }
        printf("\n");
    }
    printf("\n");
}
/* Line buffer shift up
 * Moves the contents of column col one row towards the higher row indices:
 * row i receives the value of row i-1, for i = LROW-1 down to 1.
 * The value that was in row LROW-1 is lost and row 0 keeps its old value,
 * so the caller is expected to write the new element to row 0 (the bottom)
 * afterwards, e.g. with insert_bottom().
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_up(int col)
{
#pragma AP inline
    // Walk from the highest row downwards so that no value is overwritten before it is copied.
    for (int row = LROW - 1; row > 0; --row) {
#pragma AP unroll
        M[row][col] = M[row - 1][col];
    }
}
/* Line buffer shift down
 * Moves the contents of column col one row towards the lower row indices:
 * row i receives the value of row i+1, for i = 0 up to LROW-2.
 * The value that was in row 0 is lost and row LROW-1 keeps its old value,
 * so the caller is expected to write the new element to row LROW-1
 * afterwards, e.g. with insert_top().
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_down(int col)
{
#pragma AP inline
    // Walk from row 0 upwards so that no value is overwritten before it is copied.
    for (int row = 0; row < LROW - 1; ++row) {
#pragma AP unroll
        M[row][col] = M[row + 1][col];
    }
}
/* Line buffer shift down all
 * Applies the shift_down() movement to every column of the buffer:
 * M[i][j] receives M[i+1][j] for i = 0 up to LROW-2 and j = 0 up to LCOL-1.
 * Row 0 is lost and row LROW-1 keeps its old contents, ready to be
 * overwritten with a new line by the caller.
 * The row loop carries an unroll directive and the column loop is
 * unrolled by a factor of 120.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::shift_down_all()
{
#pragma AP inline
    for (int row = 0; row < LROW - 1; ++row) {
#pragma AP unroll
        for (int column = 0; column < LCOL; ++column) {
#pragma AP unroll factor=120
            M[row][column] = M[row + 1][column];
        }
    }
}
/* Line buffer insert bottom
 * Stores value in row 0 of column col.
 * Row 0 is the row that shift_up() leaves free for new data.
 * col is not range checked.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert_bottom(T value, int col)
{
#pragma AP inline
    const int bottom_row = 0;
    M[bottom_row][col] = value;
}
/* Line buffer insert top
 * Stores value in row LROW-1 of column col.
 * Row LROW-1 is the row that shift_down() leaves free for new data.
 * col is not range checked.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert_top(T value, int col)
{
#pragma AP inline
    const int top_row = LROW - 1;
    M[top_row][col] = value;
}
/* Line buffer insert
 * Stores value at position (row, col) of the line buffer.
 * Neither index is range checked.
 */
template <typename T, int LROW, int LCOL>
void ap_linebuffer<T,LROW,LCOL>::insert(T value, int row, int col)
{
#pragma AP inline
    T &slot = M[row][col];
    slot = value;
}
/* Line buffer getval
 * Returns a copy of the element stored at position (RowIndex, ColIndex).
 * Neither index is range checked.
 */
template <typename T, int LROW, int LCOL>
T ap_linebuffer<T,LROW,LCOL>::getval(int RowIndex,int ColIndex)
{
#pragma AP inline
    return M[RowIndex][ColIndex];
}
/* Memory window class definition
 *
 * ap_window<T, LROW, LCOL> stores an LROW x LCOL block of elements of type T
 * in the public array M and offers whole-window shift operations in the four
 * directions plus single element access.
 * None of the member functions checks its row or column arguments.
 */
template <typename T, int LROW, int LCOL>
class ap_window{
public:
/* Storage, indexed as M[row][column]. */
T M[LROW][LCOL];
/* The constructor has no statements; it only carries the partitioning
 * directive for M. In Vivado HLS dim=0 selects all dimensions of the array. */
ap_window(){
#pragma AP ARRAY_PARTITION variable=M dim=0 complete
//#pragma AP data_pack variable=M
};
~ap_window(){};
/* Every column takes the value of the column after it (column j <- column j+1). */
void shift_right();
/* Every column takes the value of the column before it (column j <- column j-1). */
void shift_left();
/* Every row takes the value of the row below it (row i <- row i-1). */
void shift_up();
/* Every row takes the value of the row above it (row i <- row i+1). */
void shift_down();
/* Write value to M[row][col]. */
void insert(T value, int row,int col);
/* Debug dump of the whole window with printf. */
void print();
/* Return a copy of M[RowIndex][ColIndex]. */
T getval(int RowIndex,int ColIndex);
};
/* Window print
 * Debug helper that dumps the whole window to stdout through printf.
 * The output starts with the window dimensions and a header line holding the
 * column indices, followed by one text line per row from row LROW-1 down to
 * row 0 and a terminating empty line.
 * Every element is handed to printf under the "%d" conversion.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::print()
{
    printf("Window Size = %d x %d\n",LROW,LCOL);

    // Header line with the column indices.
    printf("Col \t");
    for (int column = 0; column < LCOL; ++column) {
        printf("%d \t", column);
    }
    printf("\n");

    // One line per row, highest row first.
    for (int row = LROW - 1; row >= 0; --row) {
        printf("Row %d: \t", row);
        for (int column = 0; column < LCOL; ++column) {
            printf("%d\t", M[row][column]);
        }
        printf("\n");
    }
    printf("\n");
}
/* Window shift right
 * Moves the contents of the window horizontally: in every row, column j
 * receives the value of column j+1, for j = 0 up to LCOL-2.
 * Column 0 is lost and column LCOL-1 keeps its old values, so the caller is
 * expected to place the new values in column LCOL-1 afterwards.
 * Both loops carry an unroll directive.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_right()
{
#pragma AP inline
    for (int row = 0; row < LROW; ++row) {
#pragma AP unroll
        for (int column = 0; column < LCOL - 1; ++column) {
#pragma AP unroll
            M[row][column] = M[row][column + 1];
        }
    }
}
/* Window shift left
 * Moves the contents of the window horizontally: in every row, column j
 * receives the value of column j-1, for j = LCOL-1 down to 1.
 * Column LCOL-1 is lost and column 0 keeps its old values, so the caller is
 * expected to place the new values in column 0 afterwards.
 * Both loops carry an unroll directive.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_left()
{
#pragma AP inline
    for (int row = 0; row < LROW; ++row) {
#pragma AP unroll
        // Highest column first so that no value is overwritten before it is copied.
        for (int column = LCOL - 1; column > 0; --column) {
#pragma AP unroll
            M[row][column] = M[row][column - 1];
        }
    }
}
/* Window shift up
 * Moves the contents of the window vertically: in every column, row i
 * receives the value of row i-1, for i = LROW-1 down to 1.
 * Row LROW-1 is lost and row 0 keeps its old values, so the caller is
 * expected to place the new values in row 0 afterwards.
 * Both loops carry an unroll directive.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_up()
{
#pragma AP inline
    // Highest row first so that no value is overwritten before it is copied.
    for (int row = LROW - 1; row > 0; --row) {
#pragma AP unroll
        for (int column = 0; column < LCOL; ++column) {
#pragma AP unroll
            M[row][column] = M[row - 1][column];
        }
    }
}
/* Window shift down
 * Moves the contents of the window vertically: in every column, row i
 * receives the value of row i+1, for i = 0 up to LROW-2.
 * Row 0 is lost and row LROW-1 keeps its old values, so the caller is
 * expected to place the new values in row LROW-1 afterwards.
 * Both loops carry an unroll directive.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::shift_down()
{
#pragma AP inline
    for (int row = 0; row < LROW - 1; ++row) {
#pragma AP unroll
        for (int column = 0; column < LCOL; ++column) {
#pragma AP unroll
            M[row][column] = M[row + 1][column];
        }
    }
}
/* Window insert
 * Stores value at position (row, col) of the window.
 * Neither index is range checked.
 */
template <typename T, int LROW, int LCOL>
void ap_window<T,LROW,LCOL>::insert(T value, int row, int col)
{
#pragma AP inline
    T &slot = M[row][col];
    slot = value;
}
/* Window getval
 * Returns a copy of the window cell addressed by (RowIndex, ColIndex).
 * No bounds checking is performed on the indices.
 */
template <typename T, int LROW, int LCOL>
T ap_window<T,LROW,LCOL>::getval(int RowIndex, int ColIndex)
{
#pragma AP inline
    return M[RowIndex][ColIndex];
}
#endif

View File

@@ -0,0 +1,19 @@
#include <ap_int.h>
#define PAGE_SIZE 4096
/*
 * is_packet_complete()
 *
 * Accounts for one more 4-byte transfer and reports whether a packet boundary was reached.
 *
 * count --> Running byte counter owned by the caller. It is advanced by 4 on every call
 *           and reset to 0 when it becomes exactly equal to size.
 * size ---> Packet size in bytes.
 *
 * Returns 1 when the advanced counter equals size (the counter is then cleared), otherwise 0.
 */
int is_packet_complete(int *count, int size) {
    //Byte count after this 4-byte transfer.
    const int transferred = *count + 4;
    //Only an exact match with the packet size closes the packet.
    const int complete = (transferred == size) ? 1 : 0;

    //Restart the count on a packet boundary, otherwise keep the advanced value.
    *count = complete ? 0 : transferred;
    return complete;
}

View File

@@ -0,0 +1,6 @@
#ifndef _PACKET_MODE_OPERATIONS_H_
#define _PACKET_MODE_OPERATIONS_H_
/*
 * Advances *count by 4 (one 4-byte transfer) and compares it with size.
 * Returns 1 and resets *count to 0 when they are equal, otherwise returns 0
 * and leaves the advanced value in *count.
 */
int is_packet_complete(int *count, int size);
#endif

View File

@@ -0,0 +1,17 @@
#Vivado HLS Build Script for the Sobel Filter Core.
#Open the HLS Project Named "Sobel_Filter".
open_project Sobel_Filter
#Use the sobel_filter() Function as the Top Level Function of the Core.
set_top sobel_filter
#Add the Source Files of the Core to the Project.
add_files sobel.cpp
add_files sobel_operations.cpp
add_files packet_mode_operations.cpp
#Open the Solution Named "solution1".
open_solution "solution1"
#The Part Refers to the Xilinx Virtex 7 VC707 FPGA Development Board
set_part {xc7vx485tffg1761-2}
#Set the Target Clock Period to 10 (NOTE(review): presumably nanoseconds, the Vivado HLS default unit - confirm).
create_clock -period 10 -name default
#Run the C Synthesis.
csynth_design
#Export the Synthesized Core in IP Catalog Format with Display Name "Sobel Filter" and Version "5.8".
export_design -format ip_catalog -display_name "Sobel Filter" -version "5.8"

View File

@@ -0,0 +1,611 @@
#include "sobel.h"
#include "sobel_operations.h"
#include "packet_mode_operations.h"
/*
* sobel_filter()
*
* The Hardware Functionality of the Sobel Filter (HW Accelerator) Core.
*
* The Sobel Filter is a HW Accelerator that Applies Sobel Edge Detection on Images.
* It Receives and Processes the Image Data in Rows.
* In order to Produce one Processed Row it Requires 3 Received Rows.
* This Precondition is due to the Fact that Edge Detection is Applied to a Pixel according to its Neighbor Pixels.
*
* Once the Sobel Filter Receives the First 3 Rows it Produces one Processed Row.
* Then it Rejects the First Row, Sets the Second Row as First and Sets the Last Row as Second.
* The Next/Newly Received Row is Set as the Last of the Rows.
* Now there are, again, 3 Rows in Order to Produce the Next Processed Row.
* This Procedure Carries on until all the Rows of the Image are Received and Processed.
*
* NOTE that the First and Last Rows of the Processed Image are Filled with Dark Pixels.
* NOTE also that the First and Last Columns of all the Rows of the Processed Image are Filled with Dark Pixels.
*
* The Sobel Edge Detection Cannot be Applied to the Perimetric Pixels of the Image Since they Miss the Required Amount of Neighbors
* this is why they are Filled with Dark Pixels.
*
* The Sequential Steps of the Sobel Filter are as Follows:
*
* a --> Send the First Row which is Filled with Dark Pixels.
* b --> Pre-Fetch the 3 First Rows of the Image.
* c --> Process the 3 Rows.
* d --> Fill the First and Last Columns of the Produced Row with Dark Pixels.
* e --> Send the Produced Row.
* f --> Receive the Next Row.
* g --> Start Again from Step c Until Receiving and Processing all the Rows.
* h --> Send the Last Row which is Filled with Dark Pixels.
*
* The Function Parameters are the Input/Output Ports/Interfaces of the Core:
*
* 01 --------> The AXI Stream Input/Slave Interface of the Core Used to Receive the Image Data.
* 02 --------> The AXI Stream Output/Master Interface of the Core Used to Forward the Processed Image Data.
* 03 to 06 --> Registers of the Core that are Accessed through the AXI Slave Lite Interface of the Core.
*
*
* IMPORTANT TECHNIQUES Used to Improve the Overall Performance:
*
* A)Each Image Row is not Received in a Single Buffer.
* Instead, while it is being Received it is Equally Split and Distributed in 16 Sector Buffers.
* Each Sector Buffer has no Dependence with the Rest Sector Buffers so the 16 Pieces of the Image Row Can be Processed in Parallel.
* The HLS Tool Creates 16 Processing Units to Make Parallel Processing Possible.
*
*
* B)Another Improvement Technique is the Usage of Four Line Sector Buffers which Allows
* the Core to Process the Current 3 Rows while Concurrently Receiving the Next Row.
* The Four Line Sector Buffer is Designed with 4 Lines where each is Used to Store the Data of a Single Row.
* When the Sobel Filter Receives and Fills the 3 First Lines with 3 Rows it Starts the Processing.
* The Fourth Line is Free to Start Receiving the Next Row while the Rest 3 Lines are Occupied with the Processing.
*
*
* C)In Older Approaches after 3 Lines of a Three Line Sector Buffer were Processed the Lines would
* Have to be Shifted Up so that the Last Line Could be Fed with the Next Received Row.
* This Approach Required a Significant Amount of Copies where each Pixel of a Line of the Sector Buffer would Have to be Copied to the Upper Line.
* The new Technique Requires Zero Copies as it Uses Indexing to Store the Received Rows in the Four Line Sector Buffer.
*
* Indexing Concerns which Should be Considered as the First, Second and Third Row to Process and where the Next Received Row Should be Stored.
*
* Initially: the First Received Row is Stored in the Line with Index 0 of the Four Line Sector Buffer.
* : the Second Received Row is Stored in the Line with Index 3 of the Four Line Sector Buffer.
* : the Third Received Row is Stored in the Line with Index 2 of the Four Line Sector Buffer.
* : the Line with Index 1 is Used to Store the Next Received Line while the Other 3 are being Processed.
*
* When the Process of the 3 Lines Completes and a new Row is Received then the Indexing Changes so that we Can Start a new Processing and Receive another Row.
* Now the First Row is no Longer Needed so the Line with Index 0 will be Used to Receive the Next Row.
* The Second Row Becomes the First Row for the New Processing so the Line with Index 3 will be Used as the First Row.
* The Third Row Becomes the Second Row for the New Processing so the Line with Index 2 will be Used as the Second Row.
* The Last Received Row Becomes the Third Row for the New Processing so the Line with Index 1 will be Used as the Third Row.
*
* Following the Same Pattern as to which Lines to Process and where to Store the Next Row Leads to the Table Below:
*
* Index 0 | First Row | Next Row | Third Row | Second Row |
* Index 1 | Next Row | Third Row | Second Row | First Row |
* Index 2 | Third Row | Second Row | First Row | Next Row |
* Index 3 | Second Row | First Row | Next Row | Third Row |
*
* To Make Indexing Applicable as Part of the Code we Used the first, second, last and temp Integer Variables which Hold the Current Index
* in the Four Line Sector Buffer where each Row is Stored.
*
* In order to Calculate the Next Indexing for each Row we Used the Formula Below:
* Index = (Index + 3) % 4
*/
/*
 * Top Level Function of the Sobel Filter Core.
 *
 * STREAM_IN --------> AXI Stream Input Interface from which the Image Rows are Received.
 * STREAM_OUT -------> AXI Stream Output Interface to which the Processed Rows are Sent.
 * rows -------------> Number of Rows of the Image.
 * cols -------------> Number of Columns of the Image.
 * packet_mode_en ---> When Equal to 1 a TLAST=1 is Sent every packet_size Transmitted Bytes.
 * packet_size ------> Packet Size in Bytes Used when the Packet Mode is Enabled.
 *
 * Always Returns 1.
 *
 * NOTE(review): The Prefetch Loop Always Receives 3 Rows and the Last Column Index is Derived from sector_iter[15],
 * so Images with Less than 3 Rows or Less than SECTORS Columns do not Look Supported - Confirm against the Driver.
 */
int sobel_filter(/*01*/AXI_PIXEL STREAM_IN[MAX_WIDTH],
/*02*/AXI_PIXEL STREAM_OUT[MAX_WIDTH],
/*03*/int rows,
/*04*/int cols,
/*05*/int packet_mode_en,
/*06*/int packet_size
)
{
/*
* Set the Fifo of the STREAM_OUT and STREAM_IN Interfaces to be Implemented with LUT RAM Memory.
*/
#pragma HLS RESOURCE variable=STREAM_OUT core=FIFO_LUTRAM
#pragma HLS RESOURCE variable=STREAM_IN core=FIFO_LUTRAM
/*
* The rows is a Register to Store the Number of Rows of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=rows bundle=S_AXI4_LITE
/*
* The cols is a Register to Store the Number of Columns of the Image that will be Accelerated.
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=cols bundle=S_AXI4_LITE
/*
* The packet_mode_en is a Register to Store a Value that Enables/Disables the Packet Mode.
* The Packet Mode Should be Enabled when the Data are Transferred with Scatter/Gather Transactions.
* When the Packet Mode is Enabled the Core Sends a TLAST=1 Signal in the Output Interface for each Transmitted Packet.
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=packet_mode_en bundle=S_AXI4_LITE
/*
* The packet_size is a Register to Store the Size that each Packet Should Have (e.g 4K) when Using Scatter/Gather Transfers.
* This Register is Accessed through the AXI Slave Lite Interface (S_AXI4_LITE) of the Core.
*/
#pragma HLS INTERFACE s_axilite port=packet_size bundle=S_AXI4_LITE
//The Return Value (and with it the Block Level Control of the Core) is also Mapped to the S_AXI4_LITE Bundle.
#pragma HLS INTERFACE s_axilite port=return bundle=S_AXI4_LITE
/*
* Set the STREAM_OUT and STREAM_IN Interfaces of the Core to be AXI Stream Interfaces.
* The Fifo Depth is Set to 1920 which is the Maximum Image Width that the Core Can Support to Process.
*/
#pragma HLS INTERFACE axis depth=1920 port=STREAM_IN
#pragma HLS INTERFACE axis depth=1920 port=STREAM_OUT
int bytes_count; //Count the Number of Transferred Bytes.
int first; //Used to Know where the First Received Row is Located in the LINE4_SECTOR_BUFFER.
int second; //Used to Know where the Second Received Row is Located in the LINE4_SECTOR_BUFFER.
int last; //Used to Know where the Last Received Row is Located in the LINE4_SECTOR_BUFFER.
int temp; //Used to Know where the Newest Received Row Should be Temporarily Located in the LINE4_SECTOR_BUFFER.
/*
* The Number of Iterations Required to Receive or Send each Sector of a Row.
* The sector_iter is an Array with as many Fields as the Number of Sectors.
* The sector_iter Array is Configured to be Completely Partitioned according to the #pragma HLS ARRAY_PARTITION.
*/
int sector_iter[SECTORS];
#pragma HLS ARRAY_PARTITION variable=sector_iter dim=1 complete
int sector_size; //The Number of Columns that each Sector Should Store.
int remaining_pixels; //If the Number of Columns is not an Integer Multiple of the Number of Sectors then we Have Remaining Pixels that Should be Distributed in all the Sectors.
const RGB zero_pixel = {0, 0, 0}; //This is a Dark Pixel Used to Set the First and Last Row and all the First and Last Columns of the Image.
/*
* Declare 16 Memory Buffers of Type LINE4_SECTOR_BUFFER.
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
*
* These Buffers are Used to Receive the Image Rows before being Processed.
*/
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR0;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR0 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR1;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR1 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR2;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR2 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR3;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR3 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR4;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR4 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR5;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR5 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR6;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR6 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR7;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR7 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR8;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR8 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR9;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR9 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR10;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR10 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR11;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR11 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR12;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR12 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR13;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR13 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR14;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR14 core=RAM_2P_BRAM
LINE4_SECTOR_BUFFER LINE4_BUFFER_SECTOR15;
#pragma HLS RESOURCE variable=LINE4_BUFFER_SECTOR15 core=RAM_2P_BRAM
/*
* Declare 16 Memory Buffers of Type LINE1_SECTOR_BUFFER.
* Each Buffer is Set to be Dual Port BRAM according to the #pragma HLS RESOURCE.
*
* These Buffers are Used to Store the Image Rows after being Processed.
*/
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR0;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR0 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR1;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR1 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR2;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR2 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR3;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR3 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR4;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR4 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR5;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR5 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR6;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR6 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR7;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR7 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR8;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR8 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR9;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR9 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR10;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR10 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR11;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR11 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR12;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR12 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR13;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR13 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR14;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR14 core=RAM_2P_BRAM
LINE1_SECTOR_BUFFER LINE1_BUFFER_SECTOR15;
#pragma HLS RESOURCE variable=LINE1_BUFFER_SECTOR15 core=RAM_2P_BRAM
/*
* Set Initial Values.
* The first/second/last Indices Match the Order in which the Prefetch Loop Stores the 3 First Rows:
* temp Starts at 0 and Advances with (temp + 3) % 4, so the Rows are Stored in the Lines 0, 3 and 2.
*/
bytes_count = 0;
first = 0;
second = 3;
last = 2;
temp = 0;
//Calculate the Number of Columns that Should be Stored to each Sector Buffer.
//NOTE that this is the Initial Sector Size that is Equal to All the Sector Buffers.
sector_size = (int)(cols / SECTORS);
/*
* Calculate any Remaining Bytes in Case the Number of Columns is not an Integer Multiple of the Number of Sector Buffers.
*
* For Example, for an Image of Width 524 Pixels we Have 524 Pixels / 16 Sectors = 32.75 Pixels which is Not an Integer Multiple of the 16 Sector Buffers.
* For each Sector Buffer we Have a Sector Size of 32 Pixels so 32 Pixels * 16 Sectors = 512 which Leads to Have 12 Remaining Pixels from the Initial 524.
*
* As a Result each of the 16 Sector Buffers Initially Has a Sector Size of 32.
* The Remaining Pixels Should be Distributed to the Sector Buffers so the First 12 Sector Buffers will Have a Sector Size with one More Pixel which Leads to 33 Pixels Sector Size.
*
* |Sector0 |Sector1 |Sector2 |Sector3 |Sector4 |Sector5 |Sector6 |Sector7 |Sector8 |Sector9 |Sector10 |Sector11 |Sector12 |Sector13 |Sector14 |Sector15|
* |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |33 |32 |32 |32 |32 |
*
*/
remaining_pixels = cols - (sector_size * SECTORS);
//Loop to Distribute the Remaining Bytes to the Sector Buffers.
for (int i = 0; i < SECTORS; i++)
{
#pragma HLS PIPELINE II=1
//Set the Array Field of the Corresponding Sector Buffer with the Initial Sector Size.
sector_iter[i] = sector_size;
//Check if we still Have Remaining Pixels
if (remaining_pixels > 0)
{
//Decrease the Number of Remaining Pixels.
remaining_pixels--;
//Increment by 1 the Sector Size of the Corresponding Sector Buffer
sector_iter[i] ++;
}
}
/*
* The First Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
* So, Send the First Row of Zero Pixels.
*/
send_1st_line:
for (int col=0; col<cols; col++)
{
#pragma HLS PIPELINE II=1
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
output_pixel.user = 0x1;
output_pixel.tdest = 0x1;
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
if (packet_mode_en == 1)
{
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packet Size.
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
}
else
{
output_pixel.last = 0x0;
}
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
output_pixel.data = 0x0;
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
STREAM_OUT[col] = output_pixel;
}
/*
* The Sobel Edge Detection Algorithm Requires Three Rows in Order to Produce one Processed Row.
* So, Pre-Fetch the First 3 Rows.
*
* NOTE(review): This Loop Always Receives 3 Rows, Independently of the Value of rows.
*/
prefetch_3lines:
for (int row=0; row<3; row++)
{
//Receive a Row which is Distributed to the 16 Four Line Sector Buffers of Type LINE4_SECTOR_BUFFER.
receive_post_line(STREAM_IN,
&LINE4_BUFFER_SECTOR0,
&LINE4_BUFFER_SECTOR1,
&LINE4_BUFFER_SECTOR2,
&LINE4_BUFFER_SECTOR3,
&LINE4_BUFFER_SECTOR4,
&LINE4_BUFFER_SECTOR5,
&LINE4_BUFFER_SECTOR6,
&LINE4_BUFFER_SECTOR7,
&LINE4_BUFFER_SECTOR8,
&LINE4_BUFFER_SECTOR9,
&LINE4_BUFFER_SECTOR10,
&LINE4_BUFFER_SECTOR11,
&LINE4_BUFFER_SECTOR12,
&LINE4_BUFFER_SECTOR13,
&LINE4_BUFFER_SECTOR14,
&LINE4_BUFFER_SECTOR15,
temp,
sector_iter
);
//Calculate the Vertical Position where the Next Received Row Should be Stored in the Four Line Sector Buffers.
temp = (temp+3)%4;
}
/*
* Produce a Processed Row from the 3 Pre-Fetched Rows.
* Send the Processed Row and then Receive a New Row in Order to Produce again a Processed Row.
*
* Loop Until Receiving all the Rows of the Image.
*
* NOTE(review): The "false" Argument of the DEPENDENCE Pragmas Below is Commented Out,
* so the Pragmas do not Declare the Dependencies as False - Confirm that this is Intended.
*/
proc_module:
for (int row=0; row<rows-3; row++)
{
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR0 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR1 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR2 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR3 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR4 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR5 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR6 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR7 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR8 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR9 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR10 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR11 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR12 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR13 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR14 array //false
#pragma HLS DEPENDENCE variable=LINE4_BUFFER_SECTOR15 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR0 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR1 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR2 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR3 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR4 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR5 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR6 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR7 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR8 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR9 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR10 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR11 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR12 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR13 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR14 array //false
#pragma HLS DEPENDENCE variable=LINE1_BUFFER_SECTOR15 array //false
//Process the 3 Rows that are Received in the 16 Four Line Sector Buffers and Produce one Processed Row.
//NOTE(review): The Initial sector_size is Passed here and not the Per Sector sector_iter Array - Confirm that the Callee Handles the Sectors that Hold one More Pixel.
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
&LINE4_BUFFER_SECTOR1,
&LINE4_BUFFER_SECTOR2,
&LINE4_BUFFER_SECTOR3,
&LINE4_BUFFER_SECTOR4,
&LINE4_BUFFER_SECTOR5,
&LINE4_BUFFER_SECTOR6,
&LINE4_BUFFER_SECTOR7,
&LINE4_BUFFER_SECTOR8,
&LINE4_BUFFER_SECTOR9,
&LINE4_BUFFER_SECTOR10,
&LINE4_BUFFER_SECTOR11,
&LINE4_BUFFER_SECTOR12,
&LINE4_BUFFER_SECTOR13,
&LINE4_BUFFER_SECTOR14,
&LINE4_BUFFER_SECTOR15,
&LINE1_BUFFER_SECTOR0,
&LINE1_BUFFER_SECTOR1,
&LINE1_BUFFER_SECTOR2,
&LINE1_BUFFER_SECTOR3,
&LINE1_BUFFER_SECTOR4,
&LINE1_BUFFER_SECTOR5,
&LINE1_BUFFER_SECTOR6,
&LINE1_BUFFER_SECTOR7,
&LINE1_BUFFER_SECTOR8,
&LINE1_BUFFER_SECTOR9,
&LINE1_BUFFER_SECTOR10,
&LINE1_BUFFER_SECTOR11,
&LINE1_BUFFER_SECTOR12,
&LINE1_BUFFER_SECTOR13,
&LINE1_BUFFER_SECTOR14,
&LINE1_BUFFER_SECTOR15,
sector_size,
first,
second,
last
);
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
//NOTE(review): If cols is Less than SECTORS then sector_iter[15] is 0 and the Column Index Below Becomes -1.
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
//The Produced Row from the three Processed Rows is Stored in the 16 One Line Sector Buffers.
//Send the Produced Row over the AXI Stream Out Interface.
send_line(STREAM_OUT,
&LINE1_BUFFER_SECTOR0,
&LINE1_BUFFER_SECTOR1,
&LINE1_BUFFER_SECTOR2,
&LINE1_BUFFER_SECTOR3,
&LINE1_BUFFER_SECTOR4,
&LINE1_BUFFER_SECTOR5,
&LINE1_BUFFER_SECTOR6,
&LINE1_BUFFER_SECTOR7,
&LINE1_BUFFER_SECTOR8,
&LINE1_BUFFER_SECTOR9,
&LINE1_BUFFER_SECTOR10,
&LINE1_BUFFER_SECTOR11,
&LINE1_BUFFER_SECTOR12,
&LINE1_BUFFER_SECTOR13,
&LINE1_BUFFER_SECTOR14,
&LINE1_BUFFER_SECTOR15,
sector_iter,
packet_mode_en,
packet_size,
&bytes_count
);
//Receive the Next Row which is Distributed to the 16 Four Line Sector Buffers.
receive_post_line(STREAM_IN,
&LINE4_BUFFER_SECTOR0,
&LINE4_BUFFER_SECTOR1,
&LINE4_BUFFER_SECTOR2,
&LINE4_BUFFER_SECTOR3,
&LINE4_BUFFER_SECTOR4,
&LINE4_BUFFER_SECTOR5,
&LINE4_BUFFER_SECTOR6,
&LINE4_BUFFER_SECTOR7,
&LINE4_BUFFER_SECTOR8,
&LINE4_BUFFER_SECTOR9,
&LINE4_BUFFER_SECTOR10,
&LINE4_BUFFER_SECTOR11,
&LINE4_BUFFER_SECTOR12,
&LINE4_BUFFER_SECTOR13,
&LINE4_BUFFER_SECTOR14,
&LINE4_BUFFER_SECTOR15,
temp,
sector_iter
);
//Calculate which Should be Considered as the First Line for Producing the Next Processed Row.
first = (first+3)%4;
//Calculate which Should be Considered as the Second Line for Producing the Next Processed Row.
second = (second+3)%4;
//Calculate which Should be Considered as the Last Line for Producing the Next Processed Row.
last = (last+3)%4;
//Calculate where the New Received Row Should be Stored in the Four Line Sector Buffers.
temp = (temp+3)%4;
}
//The Previous for Loop Ended before Processing the Last Received Row.
//So, Process here the Last 3 Rows to Produce the Last Processed Row.
start_sobel_operations(&LINE4_BUFFER_SECTOR0,
&LINE4_BUFFER_SECTOR1,
&LINE4_BUFFER_SECTOR2,
&LINE4_BUFFER_SECTOR3,
&LINE4_BUFFER_SECTOR4,
&LINE4_BUFFER_SECTOR5,
&LINE4_BUFFER_SECTOR6,
&LINE4_BUFFER_SECTOR7,
&LINE4_BUFFER_SECTOR8,
&LINE4_BUFFER_SECTOR9,
&LINE4_BUFFER_SECTOR10,
&LINE4_BUFFER_SECTOR11,
&LINE4_BUFFER_SECTOR12,
&LINE4_BUFFER_SECTOR13,
&LINE4_BUFFER_SECTOR14,
&LINE4_BUFFER_SECTOR15,
&LINE1_BUFFER_SECTOR0,
&LINE1_BUFFER_SECTOR1,
&LINE1_BUFFER_SECTOR2,
&LINE1_BUFFER_SECTOR3,
&LINE1_BUFFER_SECTOR4,
&LINE1_BUFFER_SECTOR5,
&LINE1_BUFFER_SECTOR6,
&LINE1_BUFFER_SECTOR7,
&LINE1_BUFFER_SECTOR8,
&LINE1_BUFFER_SECTOR9,
&LINE1_BUFFER_SECTOR10,
&LINE1_BUFFER_SECTOR11,
&LINE1_BUFFER_SECTOR12,
&LINE1_BUFFER_SECTOR13,
&LINE1_BUFFER_SECTOR14,
&LINE1_BUFFER_SECTOR15,
sector_size,
first,
second,
last);
//Set the First Pixel of the First One Line Sector Buffer to be a Dark/Zero Pixel.
//NOTE the First Pixel of the First One Line Sector Buffer is Actually the First Pixel of a Row.
LINE1_BUFFER_SECTOR0.insert(zero_pixel, 0, 0);
//Set the Last Pixel of the Last One Line Sector Buffer to be a Dark/Zero Pixel.
//NOTE the Last Pixel of the Last One Line Sector Buffer is Actually the Last Pixel of a Row.
LINE1_BUFFER_SECTOR15.insert(zero_pixel, 0, sector_iter[15]-1);
//Send the Last Produced Row over the AXI Stream Out Interface.
send_line(STREAM_OUT,
&LINE1_BUFFER_SECTOR0,
&LINE1_BUFFER_SECTOR1,
&LINE1_BUFFER_SECTOR2,
&LINE1_BUFFER_SECTOR3,
&LINE1_BUFFER_SECTOR4,
&LINE1_BUFFER_SECTOR5,
&LINE1_BUFFER_SECTOR6,
&LINE1_BUFFER_SECTOR7,
&LINE1_BUFFER_SECTOR8,
&LINE1_BUFFER_SECTOR9,
&LINE1_BUFFER_SECTOR10,
&LINE1_BUFFER_SECTOR11,
&LINE1_BUFFER_SECTOR12,
&LINE1_BUFFER_SECTOR13,
&LINE1_BUFFER_SECTOR14,
&LINE1_BUFFER_SECTOR15,
sector_iter,
packet_mode_en,
packet_size,
&bytes_count
);
/*
* The Last Line/Row of an Image Processed with Sobel Edge Detection is Always Filled with Zero Pixels.
* So, Send the Last Row of Zero Pixels.
*/
send_last_line:
for (int col=0; col<cols; col++)
{
#pragma HLS PIPELINE II=1
AXI_PIXEL output_pixel; //Declare a AXI_PIXEL that Represents the AXI Stream Output Interface.
output_pixel.strb = 0xF; //Set the Strobe of the AXI Stream Interface so that all 4 Transmitted Bytes are Valid.
output_pixel.user = 0x1;
output_pixel.tdest = 0x1;
//Since this is the Last Row Check if this is the Last Pixel to Send.
//The Last Pixel Always Carries TLAST=1, no Matter if the Packet Mode is Enabled or not.
if (col==cols-1 )
{
//Set the TLAST Signal to 1 to Indicate that this will be the Last Transmission of the Data.
output_pixel.last = 0x1;
}
//If the Packet Mode is Enabled then we Have to Set the TLAST to 1 if a Full Packet of Size packet_size is Transmitted.
else if(packet_mode_en == 1)
{
//The TLAST(last) Gets the Return Value of the is_packet_complete() which Returns 1 if the Number of Transmitted Bytes is Equal to the Packet Size.
output_pixel.last = is_packet_complete(&bytes_count, packet_size);
}
else
output_pixel.last = 0x0;
//Set the Data to Transmit to Have Zero Value Since we Transmit Zero Pixels.
output_pixel.data = 0x0;
//Forward the Data along with the Rest Signals to the AXI Stream Output Interface.
STREAM_OUT[col] = output_pixel;
}
//Reset the Byte Counter.
//NOTE(review): bytes_count is a Local Variable that is not Read after this Point, so this Assignment Looks Redundant.
bytes_count = 0; // Byte Counter
return 1;
}

View File

@@ -0,0 +1,31 @@
#ifndef _SOBEL_H_
#define _SOBEL_H_
#include "ap_bmp.h"
#include "ap_axi_sdata2.h"
#include "ap_int.h"
#include "ap_utils.h"
#include "ap_video.h"
//Maximum Image Width (in Pixels) that the Core Can Process.
#define MAX_WIDTH 1920
//Maximum Image Height (in Pixels).
#define MAX_HEIGHT 1080
//Number of Sector Buffers in which each Image Row is Distributed.
#define SECTORS 16
/*
 * Helper Macros.
 *
 * Every Argument and every Expansion is Fully Parenthesized so that Expression Arguments
 * such as ABS(a - b) or ABSDIFF(a, b + c) are Evaluated Correctly.
 * NOTE that the Arguments are Evaluated More than Once so they Must not Have Side Effects.
 */
//Absolute Difference of x and y.
#define ABSDIFF(x,y) (((x) > (y)) ? ((x) - (y)) : ((y) - (x)))
//Absolute Value of x.
#define ABS(x) (((x) > 0) ? (x) : -(x))
//Pack the r, g and b Components in a Single Word as r:g:b (r in Bits 23-16, g in Bits 15-8, b in Bits 7-0).
#define RGB(r,g,b) ((((word)(r))<<16)|(((word)(g))<<8)|((word)(b)))
//Pixel Type with the R, G and B Fields (NOTE(review): presumably 8 Bits per Component - confirm against the ap_rgb Declaration).
typedef ap_rgb <8, 8, 8> RGB;
//One Transfer of the AXI Stream Interfaces. The sobel_filter() Sets its data, strb, user, tdest and last Fields.
//NOTE(review): the Template Arguments presumably Give the Widths of the data/user/id/dest Signals (32 Bit data) - confirm against ap_axi_sdata2.h.
typedef ap_axiu2 <32, 1, 1, 1> AXI_PIXEL;
//Input Buffer of one Sector: 4 Lines of unsigned char Values with (MAX_WIDTH/SECTORS)+2 Columns each.
typedef ap_linebuffer <unsigned char, 4, (MAX_WIDTH/SECTORS)+2> LINE4_SECTOR_BUFFER;
//Output Buffer of one Sector: 1 Line of RGB Pixels with (MAX_WIDTH/SECTORS)+1 Columns.
typedef ap_linebuffer <RGB, 1, (MAX_WIDTH/SECTORS)+1> LINE1_SECTOR_BUFFER;
/*
 * Top Level Function of the Sobel Filter Core.
 *
 * STREAM_IN --------> AXI Stream Input Interface Used to Receive the Image Data.
 * STREAM_OUT -------> AXI Stream Output Interface Used to Forward the Processed Image Data.
 * rows -------------> Number of Rows of the Image.
 * cols -------------> Number of Columns of the Image.
 * packet_mode_en ---> Enables (Value 1) the Transmission of a TLAST=1 every packet_size Bytes.
 * packet_size ------> Packet Size in Bytes Used when the Packet Mode is Enabled.
 *
 * Returns 1.
 */
int sobel_filter(AXI_PIXEL STREAM_IN[MAX_WIDTH],
AXI_PIXEL STREAM_OUT[MAX_WIDTH],
int rows,
int cols,
int packet_mode_en,
int packet_size);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,195 @@
#ifndef _SOBEL_OPERATIONS_H_
#define _SOBEL_OPERATIONS_H_
//Maps a RGB Pixel to a Single unsigned char Value.
//NOTE(review): presumably the Luminance (Y) of the Pixel, Judging by the Name - the Implementation is not Visible here, confirm.
unsigned char rgb2y(RGB pix);
/*
* Template of the sobel_operator()
*
* The sobel_operator() Makes Sobel Computation Using a 3x3 Neighborhood
*
* window -------> The Four Line Sector Buffer that Holds the Rows to Process.
* x_index ------> The Column of the Left Most Pixel of the 3x3 Neighborhood (Columns x_index to x_index + 2 are Read).
* y_first ------> The Line of the Buffer that Holds the First (Upper) Row of the Neighborhood.
* y_second -----> The Line of the Buffer that Holds the Second (Middle) Row of the Neighborhood.
* y_last -------> The Line of the Buffer that Holds the Last (Lower) Row of the Neighborhood.
*
* Returns a Pixel whose R, G and B Components are all Equal to the Thresholded Edge Value.
*
* NOTE(review): dummy_index is not Used in the Body. It presumably Exists only to Create a
* Separate Instance of the Function per Sector - confirm.
*/
template<int dummy_index>
RGB sobel_operator(LINE4_SECTOR_BUFFER *window,
unsigned int x_index,
unsigned int y_first,
unsigned int y_second,
unsigned int y_last)
{
#pragma HLS INLINE off
#pragma HLS EXPRESSION_BALANCE off
short x_weight = 0; //Sum of the 9 Products of the Horizontal Operator.
short y_weight = 0; //Sum of the 9 Products of the Vertical Operator.
short x_weight_array[9]; //The 9 Products of the Horizontal Operator (3 per Row).
#pragma HLS ARRAY_PARTITION variable=x_weight_array complete dim=1
short y_weight_array[9]; //The 9 Products of the Vertical Operator (3 per Row).
#pragma HLS ARRAY_PARTITION variable=y_weight_array complete dim=1
short edge_weight;
unsigned char edge_val;
RGB pixel;
//Horizontal Sobel Operator.
const char x_op[3][3] = { {-1, 0, 1},
{-2, 0, 2},
{-1, 0, 1}};
#pragma HLS ARRAY_PARTITION variable=x_op complete dim=1
//Vertical Sobel Operator.
const char y_op[3][3] = { { 1, 2, 1},
{ 0, 0, 0},
{-1,-2,-1}};
#pragma HLS ARRAY_PARTITION variable=y_op complete dim=1
sobel_mul:
{
#pragma HLS PIPELINE II=1
//Compute Approximation of the Gradients in the X-Y Direction for the First Row of x_op and y_op.
for(char j = 0; j < 3; j++)
{
#pragma HLS UNROLL
#pragma HLS PIPELINE II=1
// X Direction Gradient
x_weight_array[j] = (window->getval(y_first,x_index + j) * x_op[0][j]);
// Y Direction Gradient
y_weight_array[j] = (window->getval(y_first,x_index + j) * y_op[0][j]);
}
//Compute Approximation of the Gradients in the X-Y Direction for the Second Row of x_op and y_op.
for(char j = 0; j < 3; j++)
{
#pragma HLS UNROLL
#pragma HLS PIPELINE II=1
// X Direction Gradient
x_weight_array[3+j] = (window->getval(y_second,x_index + j) * x_op[1][j]);
// Y Direction Gradient
y_weight_array[3+j] = (window->getval(y_second,x_index + j) * y_op[1][j]);
}
//Compute Approximation of the Gradients in the X-Y Direction for the Third Row of x_op and y_op.
for(char j = 0; j < 3; j++){
#pragma HLS UNROLL
#pragma HLS PIPELINE II=1
// X Direction Gradient
x_weight_array[6+j] = (window->getval(y_last,x_index + j) * x_op[2][j]);
// Y Direction Gradient
y_weight_array[6+j] = (window->getval(y_last,x_index + j) * y_op[2][j]);
}
}
//Accumulate the 9 Products of each Direction.
for(char j = 0; j < 9; j++) {
#pragma HLS UNROLL
#pragma HLS PIPELINE II=1
// X Direction Gradient
x_weight += x_weight_array[j];
// Y Direction Gradient
y_weight += y_weight_array[j];
}
//Approximate the Gradient Magnitude with the Sum of the Absolute Values of the two Gradients.
edge_weight = ABS(x_weight) + ABS(y_weight);
//Invert the Value so that Strong Edges Become Dark.
//NOTE(review): The Cast Keeps only the Low 8 Bits of edge_weight before the Subtraction,
//so Weights Above 255 Wrap Around instead of Saturating - confirm that this is Intended.
edge_val = (255-(unsigned char)(edge_weight));
//Edge Thresholding
//Values Above 200 Become 255, Values Below 100 Become 0 and the Values in Between are Kept.
if(edge_val > 200)
{
edge_val = 255;
}
else if(edge_val < 100)
{
edge_val = 0;
}
//Produce a Gray Pixel.
pixel.R = pixel.G = pixel.B = edge_val;
return pixel;
}
/*
 * start_sobel_operations()
 *
 * Called by the sobel_filter() to Process the 3 Rows that are Stored in the 16 Four Line Sector Buffers
 * and Produce one Processed Row in the 16 One Line Sector Buffers.
 *
 * LINE_BUFFER_SECTOR_0 to 15 ----> The Four Line Sector Buffers that Hold the Received Rows.
 * OUTPUT_BUFFER_SECTOR_0 to 15 --> The One Line Sector Buffers that Get the Processed Row.
 * sector_size -------------------> The Caller Passes the Initial Sector Size (cols / SECTORS).
 * first, second, last -----------> The Lines of the Four Line Sector Buffers that Hold the First, Second and Last Row to Process.
 *
 * NOTE(review): The Implementation is not Visible here, the Description Follows the Call Sites in sobel_filter() - confirm.
 */
void start_sobel_operations(
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
int sector_size,
int first,
int second,
int last);
/*
 * send_line()
 *
 * Called by the sobel_filter() to Send the Processed Row that is Stored in the 16 One Line Sector Buffers
 * over the AXI Stream Output Interface.
 *
 * STREAM_OUT --------------------> The AXI Stream Output Interface.
 * OUTPUT_BUFFER_SECTOR_0 to 15 --> The One Line Sector Buffers that Hold the Processed Row.
 * sector_iter_array -------------> The Number of Pixels of each Sector (the Caller Passes its sector_iter Array).
 * packet_mode_enable ------------> The Packet Mode Enable Value of the Core.
 * packet_size -------------------> The Packet Size of the Core.
 * remain_bytes ------------------> The Caller Passes the Address of its Transmitted Bytes Counter.
 *
 * NOTE(review): The Implementation is not Visible here, the Description Follows the Call Sites in sobel_filter() - confirm.
 */
void send_line(
AXI_PIXEL *STREAM_OUT,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_0,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_1,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_2,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_3,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_4,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_5,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_6,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_7,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_8,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_9,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_10,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_11,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_12,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_13,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_14,
LINE1_SECTOR_BUFFER *OUTPUT_BUFFER_SECTOR_15,
int *sector_iter_array,
int packet_mode_enable,
int packet_size,
int *remain_bytes);
/*
 * receive_post_line
 *
 * Declaration Only -- the Definition is Not Part of this Header.
 *
 * Takes One AXI_PIXEL Stream Pointer (STREAM_IN), the Sixteen LINE4_SECTOR_BUFFER Line Sectors (0 to 15),
 * a Row Index and a Per-Sector Iteration Array.
 * NOTE(review): Presumably Reads One Image Line from STREAM_IN into Row 'row' of the Line Sectors --
 * Confirm Against the Definition.
 */
void receive_post_line(
AXI_PIXEL *STREAM_IN,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_0,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_1,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_2,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_3,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_4,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_5,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_6,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_7,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_8,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_9,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_10,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_11,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_12,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_13,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_14,
LINE4_SECTOR_BUFFER *LINE_BUFFER_SECTOR_15,
int row,
int *sector_iter_array);
#endif

View File

@@ -0,0 +1,49 @@
##################################################################################
# #
# This Script #
# 1.Creates a New Vivado Project #
# 2.Generates the Block Design Described in "pcie_acceleration_vc707_design.tcl" #
# 3.Imports the Required Constraint File "constraints.xdc" #
# 4.Imports the Required HDL Wrapper File "hdl_wrapper.v" #
# #
# It Must be Sourced from the Directory that Holds the Vivado_HLS_IPs, #
# Constraints, HDL_Wrapper and Vivado_Block_Design Folders, because Every Path #
# Below is Derived from the Current Working Directory. #
# #
##################################################################################
set relative_directory [pwd]
set project_directory $relative_directory/pcie_acceleration_vc707
set ip_repository $relative_directory/Vivado_HLS_IPs
set constraints_directory $relative_directory/Constraints
set hdl_wrapper_directory $relative_directory/HDL_Wrapper
set block_design_directory $relative_directory/Vivado_Block_Design
#NOTE(review): The Project Below is Created with the Name "pcie_accel_demo", so Vivado is Expected to Name
#the Sources Folder "pcie_accel_demo.srcs" and not "pcie_acceleration_vc707.srcs". This Variable is not
#Used in this Script; Confirm Whether the Sourced Block Design Script Relies on it Before Changing it.
set src_bd_design_directory $relative_directory/pcie_acceleration_vc707/pcie_acceleration_vc707.srcs/sources_1/bd/pcie_acceleration_vc707_design

#Create a New Project Named "pcie_accel_demo"
create_project pcie_accel_demo $project_directory -part xc7vx485tffg1761-2

#Set the Board Part which is Required for Certain Configurations such as the Uartlite Controller (RS-232)
set_property board_part xilinx.com:vc707:part0:1.2 [current_project]

#Add the HLS IPs before Opening the Block Design
#The Repository Paths are Built from $ip_repository so that the IP Location is Defined in One Place Only.
set hls_ip_names {
    Acceleration_Scheduler_Direct
    Acceleration_Scheduler_Indirect
    Acceleration_Scheduler_SG_XDMA
    DMA_SG_PCIe_Scheduler
    Fetch_Scheduler
    Interrupt_Manager
    Info_Memory_Block
    Send_Scheduler
    Sobel_Filter
}
set hls_ip_paths {}
foreach hls_ip_name $hls_ip_names {
    lappend hls_ip_paths $ip_repository/$hls_ip_name
}
set_property ip_repo_paths $hls_ip_paths [current_project]
update_ip_catalog

#Add the Block Design
source $block_design_directory/pcie_acceleration_vc707_design.tcl

#Add Constraint Files
add_files -fileset constrs_1 -norecurse $constraints_directory/constraints.xdc
import_files -fileset constrs_1 $constraints_directory/constraints.xdc

#Add the HDL Wrapper
add_files -norecurse -scan_for_includes $hdl_wrapper_directory/hdl_wrapper.v
import_files -norecurse $hdl_wrapper_directory/hdl_wrapper.v

#Refresh the Compile Order Once per Fileset
update_compile_order -fileset sources_1
update_compile_order -fileset sim_1

0
Images/.keep Normal file
View File

BIN
Images/system_overview.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

112
README.md Normal file
View File

@@ -0,0 +1,112 @@
# FPGA Hardware Acceleration over PCIe
## What This Is
Multi-threaded Linux application + custom kernel driver + FPGA hardware design that accelerates Sobel edge detection on images. Demonstrates full-stack embedded systems engineering from RTL to application layer.
![System Overview][system_overview]
## Project Summary
This project implements a **full-stack hardware acceleration platform** that offloads compute-intensive image processing tasks from a multi-threaded Linux application to custom FPGA accelerators connected via PCIe. The system demonstrates advanced concepts in **computer architecture, hardware-software co-design, parallel processing, and driver development**.
### Key Achievement
Successfully designed and implemented a multi-acceleration-group architecture supporting **concurrent hardware acceleration** for up to **16 simultaneous threads**, with intelligent resource scheduling and DMA-based data transfers.
- **See [Engineering Challenges Solved](#engineering-challenges-solved)**
## Technical Overview
**Hardware (Xilinx Virtex-7 FPGA)**
- 7 parallel acceleration units supporting up to 16 concurrent threads
- Custom IP cores designed in C/C++ (Vivado HLS), synthesized to RTL
- PCIe Gen2 x4 interface with DMA engines for high-throughput data transfer
- Sobel filter accelerator processing up to 1080p images
**Software (Linux)**
- **Kernel driver**: PCIe device management, MSI interrupts, multi-thread resource scheduling
- **User application**: pthreads, memory-mapped I/O, DMA buffer management
- **MicroBlaze firmware**: FPGA system initialization
---
## Architecture Highlights
```
┌─────────────────────────────────────────┐
│ Multi-threaded Application (pthreads) │
└──────────────┬──────────────────────────┘
│ ioctl(), mmap()
┌──────────────▼──────────────────────────┐
│ Kernel Driver (Resource Scheduler) │ ← Thread arbitration, DMA setup
└──────────────┬──────────────────────────┘
│ PCIe, MSI Interrupts
┌──────────────▼──────────────────────────┐
│ FPGA Hardware (7 Accel Groups) │ ← Parallel processing
│ • Fetch/Send Schedulers (DMA) │
│ • Sobel Filter Accelerators │
│ • Interrupt Manager │
└─────────────────────────────────────────┘
```
**Why 7 acceleration groups?**
- 2 Direct-mode (PCIe → BRAM, low latency)
- 4 Indirect-mode (PCIe → DDR3, higher throughput)
- 1 Scatter-Gather (supports fragmented user memory)
Each can process different images simultaneously with driver-managed scheduling.
---
## Engineering Challenges Solved
**1. Multi-thread resource arbitration**
16 threads competing for 7 hardware units → Implemented two scheduling policies (greedy, best-available) in kernel driver with per-thread state tracking
**2. PCIe interrupt routing**
Designed custom Interrupt Manager IP to map 7 accelerators to MSI vectors, coordinated with GPIO-triggered interrupts
**3. Zero-copy DMA from userspace**
Used `get_user_pages()` + scatter-gather tables for direct DMA to/from application buffers without memcpy overhead
**4. Hardware-software timing correlation**
FPGA global timer accessible via memory-mapped registers for nanosecond-precision performance analysis
---
## Results
- **Throughput**: Supports 16 concurrent requests with linear scaling up to 7 threads
- **Latency**: ~50-100 μs for VGA images (640x480)
---
## Quick Start
```bash
# Generate custom IPs (one-time)
cd Hardware/Vivado_HLS_IPs/Sobel_Filter && vivado_hls run_hls.tcl
# ... repeat for 8 other IPs
# Build bitstream
cd Hardware && vivado -source create_project.tcl
# Flow → Generate Bitstream
# Load driver & run
cd Software/Linux_App_Driver
make
./make_device
insmod xilinx_pci_driver.ko
./ui image.bmp 100 16 1 10 # 100 iterations, 16 threads
```
---
## Repository Structure
```
Hardware/Vivado_HLS_IPs/ 9 custom IP cores (C++ → RTL)
Hardware/Vivado_Block_Design/ System integration (AXI, PCIe, DDR3)
Software/Linux_App_Driver/ Kernel driver + test application
Software/Microblaze_XSDK/ FPGA firmware
```
[system_overview]: /Images/system_overview.png "System Overview Diagram"

0
Software/.keep Normal file
View File

View File

View File

@@ -0,0 +1,13 @@
# Builds the xilinx_pci_driver kernel module against the running kernel and
# the userspace test application (ui).
obj-m := xilinx_pci_driver.o

XILINX_PCI_DRIVER_HOME := $(shell pwd)
XILINX_PCI_DRIVER_KVER := $(shell uname -r)

# Neither target produces a file with its own name.
.PHONY: all clean

# $(MAKE) (instead of a literal "make") forwards -j/-n and other flags to the
# kernel build system.
all:
	$(MAKE) -C /lib/modules/$(XILINX_PCI_DRIVER_KVER)/build M=$(XILINX_PCI_DRIVER_HOME) modules
	g++ ui.cpp -o ui -pthread

# Also removes the userspace binary, which the kernel "clean" target does not know about.
clean:
	$(MAKE) -C /lib/modules/$(XILINX_PCI_DRIVER_KVER)/build M=$(XILINX_PCI_DRIVER_HOME) clean
	rm -f ui

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 225 KiB

View File

@@ -0,0 +1 @@
0

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

View File

@@ -0,0 +1,7 @@
#!/bin/bash
#
# Creates the /dev/xilinx_pci_driver character device node and makes sure
# debugfs is mounted. Must be run as root.
#
# NOTE(review): the major/minor pair (240, 1) is hard-coded here; it has to
# match what the kernel driver registers -- confirm against the driver source.

# Mount debugfs only when it is not mounted yet, so that repeated runs do not
# print a "already mounted" error.
if ! mountpoint -q /sys/kernel/debug; then
    mount -t debugfs none /sys/kernel/debug/
fi

# Recreate the device node from scratch.
rm -rf /dev/xilinx_pci_driver
mknod /dev/xilinx_pci_driver c 240 1
chown root /dev/xilinx_pci_driver
chmod 0644 /dev/xilinx_pci_driver

# Show the resulting node for visual confirmation.
ls -al /dev/xilinx_pci_driver

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,551 @@
/**
*
* This Header File Contains the Necessary Macros for the Kernel Driver Module and the Userspace Application
*
*/
//---Valid Macros To Keep---------------------------------------------//
//PCI Vendor and Device IDs of the Two Board Variants.
#define KC705_PCI_VENDOR_ID 0x10EE
#define KC705_PCI_DEVICE_ID 0x7021
#define VC707_PCI_VENDOR_ID 0x10EE
#define VC707_PCI_DEVICE_ID 0x7022
//The IDs in Use are Those of the VC707 Board.
#define VENDOR_ID VC707_PCI_VENDOR_ID
#define DEVICE_ID VC707_PCI_DEVICE_ID
//One-Bit Flags that Can be OR-ed Together.
//NOTE(review): Presumably Track which Driver Resources Were Acquired so that they Can be Released -- Confirm Against the Driver.
#define HAVE_REGION 0x01 // I/O Memory region
#define HAVE_IRQ 0x02 // Interrupt
#define HAVE_KREG 0x04 // Kernel Registration
#define HAVE_DEBUGFS 0x08 // Debugfs File Creation
//Signal Numbers, One per Acceleration Group (0 to 6) Plus One for Scatter/Gather.
//NOTE(review): Presumably Delivered by the Driver to the Userspace Threads -- Confirm Against the Driver.
#define DEFAULT_SIGNAL_0 34
#define DEFAULT_SIGNAL_1 35
#define DEFAULT_SIGNAL_2 36
#define DEFAULT_SIGNAL_3 37
#define DEFAULT_SIGNAL_4 38
#define DEFAULT_SIGNAL_5 39
#define DEFAULT_SIGNAL_6 40
#define DEFAULT_SIGNAL_SG 41
//Generic Occupancy States.
#define OCCUPIED 1
#define NOT_OCCUPIED 0
/**
* @note
*
* The Following Macro Line Works as a Switch.
* Remove/Add One of the '*' in the Very First Comment Marker of the Line to Change the State of the Switch.
* One '*' (the Current State) Enables --> GREEDY.
* The First Marker then Opens a Comment that Swallows the BEST_AVAILABLE Define and is Closed by the Second Marker.
* Two '*' Enable --> BEST_AVAILABLE.
* The First Marker then is an Empty Comment and the Second Marker Opens a Comment that Swallows the GREEDY Define.
* This is Used to Safely Disable/Enable Specific Code Parts of the Driver.
*/
/*/ #define BEST_AVAILABLE /*/ #define GREEDY /**/
/** @note
*
* The Macros Below are Used to Enable/Disable Debug Messages.
*
* The DEBUG_MESSAGES is Used to Print the Driver's Debug Messages to the /var/log/kern.log File.
* The DEBUG_MESSAGES_UI is Used to Print the Userspace Application's Debug messages to the Terminal.
*
* Uncomment to Enable the Messages Debugging.
*/
//#define DEBUG_MESSAGES
//#define DEBUG_MESSAGES_UI
//Generic Return Codes.
#define SUCCESS 0
#define FAILURE 1
//Size Units in Bytes.
#define BYTE 1
#define KBYTE 1024
#define MBYTE 1048576
//Handshake Values.
#define START 0x1
#define ACK 0x1
//Buffer Sizes in Bytes.
//The Expansions are Parenthesized so that they Stay a Single Value when Used Next to Operators
//of Equal or Higher Precedence (e.g. 'x / MMAP_ALLOCATION_SIZE' or 'x % KERNEL_ALLOCATION_SIZE').
#define MMAP_ALLOCATION_SIZE (4 * MBYTE)
#define POSIX_ALLOCATED_SIZE (32 * MBYTE)
#define KERNEL_ALLOCATION_SIZE (4 * MBYTE)
#define OPERATION_START_TIMER 0x18000000
#define BAR0_32BIT 0 //For 32 Bit Addressing
#define BAR1_32BIT 1 //For 32 Bit Addressing
#define BAR2_32BIT 2 //For 32 Bit Addressing
#define BAR3_32BIT 3 //For 32 Bit Addressing
#define BAR4_32BIT 4 //For 32 Bit Addressing
#define BAR5_32BIT 5 //For 32 Bit Addressing
#define BAR0_64BIT 0 //For 64 Bit Addressing
#define BAR1_64BIT 2 //For 64 Bit Addressing
#define BAR2_64BIT 4 //For 64 Bit Addressing
//Occupancy Bit Mask with One Bit per Acceleration Group.
#define ACCELERATOR_DIRECT_0_OCCUPIED 0x01
#define ACCELERATOR_DIRECT_1_OCCUPIED 0x02
#define ACCELERATOR_INDIRECT_0_OCCUPIED 0x04
#define ACCELERATOR_INDIRECT_1_OCCUPIED 0x08
#define ACCELERATOR_INDIRECT_2_OCCUPIED 0x10
#define ACCELERATOR_INDIRECT_3_OCCUPIED 0x20
#define ACCELERATOR_SG_OCCUPIED 0x40
//NOTE(review): 0x3F Covers the Six Direct/Indirect Bits Only and Does Not Include ACCELERATOR_SG_OCCUPIED (0x40).
//Presumably Intended because Scatter/Gather Access is Requested Through its Own Command -- Confirm Against the Driver.
#define ACCELERATOR_ALL_OCCUPIED 0x3F
#define ACCELERATOR_NO_OCCUPIED 0x00
#define ENABLE_GCC_MC 0x00010001 //Enable Global Clock Counter and Metrics Counter Mask
#define RESET_GCC_MC 0x00020002 //Reset Global Clock Counter and Metrics Counter Mask
#define APM_CR_OFFSET 0x300 //AXI Performance Monitor Control Register Offset(0x60 for Long Int Offset 0x300 for Byte Offset)
#define APM_GCC_LOWER_OFFSET 0X0004 //Global Clock Counter Lower 32Bits Register
#define APM_GCC_UPPER_OFFSET 0X0000 //Global Clock Counter Upper 32Bits Register
#define METRIC_SELECTOR_REGISTER_0_OFFSET 0X0044
#define METRIC_SELECTOR_REGISTER_1_OFFSET 0X0048
#define METRIC_SELECTOR_REGISTER_2_OFFSET 0X004C
//////////////////////////////////////////////////////////////////////////////////////
// PCIe BAR0 Address Space -Mapping the FPGA AXI Address Space (HW Peripherals)
//////////////////////////////////////////////////////////////////////////////////////
//NOTE(review): BAR0_OFFSET_INTERRUPT_CONTROLLER and BAR0_OFFSET_PCIE_CTL Share the Same Value (0x00020000).
//Confirm Against the Block Design Address Map which of the Two Peripherals is Really Mapped There.
#define BAR0_OFFSET_INTERRUPT_CONTROLLER 0x00020000
#define BAR0_OFFSET_UARTLITE 0x00010000
#define BAR0_OFFSET_PCIE_CTL 0x00020000
#define BAR0_OFFSET_GPIO_PCIE_INTERRUPT 0x00030000
#define BAR0_OFFSET_GPIO_MSI 0x00040000
#define BAR0_OFFSET_TIMER 0x00050000
#define BAR0_OFFSET_FETCH_SCHEDULER 0x00060000
#define BAR0_OFFSET_SEND_SCHEDULER 0x00070000
#define BAR0_OFFSET_SCHEDULER_BUFFER_FETCH 0x00080000
#define BAR0_OFFSET_SCHEDULER_BUFFER_SEND 0x00090000
#define BAR0_OFFSET_CDMA_FETCH 0x000A0000
#define BAR0_OFFSET_CDMA_SEND 0x000B0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_ACCELERATION_SCHEDULER_DIRECT 0x000C0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_APM 0x000D0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_DMA 0x000E0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_0_SOBEL_FILTER 0x000F0000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_ACCELERATION_SCHEDULER_DIRECT 0x00100000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_APM 0x00110000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_DMA 0x00120000
#define BAR0_OFFSET_ACCEL_GROUP_DIRECT_1_SOBEL_FILTER 0x00130000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_ACCELERATION_SCHEDULER_INDIRECT 0x00140000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_APM 0x00150000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_DMA 0x00160000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_0_SOBEL_FILTER 0x00170000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_ACCELERATION_SCHEDULER_INDIRECT 0x00180000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_APM 0x00190000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_DMA 0x001A0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_1_SOBEL_FILTER 0x001B0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_ACCELERATION_SCHEDULER_INDIRECT 0x001C0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_APM 0x001D0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_DMA 0x001E0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_2_SOBEL_FILTER 0x001F0000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_ACCELERATION_SCHEDULER_INDIRECT 0x00200000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_APM 0x00210000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_DMA 0x00220000
#define BAR0_OFFSET_ACCEL_GROUP_INDIRECT_3_SOBEL_FILTER 0x00230000
#define BAR0_OFFSET_ACCEL_GROUP_SG_ACCELERATION_SCHEDULER_SG 0x00240000
#define BAR0_OFFSET_ACCEL_GROUP_SG_APM 0x00250000
#define BAR0_OFFSET_ACCEL_GROUP_SG_DMA_SG_PCIE_SCHEDULER 0x00260000
#define BAR0_OFFSET_ACCEL_GROUP_SG_SOBEL_FILTER_4K 0x00280000
#define BAR0_OFFSET_ACCEL_GROUP_SG_DMA 0x00290000
#define BAR0_OFFSET_GPIO_MSI_READ 0x00300000
#define BAR0_OFFSET_INTERRUPT_MANAGER 0x00310000
#define BAR0_OFFSET_GPIO_ACK 0x00320000
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler Direct Register Offsets
//////////////////////////////////////////////////////////////////////////////////////
#define ACCELERATION_SCHEDULER_DIRECT_CONTROL_REGISTER_OFFSET 0X00
#define ACCELERATION_SCHEDULER_DIRECT_GIE_REGISTER_OFFSET 0X04
#define ACCELERATION_SCHEDULER_DIRECT_IER_REGISTER_OFFSET 0X08
#define ACCELERATION_SCHEDULER_DIRECT_ISR_REGISTER_OFFSET 0X0C
#define ACCELERATION_SCHEDULER_DIRECT_DMA_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X18
#define ACCELERATION_SCHEDULER_DIRECT_SOBEL_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X20
#define ACCELERATION_SCHEDULER_DIRECT_GPIO_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X28
#define ACCELERATION_SCHEDULER_DIRECT_APM_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X30
#define ACCELERATION_SCHEDULER_DIRECT_SHARED_APM_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X38
#define ACCELERATION_SCHEDULER_DIRECT_SHARED_METRICS_DEVICE_BASE_ADDRESS_REGISTER_OFFSET 0X40
#define ACCELERATION_SCHEDULER_DIRECT_IMAGE_COLUMNS_REGISTER_OFFSET 0X48
#define ACCELERATION_SCHEDULER_DIRECT_IMAGE_ROWS_REGISTER_OFFSET 0X50
#define ACCELERATION_SCHEDULER_DIRECT_HOST_SOURCE_ADDRESS_REGISTER_OFFSET 0X58
#define ACCELERATION_SCHEDULER_DIRECT_HOST_DESTINATION_ADDRESS_REGISTER_OFFSET 0X60
#define ACCELERATION_SCHEDULER_DIRECT_INITIATOR_GROUP_REGISTER_OFFSET 0X68
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler Indirect Register Offsets
//////////////////////////////////////////////////////////////////////////////////////
#define ACCELERATION_SCHEDULER_INDIRECT_CONTROL_REGISTER_OFFSET 0x00
#define ACCELERATION_SCHEDULER_INDIRECT_GIE_REGISTER_OFFSET 0x04
#define ACCELERATION_SCHEDULER_INDIRECT_IER_REGISTER_OFFSET 0x08
#define ACCELERATION_SCHEDULER_INDIRECT_ISR_REGISTER_OFFSET 0x0C
#define ACCELERATION_SCHEDULER_INDIRECT_SCHEDULER_BUFFER_BASE_ADDRESS_FETCH_REGISTER_OFFSET 0x18
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_REG_FETCH_REGISTER_OFFSET 0x20
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_REG_FETCH_REGISTER_OFFSET 0x28
#define ACCELERATION_SCHEDULER_INDIRECT_DATA_SIZE_REG_FETCH_REGISTER_OFFSET 0x30
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_REG__FETCH__REGISTER_OFFSET 0x38
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_FETCH_REGISTER_OFFSET 0x40
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_FETCH_REGISTER_OFFSET 0x48
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_FETCH_REGISTER_OFFSET 0x50
#define ACCELERATION_SCHEDULER_INDIRECT_SCHEDULER_BUFFER_BASE_ADDRESS_SEND_REGISTER_OFFSET 0x58
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_REG_SEND_REGISTER_OFFSET 0x60
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_REG_SEND_REGISTER_OFFSET 0x68
#define ACCELERATION_SCHEDULER_INDIRECT_DATA_SIZE_REG_SEND_REGISTER_OFFSET 0x70
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_REG__SEND_REGISTER_OFFSET 0x78
#define ACCELERATION_SCHEDULER_INDIRECT_SOURCE_ADDRESS_SEND_REGISTER_OFFSET 0x80
#define ACCELERATION_SCHEDULER_INDIRECT_DESTINATION_ADDRESS_SEND_REGISTER_OFFSET 0x88
#define ACCELERATION_SCHEDULER_INDIRECT_OFFSET_SEND_REGISTER_OFFSET 0x90
#define ACCELERATION_SCHEDULER_INDIRECT_DMA_BASE_ADDRESS_REGISTER_OFFSET 0x98
#define ACCELERATION_SCHEDULER_INDIRECT_SOBEL_BASE_ADDRESS_REGISTER_OFFSET 0xA0
#define ACCELERATION_SCHEDULER_INDIRECT_IMAGE_COLUMNS_REGISTER_OFFSET 0xA8
#define ACCELERATION_SCHEDULER_INDIRECT_IMAGE_ROWS_REGISTER_OFFSET 0xB0
#define ACCELERATION_SCHEDULER_INDIRECT_ACCEL_GROUP_REGISTER_OFFSET 0xB8
#define ACCELERATION_SCHEDULER_INDIRECT_SHARED_APM_BASE_ADDRESS_REGISTER_OFFSET 0xC0
#define ACCELERATION_SCHEDULER_INDIRECT_SHARED_METRICS_BASE_ADDRESS_REGISTER_OFFSET 0xC8
#define ACCELERATION_SCHEDULER_INDIRECT_APM_BASE_ADDRESS_REGISTER_OFFSET 0xD0
//////////////////////////////////////////////////////////////////////////////////////
// Acceleration Scheduler SG Register Offsets
//////////////////////////////////////////////////////////////////////////////////////
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_AP_CTRL 0x00
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_GIE 0x04
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IER 0x08
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_ISR 0x0c
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_AP_RETURN 0x10
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_DMA_SG_PCIE_SCHEDULER_BASE_ADDRESS_DATA 0x18
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SOBEL_DEVICE_ADDRESS_DATA 0x20
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_GPIO_DEVICE_ADDRESS_DATA 0x28
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_APM_DEVICE_ADDRESS_DATA 0x30
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SHARED_APM_DEVICE_ADDRESS_DATA 0x38
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_SHARED_METRICS_ADDRESS_DATA 0x40
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IMAGE_COLS_DATA 0x48
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_IMAGE_ROWS_DATA 0x50
#define XACCELERATION_SCHEDULER_SG_XDMA_CFG_ADDR_ACCEL_GROUP_DATA 0x58
//////////////////////////////////////////////////////////////////////////////////////
// AXI BARs Offsets
//////////////////////////////////////////////////////////////////////////////////////
#define AXI_BAR_0_OFFSET 0x20000000
#define AXI_BAR_1_OFFSET 0x30000000
#define AXI_BAR_2_OFFSET 0x40000000
#define AXI_BAR_3_OFFSET 0x50000000
#define AXI_BAR_4_OFFSET 0x60000000
#define AXI_BAR_5_OFFSET 0x70000000
//////////////////////////////////////////////////////////////////////////////////////
// AXI BARs Dynamic Address Translation Registers Offsets
//////////////////////////////////////////////////////////////////////////////////////
#define AXI_BAR0_LOWER_ADDRESS_OFFSET 0x20C
#define AXI_BAR0_UPPER_ADDRESS_OFFSET 0x208
#define AXI_BAR1_LOWER_ADDRESS_OFFSET 0x214
#define AXI_BAR1_UPPER_ADDRESS_OFFSET 0x210
#define AXI_BAR2_LOWER_ADDRESS_OFFSET 0x21C
#define AXI_BAR2_UPPER_ADDRESS_OFFSET 0x218
#define AXI_BAR3_LOWER_ADDRESS_OFFSET 0x224
#define AXI_BAR3_UPPER_ADDRESS_OFFSET 0x220
#define AXI_BAR4_LOWER_ADDRESS_OFFSET 0x22C
#define AXI_BAR4_UPPER_ADDRESS_OFFSET 0x228
#define AXI_BAR5_LOWER_ADDRESS_OFFSET 0x234
#define AXI_BAR5_UPPER_ADDRESS_OFFSET 0x230
//////////////////////////////////////////////////////////////////////////////////////
// IOCtl Commands
//////////////////////////////////////////////////////////////////////////////////////
#define COMMAND_REQUEST_ACCELERATOR_ACCESS 0x0100
#define COMMAND_REQUEST_ACCELERATOR_SG_ACCESS 0x0200
#define COMMAND_SET_PAGES 0x0300
#define COMMAND_UNMAP_PAGES 0x0400
#define COMMAND_RESET_VARIABLES 0x0500
//////////////////////////////////////////////////////////////////////////////////////
// Scenarios
//////////////////////////////////////////////////////////////////////////////////////
#define SCENARIO_SCATTER_GATHER 1
#define SCENARIO_WORST_CASE 2
#define SCENARIO_WORST_CASE_CDMA 3
/*
 * Dimensions and Size of One Image.
 * NOTE(review): 'size' is Presumably the Image Size in Bytes -- Confirm Against the Code that Fills it.
 */
struct image_info
{
uint32_t rows;
uint32_t columns;
uint64_t size;
};
/*
 * Metrics Record of One Acceleration.
 *
 * The First Twenty 32-Bit Fields Occupy Bytes 0 to 79 and are Followed by the Image Info at Byte 80.
 * The Field Order Must Not Change: the Byte Offsets are Documented Next to Each Field.
 * NOTE(review): Presumably the Offsets are Relied Upon by the FPGA Side that Writes these Fields -- Confirm.
 * Fields Ending in _l and _u are Pairs of 32-Bit Halves (Presumably the Lower and Upper Half of a 64-Bit Counter Value).
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
uint32_t apm_read_transactions; //Offset 0 Bytes
uint32_t apm_read_bytes; //Offset 4 Bytes
uint32_t apm_write_transactions; //Offset 8 Bytes
uint32_t apm_write_bytes; //Offset 12 Bytes
uint32_t apm_packets; //Offset 16 Bytes
uint32_t apm_bytes; //Offset 20 Bytes
uint32_t apm_gcc_l; //Offset 24 Bytes
uint32_t apm_gcc_u; //Offset 28 Bytes
uint32_t cdma_fetch_time_start_l; //Offset 32 Bytes
uint32_t cdma_fetch_time_start_u; //Offset 36 Bytes
uint32_t cdma_fetch_time_end_l; //Offset 40 Bytes
uint32_t cdma_fetch_time_end_u; //Offset 44 Bytes
uint32_t cdma_send_time_start_l; //Offset 48 Bytes
uint32_t cdma_send_time_start_u; //Offset 52 Bytes
uint32_t cdma_send_time_end_l; //Offset 56 Bytes
uint32_t cdma_send_time_end_u; //Offset 60 Bytes
uint32_t dma_accel_time_start_l; //Offset 64 Bytes
uint32_t dma_accel_time_start_u; //Offset 68 Bytes
uint32_t dma_accel_time_end_l; //Offset 72 Bytes
uint32_t dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes
/*
* Kernel and Userspace Metrics
*/
//Start/End Timestamp Pairs of the Individual Phases.
uint64_t total_time_start;
uint64_t total_time_end;
uint64_t sleep_time_start;
uint64_t sleep_time_end;
uint64_t preparation_time_start;
uint64_t preparation_time_end;
uint64_t load_time_start;
uint64_t load_time_end;
uint64_t save_time_start;
uint64_t save_time_end;
};
/*
 * Metrics Collected for One Process/Thread.
 *
 * Holds One 'struct metrics' per Acceleration Group (Two Direct, Four Indirect, One Scatter/Gather)
 * Followed by Start/End Timestamp Pairs that are not Tied to a Specific Acceleration Group.
 */
struct metrics_per_process
{
struct metrics agd0; //Acceleration Group Direct 0
struct metrics agd1; //Acceleration Group Direct 1
struct metrics agi0; //Acceleration Group Indirect 0
struct metrics agi1; //Acceleration Group Indirect 1
struct metrics agi2; //Acceleration Group Indirect 2
struct metrics agi3; //Acceleration Group Indirect 3
struct metrics agsg; //Acceleration Group Scatter/Gather
/*
* Kernel and Userspace Metrics
*/
uint64_t total_time_start;
uint64_t total_time_end;
uint64_t sleep_time_start;
uint64_t sleep_time_end;
uint64_t preparation_time_start;
uint64_t preparation_time_end;
uint64_t load_time_start;
uint64_t load_time_end;
uint64_t save_time_start;
uint64_t save_time_end;
//Overhead of the Page Set/Unmap Operations (Presumably COMMAND_SET_PAGES and COMMAND_UNMAP_PAGES -- Confirm).
uint64_t set_pages_overhead_time_start;
uint64_t set_pages_overhead_time_end;
uint64_t unmap_pages_overhead_time_start;
uint64_t unmap_pages_overhead_time_end;
};
/*
 * Status of the Seven Acceleration Groups.
 *
 * NOTE(review): The *_occupied_pid Fields Presumably Hold the PID of the Thread that Currently Owns the
 * Acceleration Group and 'accelerator_busy' Presumably is a Mask of the ACCELERATOR_*_OCCUPIED Bits --
 * Confirm Against the Driver.
 */
struct status_flags
{
//One Owner Field per Acceleration Group.
uint32_t accel_direct_0_occupied_pid;
uint32_t accel_direct_1_occupied_pid;
uint32_t accel_indirect_0_occupied_pid;
uint32_t accel_indirect_1_occupied_pid;
uint32_t accel_indirect_2_occupied_pid;
uint32_t accel_indirect_3_occupied_pid;
uint32_t accel_sg_0_occupied_pid;
uint32_t accelerator_busy;
uint32_t open_modules;
//One Busy Field per Acceleration Group.
uint32_t agd0_busy;
uint32_t agd1_busy;
uint32_t agi0_busy;
uint32_t agi1_busy;
uint32_t agi2_busy;
uint32_t agi3_busy;
uint32_t agsg_busy;
};
/*
 * Repository Shared by All the Acceleration Groups.
 *
 * Holds One 'struct metrics' Slot per Acceleration Group, Preceded by One Unused Slot, and the Status Flags.
 * NOTE(review): The Unused First Slot Presumably Keeps the Remaining Slots at the Offsets the Hardware Expects --
 * Confirm Before Reordering or Removing it.
 */
struct shared_repository
{
struct metrics unused_shared_metrics;
struct metrics accel_direct_0_shared_metrics;
struct metrics accel_direct_1_shared_metrics;
struct metrics accel_indirect_0_shared_metrics;
struct metrics accel_indirect_1_shared_metrics;
struct metrics accel_indirect_2_shared_metrics;
struct metrics accel_indirect_3_shared_metrics;
struct metrics accel_sg_0_shared_metrics;
struct status_flags shared_status_flags;
};
/*
 * Per-Process Repository: the Process Metrics, the Image Info and Three Integer State Fields.
 * NOTE(review): Presumably this is the Structure Shared Between the Driver and One Userspace Thread
 * (See 'shared_repo_virtual_address' and 'shared_repo_kernel_address' Below) -- Confirm.
 */
struct shared_repository_process
{
struct metrics_per_process process_metrics;
struct image_info shared_image_info;
int accel_completed;
int accel_occupied;
int image_segments;
};
/*
 * The Two Magic Bytes at the Start of a Bitmap File.
 * Kept Apart from bmpfile_header_t, Presumably so that the 12-Byte Header Needs no Packing Attribute -- Confirm.
 */
typedef struct {
uint8_t magic[2];
} bmpfile_magic_t;
/*
 * The Bitmap File Header Fields that Follow the Magic Bytes (12 Bytes).
 */
typedef struct {
uint32_t filesz; //File Size
uint16_t creator1;
uint16_t creator2;
uint32_t bmp_offset; //Presumably the Offset of the Pixel Data from the Start of the File -- Confirm
} bmpfile_header_t;
/*
 * The Bitmap Info Header (40 Bytes).
 * NOTE(review): The Field Layout Looks Like the Standard BITMAPINFOHEADER -- Confirm Against the Loader Code.
 */
typedef struct {
uint32_t header_sz;
int32_t width;
int32_t height;
uint16_t nplanes;
uint16_t bitspp; //Bits per Pixel
uint32_t compress_type;
uint32_t bmp_bytesz;
int32_t hres;
int32_t vres;
uint32_t ncolors;
uint32_t nimpcolors;
} bitmap_info_header_t;
/*
 * One 4-Byte Color Entry Stored in Blue, Green, Red Order Plus One Byte that Carries no Data.
 */
typedef struct {
uint8_t b;
uint8_t g;
uint8_t r;
uint8_t nothing;
} rgb_t;
typedef unsigned char pixel_t;
/*
 * Memories Reserved for One PID.
 *
 * Node of a Singly Linked List (See 'next_pid').
 * Holds the Virtual/Physical Addresses of the Shared Repository and of the Pre-Process and Post-Process
 * mmap Areas, as Well as the Scatter/Gather State of the Source and Destination Buffers.
 * NOTE(review): The Physical Addresses are Stored in 32 Bits -- Confirm that the Allocations are Always
 * Located Below 4 GiB, Otherwise the Addresses are Truncated.
 */
struct pid_reserved_memories {
pid_t pid;
struct shared_repository_process *shared_repo_virtual_address;
uint32_t shared_repo_physical_address;
uint64_t *pre_process_mmap_virtual_address;
uint32_t pre_process_mmap_physical_address;
uint64_t *post_process_mmap_virtual_address;
uint32_t post_process_mmap_physical_address;
//Scatter/Gather State of the Source Buffer.
struct sg_table *dma_sg_table_source;
struct scatterlist *scatterlist_pointer_source;
int buffer_dma_buffers_source;
int buffer_mapped_pages_source;
//Scatter/Gather State of the Destination Buffer.
struct sg_table *dma_sg_table_destination;
struct scatterlist *scatterlist_pointer_destination;
int buffer_dma_buffers_destination;
int buffer_mapped_pages_destination;
uint64_t *u64_sg_list_source;
uint64_t *u64_sg_list_destination;
//Next Node of the List.
struct pid_reserved_memories *next_pid;
};
/*
 * The Source and Destination Scatter/Gather List Pointers of One PID.
 * NOTE(review): Presumably the Argument Exchanged with the Driver for the Page Set/Unmap Commands -- Confirm.
 */
struct sg_list_addresses
{
pid_t current_pid;
uint64_t *sg_list_source_address;
uint64_t *sg_list_destination_address;
};
/*
 * Per-Thread Bookkeeping: the Pointers to the Shared Repository and to the Pre-Process and Post-Process
 * Areas, Plus Three Integer File Fields (Presumably the File Descriptors of the Corresponding mmap Files -- Confirm).
 */
struct per_thread_info
{
struct shared_repository_process *shared_repo_kernel_address;
uint8_t *u8_pre_process_kernel_address;
uint8_t *u8_post_process_kernel_address;
int pre_process_mmap_file;
int post_process_mmap_file;
int shared_repo_mmap_file;
};

View File

View File

@@ -0,0 +1,221 @@
/*******************************************************************/
/* */
/* This file is automatically generated by linker script generator.*/
/* */
/* Version: */
/* */
/* Copyright (c) 2010 Xilinx, Inc. All rights reserved. */
/* */
/* Description : MicroBlaze Linker Script */
/* */
/*******************************************************************/
/* Stack and Heap Default to 0x400 (1 KiB) Each Unless the Symbols are Already Defined */
_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x400;
_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x400;
/* Define Memories in the system */
/* The BRAM Region Starts at 0x50 because the Vector Sections Below are Placed at Absolute Addresses 0x0 to 0x20 */
/* All the Output Sections of this Script are Assigned to the BRAM Region; the Other Regions are Only Declared */
MEMORY
{
microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr : ORIGIN = 0x50, LENGTH = 0x3FFB0
pcie : ORIGIN = 0x10020000, LENGTH = 0x10000
pcie_AXIBAR_0 : ORIGIN = 0x20000000, LENGTH = 0x400000
pcie_AXIBAR_1 : ORIGIN = 0x30000000, LENGTH = 0x400000
pcie_AXIBAR_2 : ORIGIN = 0x40000000, LENGTH = 0x400000
pcie_AXIBAR_3 : ORIGIN = 0x50000000, LENGTH = 0x400000
pcie_AXIBAR_4 : ORIGIN = 0x60000000, LENGTH = 0x1000
pcie_AXIBAR_5 : ORIGIN = 0x70000000, LENGTH = 0x1000
mig : ORIGIN = 0x80000000, LENGTH = 0x20000000
shared_metrics_bram_controller_S_AXI_BASEADDR : ORIGIN = 0xC0000000, LENGTH = 0x40000
}
/* Specify the default entry point to the program */
ENTRY(_start)
/* Define the sections, and where they are mapped in memory */
SECTIONS
{
.vectors.reset 0x0 : {
KEEP (*(.vectors.reset))
}
.vectors.sw_exception 0x8 : {
KEEP (*(.vectors.sw_exception))
}
.vectors.interrupt 0x10 : {
KEEP (*(.vectors.interrupt))
}
.vectors.hw_exception 0x20 : {
KEEP (*(.vectors.hw_exception))
}
.text : {
*(.text)
*(.text.*)
*(.gnu.linkonce.t.*)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.init : {
KEEP (*(.init))
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.fini : {
KEEP (*(.fini))
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.ctors : {
__CTOR_LIST__ = .;
___CTORS_LIST___ = .;
KEEP (*crtbegin.o(.ctors))
KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
__CTOR_END__ = .;
___CTORS_END___ = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.dtors : {
__DTOR_LIST__ = .;
___DTORS_LIST___ = .;
KEEP (*crtbegin.o(.dtors))
KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
PROVIDE(__DTOR_END__ = .);
PROVIDE(___DTORS_END___ = .);
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.rodata : {
__rodata_start = .;
*(.rodata)
*(.rodata.*)
*(.gnu.linkonce.r.*)
__rodata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.sdata2 : {
. = ALIGN(8);
__sdata2_start = .;
*(.sdata2)
*(.sdata2.*)
*(.gnu.linkonce.s2.*)
. = ALIGN(8);
__sdata2_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.sbss2 : {
__sbss2_start = .;
*(.sbss2)
*(.sbss2.*)
*(.gnu.linkonce.sb2.*)
__sbss2_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.data : {
. = ALIGN(4);
__data_start = .;
*(.data)
*(.data.*)
*(.gnu.linkonce.d.*)
__data_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.got : {
*(.got)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.got1 : {
*(.got1)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.got2 : {
*(.got2)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.eh_frame : {
*(.eh_frame)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.jcr : {
*(.jcr)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.gcc_except_table : {
*(.gcc_except_table)
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.sdata : {
. = ALIGN(8);
__sdata_start = .;
*(.sdata)
*(.sdata.*)
*(.gnu.linkonce.s.*)
__sdata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.sbss (NOLOAD) : {
. = ALIGN(4);
__sbss_start = .;
*(.sbss)
*(.sbss.*)
*(.gnu.linkonce.sb.*)
. = ALIGN(8);
__sbss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.tdata : {
__tdata_start = .;
*(.tdata)
*(.tdata.*)
*(.gnu.linkonce.td.*)
__tdata_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.tbss : {
__tbss_start = .;
*(.tbss)
*(.tbss.*)
*(.gnu.linkonce.tb.*)
__tbss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.bss (NOLOAD) : {
. = ALIGN(4);
__bss_start = .;
*(.bss)
*(.bss.*)
*(.gnu.linkonce.b.*)
*(COMMON)
. = ALIGN(4);
__bss_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 );
_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 );
/* Generate Stack and Heap definitions */
.heap (NOLOAD) : {
. = ALIGN(8);
_heap = .;
_heap_start = .;
. += _HEAP_SIZE;
_heap_end = .;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
.stack (NOLOAD) : {
_stack_end = .;
. += _STACK_SIZE;
. = ALIGN(8);
_stack = .;
__stack = _stack;
} > microblaze_bram_ilmb_bram_if_cntlr_microblaze_bram_dlmb_bram_if_cntlr
_end = .;
}

View File

@@ -0,0 +1,84 @@
#include "stdio.h"
#include "string.h"
#include "stdlib.h"
#include "platform.h"
#include "xil_exception.h"
#include "xparameters.h"
#include "xstatus.h"
#define KBYTE 1024
/*
 * Functions Declaration
 *
 * The Setup Functions are Defined in Other Source Files and are Called Without Arguments by main().
 * They are Declared with (void) so that they are Real Prototypes: an Empty Parameter List Would Leave
 * the Arguments Unchecked by the Compiler.
 */
int setup_acceleration_scheduler_sg(void);
int setup_dma_sg_schedulers(void);
int setup_acceleration_schedulers_direct(void);
int setup_acceleration_schedulers_indirect(void);
int setup_fetch_scheduler(void);
int setup_send_scheduler(void);
int setup_scheduler_buffers(void);
int setup_cdmas(void);
int setup_dmas(void);
int setup_apms(void);
int setup_shared_apm(void);
int setup_gpio(void);
int setup_pcie(void);
int setup_sobel_filters(void);
int setup_interrupt_manager(void);
int setup_interrupts(void);
//The Base Address of the FPGA's BRAM (256K).
int *bram_base_address = (int *)XPAR_SHARED_METRICS_BRAM_CONTROLLER_S_AXI_BASEADDR;
/*
 * Firmware entry point.
 *
 * Clears the terminal, initializes the platform, zeroes the BRAM reached
 * through bram_base_address, runs every peripheral setup routine in a fixed
 * order, prints a ready banner and then spins forever.
 *
 * The values returned by the setup routines are not inspected.
 * The trailing return statement is never reached.
 */
int main()
{
	/* Count of int-sized stores needed to cover the 256K BRAM. */
	const int bram_word_count = (256 * KBYTE) / 4;
	int word;

	/* ESC (27) followed by "[2J" clears the terminal screen. */
	xil_printf("%c[2J",27);

	/* Bring up the platform before touching any peripheral. */
	init_platform();

	/* Zero the FPGA's BRAM one word at a time. */
	word = 0;
	while (word < bram_word_count)
	{
		bram_base_address[word] = 0;
		word++;
	}

	/* Acceleration schedulers and the fetch/send schedulers. */
	setup_acceleration_schedulers_direct();
	setup_acceleration_schedulers_indirect();
	setup_fetch_scheduler();
	setup_send_scheduler();
	setup_scheduler_buffers();

	/* Data movers and performance monitors. */
	setup_cdmas();
	setup_dmas();
	setup_apms();
	setup_shared_apm();

	/* GPIO, PCIe and the Sobel filter cores. */
	setup_gpio();
	setup_pcie();
	setup_sobel_filters();

	/* Scatter/gather scheduler blocks and the interrupt manager. */
	setup_acceleration_scheduler_sg();
	setup_dma_sg_schedulers();
	setup_interrupt_manager();

	/* The interrupt controller and the interrupts are set up last. */
	setup_interrupts();

	print("\r\n-->System is Ready\r\n");

	/* Idle forever to keep the system alive. */
	for (;;)
	{
	}

	return XST_SUCCESS;
}

View File

@@ -0,0 +1,103 @@
/******************************************************************************
*
* Copyright (C) 2010 - 2014 Xilinx, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* Use of the Software is limited solely to applications:
* (a) running on a Xilinx device, or
* (b) that interact with a Xilinx device through a bus or interconnect.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* XILINX CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Except as contained in this notice, the name of the Xilinx shall not be used
* in advertising or otherwise to promote the sale, use or other dealings in
* this Software without prior written authorization from Xilinx.
*
******************************************************************************/
#include "xparameters.h"
#include "xil_cache.h"
#include "platform_config.h"
/*
* Uncomment the following line if ps7 init source files are added in the
* source directory for compiling example outside of SDK.
*/
/*#include "ps7_init.h"*/
#ifdef STDOUT_IS_16550
#include "xuartns550_l.h"
#define UART_BAUD 9600
#endif
/*
 * Enable the processor caches for the current build target.
 *
 * - __PPC__ builds: enable the instruction and data caches for the region
 *   selected by CACHEABLE_REGION_MASK.
 * - __MICROBLAZE__ builds: enable the instruction cache only when
 *   XPAR_MICROBLAZE_USE_ICACHE is defined and the data cache only when
 *   XPAR_MICROBLAZE_USE_DCACHE is defined.
 * - Any other target: no-op.
 *
 * The MicroBlaze branch tests defined(__MICROBLAZE__) rather than the
 * macro's value, matching the #ifdef used for __PPC__ and avoiding reliance
 * on how an undefined or empty macro evaluates inside #elif.
 */
void
enable_caches()
{
#ifdef __PPC__
    Xil_ICacheEnableRegion(CACHEABLE_REGION_MASK);
    Xil_DCacheEnableRegion(CACHEABLE_REGION_MASK);
#elif defined(__MICROBLAZE__)
#ifdef XPAR_MICROBLAZE_USE_ICACHE
    Xil_ICacheEnable();
#endif
#ifdef XPAR_MICROBLAZE_USE_DCACHE
    Xil_DCacheEnable();
#endif
#endif
}
/*
 * Turn the caches off by calling Xil_DCacheDisable() and then
 * Xil_ICacheDisable().
 * NOTE(review): unlike enable_caches(), these calls are not guarded by
 * target or XPAR_MICROBLAZE_USE_*CACHE macros - confirm both routines are
 * safe to call on every configuration this file is built for.
 */
void
disable_caches()
{
Xil_DCacheDisable();
Xil_ICacheDisable();
}
/*
 * Configure the stdout UART.
 *
 * When STDOUT_IS_16550 is defined, the 16550 UART at STDOUT_BASEADDR is
 * programmed with the UART_BAUD rate (9600 in this file) and the
 * XUN_LCR_8_DATA_BITS line-control setting. When STDOUT_IS_PS7_UART is
 * defined nothing is done here. With neither macro defined the function
 * body is empty.
 */
void
init_uart()
{
#ifdef STDOUT_IS_16550
XUartNs550_SetBaud(STDOUT_BASEADDR, XPAR_XUARTNS550_CLOCK_HZ, UART_BAUD);
XUartNs550_SetLineControlReg(STDOUT_BASEADDR, XUN_LCR_8_DATA_BITS);
#endif
#ifdef STDOUT_IS_PS7_UART
/* Bootrom/BSP configures PS7 UART to 115200 bps */
#endif
}
/*
 * Platform bring-up: enables the caches via enable_caches() and then
 * configures the stdout UART via init_uart(). The ps7_init() call is left
 * commented out; see the note inside the body for when to enable it.
 */
void
init_platform()
{
/*
* If you want to run this example outside of SDK,
* uncomment the following line and also #include "ps7_init.h" at the top.
* Make sure that the ps7_init.c and ps7_init.h files are included
* along with this example source files for compilation.
*/
/* ps7_init();*/
enable_caches();
init_uart();
}
/*
 * Platform tear-down: the only action is to turn the caches off through
 * disable_caches().
 */
void
cleanup_platform()
{
disable_caches();
}

View File

@@ -0,0 +1,41 @@
/******************************************************************************
*
* Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* Use of the Software is limited solely to applications:
* (a) running on a Xilinx device, or
* (b) that interact with a Xilinx device through a bus or interconnect.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* XILINX CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Except as contained in this notice, the name of the Xilinx shall not be used
* in advertising or otherwise to promote the sale, use or other dealings in
* this Software without prior written authorization from Xilinx.
*
******************************************************************************/
/*
 * Public interface of the platform support code: platform bring-up and
 * tear-down entry points.
 */
#ifndef __PLATFORM_H_
#define __PLATFORM_H_
#include "platform_config.h"
/*
 * Both routines take no arguments and return nothing. The explicit (void)
 * parameter list gives callers a real prototype, so the compiler rejects
 * calls that pass arguments.
 */
/* Enable the caches and configure the stdout UART. */
void init_platform(void);
/* Disable the caches. */
void cleanup_platform(void);
#endif

View File

@@ -0,0 +1,4 @@
/*
 * Platform configuration header.
 * Apart from its include guard this header defines nothing, so the STDOUT_*
 * macros tested by the platform code are not provided from here.
 */
#ifndef __PLATFORM_CONFIG_H_
#define __PLATFORM_CONFIG_H_
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,159 @@
/*
 * Image descriptor: two 32-bit dimensions followed by a 64-bit size.
 * NOTE(review): the unit of `size` is not shown here - presumably bytes;
 * confirm against the code that fills it in.
 */
struct image_info
{
u32 rows; //Number of image rows
u32 columns; //Number of image columns
u64 size; //Image size (unit not shown here - see note above)
};
/*
 * Metrics record.
 *
 * Layout: twenty 32-bit counter/timestamp fields, an embedded image_info,
 * then ten 64-bit kernel/userspace timestamps. The byte offsets in the
 * comments assume u32 is 4 bytes and u64 is 8 bytes; every 64-bit member
 * then lands on a multiple of 8, giving 176 bytes in total.
 *
 * NOTE(review): fields ending in _l / _u presumably hold the lower / upper
 * 32 bits of one 64-bit value - confirm against the code that writes them.
 */
struct metrics
{
/*
* AXI Performance Monitor Metrics
*/
u32 apm_read_transactions; //Offset 0 Bytes
u32 apm_read_bytes; //Offset 4 Bytes
u32 apm_write_transactions; //Offset 8 Bytes
u32 apm_write_bytes; //Offset 12 Bytes
u32 apm_packets; //Offset 16 Bytes
u32 apm_bytes; //Offset 20 Bytes
u32 apm_gcc_l; //Offset 24 Bytes
u32 apm_gcc_u; //Offset 28 Bytes
u32 cdma_fetch_time_start_l; //Offset 32 Bytes
u32 cdma_fetch_time_start_u; //Offset 36 Bytes
u32 cdma_fetch_time_end_l; //Offset 40 Bytes
u32 cdma_fetch_time_end_u; //Offset 44 Bytes
u32 cdma_send_time_start_l; //Offset 48 Bytes
u32 cdma_send_time_start_u; //Offset 52 Bytes
u32 cdma_send_time_end_l; //Offset 56 Bytes
u32 cdma_send_time_end_u; //Offset 60 Bytes
u32 dma_accel_time_start_l; //Offset 64 Bytes
u32 dma_accel_time_start_u; //Offset 68 Bytes
u32 dma_accel_time_end_l; //Offset 72 Bytes
u32 dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes (16 bytes long)
/*
* Kernel and Userspace Metrics
*/
u64 total_time_start; //Offset 96 Bytes
u64 total_time_end; //Offset 104 Bytes
u64 sleep_time_start; //Offset 112 Bytes
u64 sleep_time_end; //Offset 120 Bytes
u64 preparation_time_start; //Offset 128 Bytes
u64 preparation_time_end; //Offset 136 Bytes
u64 load_time_start; //Offset 144 Bytes
u64 load_time_end; //Offset 152 Bytes
u64 save_time_start; //Offset 160 Bytes
u64 save_time_end; //Offset 168 Bytes
};
/*
 * Per-process metrics record.
 *
 * Its leading members repeat the member list of struct metrics in the same
 * order (same offsets, 0 through 175); four extra 64-bit timestamps for
 * page set/unmap overhead follow. The byte offsets in the comments assume
 * u32 is 4 bytes and u64 is 8 bytes, giving 208 bytes in total.
 *
 * NOTE(review): fields ending in _l / _u presumably hold the lower / upper
 * 32 bits of one 64-bit value - confirm against the code that writes them.
 */
struct metrics_per_process
{
/*
* AXI Performance Monitor Metrics
*/
u32 apm_read_transactions; //Offset 0 Bytes
u32 apm_read_bytes; //Offset 4 Bytes
u32 apm_write_transactions; //Offset 8 Bytes
u32 apm_write_bytes; //Offset 12 Bytes
u32 apm_packets; //Offset 16 Bytes
u32 apm_bytes; //Offset 20 Bytes
u32 apm_gcc_l; //Offset 24 Bytes
u32 apm_gcc_u; //Offset 28 Bytes
u32 cdma_fetch_time_start_l; //Offset 32 Bytes
u32 cdma_fetch_time_start_u; //Offset 36 Bytes
u32 cdma_fetch_time_end_l; //Offset 40 Bytes
u32 cdma_fetch_time_end_u; //Offset 44 Bytes
u32 cdma_send_time_start_l; //Offset 48 Bytes
u32 cdma_send_time_start_u; //Offset 52 Bytes
u32 cdma_send_time_end_l; //Offset 56 Bytes
u32 cdma_send_time_end_u; //Offset 60 Bytes
u32 dma_accel_time_start_l; //Offset 64 Bytes
u32 dma_accel_time_start_u; //Offset 68 Bytes
u32 dma_accel_time_end_l; //Offset 72 Bytes
u32 dma_accel_time_end_u; //Offset 76 Bytes
struct image_info shared_image_info; // Offset 80 Bytes (16 bytes long)
/*
* Kernel and Userspace Metrics
*/
u64 total_time_start; //Offset 96 Bytes
u64 total_time_end; //Offset 104 Bytes
u64 sleep_time_start; //Offset 112 Bytes
u64 sleep_time_end; //Offset 120 Bytes
u64 preparation_time_start; //Offset 128 Bytes
u64 preparation_time_end; //Offset 136 Bytes
u64 load_time_start; //Offset 144 Bytes
u64 load_time_end; //Offset 152 Bytes
u64 save_time_start; //Offset 160 Bytes
u64 save_time_end; //Offset 168 Bytes
u64 set_pages_overhead_time_start; //Offset 176 Bytes
u64 set_pages_overhead_time_end; //Offset 184 Bytes
u64 unmap_pages_overhead_time_start; //Offset 192 Bytes
u64 unmap_pages_overhead_time_end; //Offset 200 Bytes
};
/*
 * Status words: nine 32-bit fields (36 bytes when u32 is 4 bytes).
 * There is one *_occupied_pid field per accelerator slot (two direct, four
 * indirect, one scatter/gather), followed by a busy word and an
 * open-modules word.
 * NOTE(review): presumably each *_occupied_pid holds the PID of the process
 * currently using that accelerator; the encoding of accelerator_busy and
 * open_modules is not shown here - confirm with the code that updates them.
 */
struct status_flags
{
u32 accel_direct_0_occupied_pid; //Offset 0 Bytes
u32 accel_direct_1_occupied_pid; //Offset 4 Bytes
u32 accel_indirect_0_occupied_pid; //Offset 8 Bytes
u32 accel_indirect_1_occupied_pid; //Offset 12 Bytes
u32 accel_indirect_2_occupied_pid; //Offset 16 Bytes
u32 accel_indirect_3_occupied_pid; //Offset 20 Bytes
u32 accel_sg_0_occupied_pid; //Offset 24 Bytes
u32 accelerator_busy; //Offset 28 Bytes
u32 open_modules; //Offset 32 Bytes
};
/*
 * Shared repository layout: eight struct metrics records placed back to
 * back (the first one is named as unused, then one per accelerator slot:
 * two direct, four indirect, one scatter/gather), followed by the
 * status_flags words.
 * NOTE(review): presumably this structure is overlaid on the shared-metrics
 * BRAM so that both sides agree on the layout - confirm with the code that
 * maps it.
 */
struct shared_repository
{
struct metrics unused_shared_metrics; //Slot 0: unused
struct metrics accel_direct_0_shared_metrics; //Slot 1
struct metrics accel_direct_1_shared_metrics; //Slot 2
struct metrics accel_indirect_0_shared_metrics; //Slot 3
struct metrics accel_indirect_1_shared_metrics; //Slot 4
struct metrics accel_indirect_2_shared_metrics; //Slot 5
struct metrics accel_indirect_3_shared_metrics; //Slot 6
struct metrics accel_sg_0_shared_metrics; //Slot 7
struct status_flags shared_status_flags; //Follows the eight metrics slots
};