diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/Makefile b/applications/ta2/designs/ta2_unb2b_qsfp_demo/Makefile index 5b653bf67e43efb6538d2ab4d37b5d4b3caeafc7..bd3834b0e33bb0faa2fe9679b140fbc5716c69b7 100644 --- a/applications/ta2/designs/ta2_unb2b_qsfp_demo/Makefile +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/Makefile @@ -1,3 +1,20 @@ +###################### +### SETUP ### +###################### +ifeq ($(VERBOSE),1) +ECHO := +else +ECHO := @ +endif + +# Where is the Intel(R) FPGA SDK for OpenCL(TM) software? +ifeq ($(wildcard $(INTELFPGAOCLSDKROOT)),) +$(error Set INTELFPGAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation) +endif +ifeq ($(wildcard $(INTELFPGAOCLSDKROOT)/host/include/CL/opencl.h),) +$(error Set INTELFPGAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation.) +endif + ########################### ### Basic configuration ### ########################### @@ -6,7 +23,7 @@ UNB2B_BSP=ta2_unb2b_bsp # Compile directory -TMPDIR=$(RADIOHDL_BUILD_DIR)/unb2b/OpenCL/$(lastword $(subst /, ,$(dir $(abspath $1)))) +BUILDDIR=$(RADIOHDL_BUILD_DIR)/unb2b/OpenCL/$(lastword $(subst /, ,$(abspath $(dir $(lastword $(MAKEFILE_LIST)))))) ############################## @@ -32,7 +49,49 @@ INCLUDES= $(shell aocl compile-config) #-I.. LDFLAGS= $(shell aocl link-config) #-ldl -lacl_emulator_kernel_rt #-lbfd CXXFLAGS+= $(INCLUDES) +### Emulator configuration +# Emulation Compilation flags +ifeq ($(DEBUG),1) +EMUCXXFLAGS += -g +else +EMUCXXFLAGS += -O2 +endif +# Target +TARGET := host +TARGET_DIR := $(BUILDDIR)/bin + +# Directories +INC_DIRS := host/lib/common/inc +LIB_DIRS := + +# Files +INCS := $(wildcard ) +SRCS := $(wildcard host/src/*.cpp host/lib/common/src/*.cpp host/lib/common/src/AOCLUtils/*.cpp) +LIBS := rt pthread + +### Emulator compilation +# Make it all! +%: %.cl $(TARGET_DIR)/$(TARGET) + (unset DISPLAY; mkdir -p $(BUILDDIR)/$* && $(AOC) -march=emulator -DEMULATOR $< -o $(TARGET_DIR)/$@.aocx -legacy-emulator $(AOCOFLAGS) $(AOCRFLAGS)) + +# Host executable target. +$(TARGET_DIR)/$(TARGET) : Makefile $(SRCS) $(INCS) $(TARGET_DIR) + $(ECHO)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC $(foreach D,$(INC_DIRS),-I$D) \ + $(INCLUDES) $(SRCS) $(LDFLAGS) \ + $(foreach D,$(LIB_DIRS),-L$D) \ + $(foreach L,$(LIBS),-l$L) \ + -o $(TARGET_DIR)/$(TARGET) + +$(TARGET_DIR) : + $(ECHO)mkdir -p $(TARGET_DIR) + +# Standard make targets +clean : + $(ECHO)rm -f $(TARGET_DIR)/$(TARGET) + + +### Device compilation %.d: %.cc -$(CXX) $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} $< -o $@ @@ -40,25 +99,22 @@ CXXFLAGS+= $(INCLUDES) $(CXX) -c $(CXXFLAGS) -o $@ $< %.aoco: %.cl - (unset DISPLAY; mkdir -p $(TMPDIR)/$* && cp -a $< $(TMPDIR)/$* && cd $(TMPDIR)/$* && $(AOC) -c $(AOCOFLAGS) $< && cd - && cp -a $(TMPDIR)/$*/$@ .) + (unset DISPLAY; mkdir -p $(BUILDDIR)/$* && cp -a $< $(BUILDDIR)/$* && cd $(BUILDDIR)/$* && $(AOC) -c $(AOCOFLAGS) $< && cd - && cp -a $(BUILDDIR)/$*/$@ .) %.aocr: %.aoco - (unset DISPLAY; cp -a $< $(TMPDIR)/$* && cd $(TMPDIR)/$* && $(AOC) -rtl $(AOCRFLAGS) $< && cd - && cp -a $(TMPDIR)/$*/$@ .) + (unset DISPLAY; cp -a $< $(BUILDDIR)/$* && cd $(BUILDDIR)/$* && $(AOC) -rtl $(AOCRFLAGS) $< && cd - && cp -a $(BUILDDIR)/$*/$@ .) %.aocx: %.aocr - (unset DISPLAY; cp -a $< $(TMPDIR)/$* && cd $(TMPDIR)/$* && $(AOC) $(AOCXFLAGS) $< && cd - && cp -a $(TMPDIR)/$*/$@ .) + (unset DISPLAY; cp -a $< $(BUILDDIR)/$* && cd $(BUILDDIR)/$* && $(AOC) $(AOCXFLAGS) $< && cd - && cp -a $(BUILDDIR)/$*/$@ .) %.sof: %.aocx - (unset DISPLAY; cp -a $(TMPDIR)/$*/flat.sof ./$@) + (unset DISPLAY; cp -a $(BUILDDIR)/$*/flat.sof ./$@) %.rbf: %.sof - (unset DISPLAY; cp -a $(TMPDIR)/$*/flat.rbf ./$@) + (unset DISPLAY; cp -a $(BUILDDIR)/$*/flat.rbf ./$@) %.build: - test -f $@ || test -f /tmp/stop || (echo `hostname` && cp `basename $* _$(lastword $(subst _, ,$*))`.cl $*.cl && SEED=$(lastword $(subst _, ,$*)) time make -j1 $*.aocx && fgrep MHz $(TMPDIR)/$*/$*/quartus_sh_compile.log|tail -n 1) >$@ 2>&1 + test -f $@ || test -f /tmp/stop || (echo `hostname` && cp `basename $* _$(lastword $(subst _, ,$*))`.cl $*.cl && SEED=$(lastword $(subst _, ,$*)) time make -j1 $*.aocx && fgrep MHz $(BUILDDIR)/$*/$*/quartus_sh_compile.log|tail -n 1) >$@ 2>&1 -ifeq (0, $(words $(findstring $(MAKECMDGOALS), clean))) --include $(DEPENDENCIES) -endif diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/inc/common.h b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/inc/common.h new file mode 100644 index 0000000000000000000000000000000000000000..d16ade9618880b8bba4bfcbd42b218f6efb69773 --- /dev/null +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/inc/common.h @@ -0,0 +1,48 @@ +#include <iostream> +#include <sstream> +#include <fstream> +#include <iomanip> + +#define CL_HPP_ENABLE_EXCEPTIONS +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY +#include <CL/cl2.hpp> + +void init( + cl::Context &context, + std::vector<cl::Device> &devices); + +void print_platform( + cl::Platform &platform); + +void print_device( + cl::Device &device, + bool marker = false); + +std::string get_source( + std::string& filename); + +std::string get_flags(); + +cl::Program compile_program( + cl::Context& context, + cl::Device& device, + std::string& source); + +void write_source( + std::string& source, + std::string& filename); + +cl::Program get_program( + cl::Context& context, + cl::Device& device, + std::string& filename); + +cl::Kernel get_kernel( + cl::Program& program, + std::string& name); + +double compute_runtime( + cl::Event& start, + cl::Event& end); diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/readme.css b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/readme.css new file mode 100644 index 0000000000000000000000000000000000000000..ce1c649289c93957c5eeefe2dec8a7b9d8b7d36a --- /dev/null +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/readme.css @@ -0,0 +1,261 @@ +/* +Copyright (C) 2013-2018 Altera Corporation, San Jose, California, USA. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to +whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +This agreement shall be governed in all respects by the laws of the State of California and +by the laws of the United States of America. +*/ + +body { + margin: 0 1em 1em 1em; + font-family: sans-serif; +} +ul { + list-style-type: square; +} +pre, code, kbd, samp, tt { + font-family: monospace, sans-serif; + font-size: 1em; +} + +h1 { + font-size: 200%; + color: #fff; + background-color: #0067a6; + margin: 0 -0.5em; + padding: 0.25em 0.5em; +} +h1 .preheading { + font-size: 40%; + font-weight: normal; +} +h2 { + font-size: 125%; + background-color: #bae5ff; + margin: 1.5em -0.8em 0 -0.8em; + padding: 0.2em 0.8em; +} +h3 { + margin-top: 1.5em; + font-size: 100%; + border-bottom: 1px dotted #000; +} + +table { + border: 2px solid #0067a6; + border-collapse: collapse; +} +th { + border-bottom: 1px solid #0067a6; + border-left: 1px dotted #0067a6; + border-right: 1px dotted #0067a6; + background-color: #bae5ff; + padding: 0.3em; + font-size: 90%; +} +td { + padding: 0.3em; + border: 1px dotted #0067a6; +} + +table.reqs { + margin: 0 auto; +} +table.reqs td { + white-space: nowrap; + text-align: center; +} +table.reqs td:first-child, +table.reqs tr:first-child th:first-child { + text-align: left; +} +table.reqs td.req { + background-color: #b3ef71; + font-size: 150%; + padding: 0 0.3em; +} +table.reqs td.req .either { + font-size: 50%; +} +table.reqs td.unsupported { + white-space: normal; + background-color: #ccc; + max-width: 20em; +} +table.reqs a.note { + text-decoration: none; +} +ol.req-notes > li { + margin-bottom: 0.75em; +} + +table.history { + margin: 0 auto; +} +table.history td { + text-align: center; + vertical-align: top; +} +table.history .changes { + text-align: left; +} +table.history tbody tr:first-child td { + background-color: #b3ef71; +} +table.history ul { + margin: 0; + padding-left: 1em; +} + +table.pkg-contents { + margin: 0 auto; +} +table.pkg-contents th, +table.pkg-contents td { + text-align: left; + vertical-align: top; +} +table.pkg-contents td.path { + font-family: monospace, sans-serif; + font-size: 1em; +} +table.pkg-contents tr.highlight td { + background-color: #ffc; + font-weight: bold; + color: #000; +} +table.pkg-contents td p:first-child { + margin-top: 0; +} +table.pkg-contents td p:last-child { + margin-bottom: 0; +} + +table.parameters { + margin-left: 3em; + margin-right: 3em; + font-family: monospace, sans-serif; + font-size: 1em; +} +table.parameters th, +table.parameters td { + font-family: sans-serif; + text-align: center; + vertical-align: top; +} +table.parameters .name, +table.parameters .desc { + text-align: left; +} +table.parameters .name { + white-space: nowrap; +} +table.parameters td.name, +table.parameters td.default { + font-family: monospace, sans-serif; + font-size: 1em; +} +table.parameters ul { + margin-top: 0; +} +table.parameters td ul:last-child { + margin-bottom: 0; +} + +table.indent { + margin-left: 3em; +} + +.doc .title { + background-color: #eee; + padding: 0.35em; + margin-bottom: 0.5em; +} +.doc .title a { + font-weight: bold; +} +.doc .desc { + margin-left: 2em; + margin-right: 2em; +} + +.left { + text-align: left; +} +.center { + text-align: center; +} +.right { + text-align: right; +} + +.mono { + font-family: monospace, sans-serif; + font-size: 1em; +} +.highlight { + font-weight: bold; + color: #0067a6; +} +.nowrap { + white-space: nowrap; +} + +.command { + font-family: monospace, sans-serif; + font-size: 1em; + margin: 0 3em; + background-color: #ffc; + border: 1px solid #aaa; + padding: 0.5em 1em; +} +.console-output, +.code-block { + display: block; + font-family: monospace, sans-serif; + font-size: 1em; + margin: 0 3em; + background-color: #fff; + border: 1px solid #aaa !important; + padding: 1.8em 1em 0.5em 1em !important; + position: relative; +} +.console-output .heading, +.code-block .heading { + position: absolute; + left: 0; + top: 0; + width: 100%; + font-size: 80%; + text-transform: uppercase; + background-color: #e8e8e8; + padding: 0.3125em 0; + border-bottom: 1px dotted #888; +} +.console-output .heading span, +.code-block .heading span { + padding: 0 1.25em; +} +.not-released { + font-weight: bold; + color: red; +} +.license, +.trademark { + font-size: 80%; +} diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/src/common.cpp b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/src/common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..928b8534b95239c6fa0a29f27640984e5605de17 --- /dev/null +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/lib/common/src/common.cpp @@ -0,0 +1,189 @@ +#include "common.h" + +using namespace std; + +ostream &os = clog; + +void init( + cl::Context &context, + vector<cl::Device> &devices) +{ + vector<cl::Platform> platforms; + cl::Platform::get(&platforms); + + // The selected device + int i = 0; + const char *platform_name = getenv("PLATFORM"); + + if (platform_name == 0) + platform_name = getenv("CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA") ? "Intel(R) FPGA Emulation Platform for OpenCL(TM)" : "Intel(R) FPGA SDK for OpenCL(TM)"; + + os << ">>> OpenCL environment: " << endl; + + // Iterate all platforms + for (cl::Platform &platform : platforms) { + print_platform(platform); + bool selected = platform.getInfo<CL_PLATFORM_NAME>() == platform_name; + + // Get devices for the current platform + vector<cl::Device> devices_; + platform.getDevices(CL_DEVICE_TYPE_ALL, &devices_); + + // Iterate all devices + for (cl::Device &device : devices_) { + if (true)//(selected) + devices.push_back(device); + + print_device(device, selected); + i++; + } + } + os << endl; + + if (devices.size() == 0) { + cerr << "Could not find any device in platform " << platform_name << endl; + exit(EXIT_FAILURE); + } + + context = cl::Context(devices); +} + +void print_platform( + cl::Platform &platform) +{ + os << ">>> Platform: " << endl; + os << "Name : " << platform.getInfo<CL_PLATFORM_NAME>() << endl; + os << "Version : " << platform.getInfo<CL_PLATFORM_VERSION>() << endl; + os << "Extensions : " << platform.getInfo<CL_PLATFORM_EXTENSIONS>() << endl; + os << endl; +} + +void print_device( + cl::Device &device, + bool marker) +{ + os << ">>> Device: "; + if (marker) os << " (selected)"; + os << endl; + os << "Name : " << device.getInfo<CL_DEVICE_NAME>() << endl; + os << "Driver version : " << device.getInfo<CL_DRIVER_VERSION>() << endl; + os << "Device version : " << device.getInfo<CL_DEVICE_VERSION>() << endl; + os << "Compute units : " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl; + os << "Clock frequency : " << device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>() << " MHz" << endl; + os << "Global memory : " << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() * 1e-9 << " Gb" << endl; + os << "Local memory : " << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() * 1e-6 << " Mb" << endl; + os << endl; +} + +string get_source( + string& filename) +{ + // Source directory + string srcdir = "./cl"; + + // All helper files to include in build + vector<string> helper_files; + helper_files.push_back("types.cl"); + helper_files.push_back("math.cl"); + + // Store helper files in string + stringstream source_helper_; + + for (int i = 0; i < helper_files.size(); i++) { + // Get source filename + stringstream source_file_name_; + source_file_name_ << srcdir << "/" << helper_files[i]; + string source_file_name = source_file_name_.str(); + + // Read source from file + ifstream source_file(source_file_name.c_str()); + string source(istreambuf_iterator<char>(source_file), + (istreambuf_iterator<char>())); + source_file.close(); + + // Update source helper stream + source_helper_ << source; + } + + string source_helper = source_helper_.str(); + + // Get source filename + stringstream source_file_name_; + source_file_name_ << srcdir << "/" << filename; + string source_file_name = source_file_name_.str(); + + // Read kernel source from file + ifstream source_file(source_file_name.c_str()); + string source_kernel( + istreambuf_iterator<char>(source_file), + (istreambuf_iterator<char>())); + source_file.close(); + + // Construct full source file + stringstream full_source; + full_source << source_helper; + full_source << source_kernel; + + return full_source.str(); +} + +string get_flags() +{ + return string("-cl-fast-relaxed-math"); +} + +void write_source( + string& source, + string& filename) +{ + cout << ">>> Writing source to: " << filename << endl + << endl; + ofstream source_output; + source_output.open(filename, ofstream::out); + source_output << source; + source_output.close(); +} + +cl::Program get_program( + cl::Context& context, + cl::Device& device, + string& filename) +{ + os << ">>> Loading program from binary: " << filename << endl; + try { + ifstream ifs(filename, ios::in | ios::binary); + string str((istreambuf_iterator<char>(ifs)), istreambuf_iterator<char>()); + cl::Program::Binaries binaries(1, std::make_pair(str.c_str(), str.length())); + vector<cl::Device> devices; + devices.push_back(device); + os << endl; + return cl::Program(context, devices, binaries); + } catch (cl::Error& error) { + cerr << "Loading binary failed: " << error.what() << endl; + exit(EXIT_FAILURE); + } +} + +cl::Kernel get_kernel( + cl::Program& program, + string& name) +{ + os << ">>> Loading kernel: " << name << endl; + try { + os << endl; + return cl::Kernel(program, name.c_str()); + } catch (cl::Error& error) { + cerr << "Loading kernel failed: " << error.what() << endl; + exit(EXIT_FAILURE); + } +} + +double compute_runtime( + cl::Event& start, + cl::Event& end) +{ + double runtime = 0; + runtime -= start.getProfilingInfo<CL_PROFILING_COMMAND_START>(); + runtime += end.getProfilingInfo<CL_PROFILING_COMMAND_START>(); + return runtime * 1e-9; +} diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/src/main.cpp b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3dcd159a7b5ae2f2d7fd4d398b2b9aa2e85e3cd1 --- /dev/null +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/host/src/main.cpp @@ -0,0 +1,115 @@ + +#include <CL/cl_ext_intelfpga.h> +#include <iostream> +#include <fstream> +#include <vector> +#include "common.h" + +using namespace std; +int main(int argc, char **argv) +{ + if (argc > 2) { + cerr << "usage: " << argv[0] << " [ta2_unb2b_qsfp_demo.aocx]" << endl; + exit(1); + } + + // Initialize OpenCL + cl::Context context; + vector<cl::Device> devices; + init(context, devices); + cl::Device &device = devices[0]; + + // Get program + string filename_bin = string(argc == 2 ? argv[1] : "ta2_unb2b_qsfp_demo.aocx"); + cl::Program program = get_program(context, device, filename_bin); + + + // Setup command queues + vector<cl::CommandQueue> queues(2); + + for (cl::CommandQueue &queue : queues) { + queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); + } + + cl::Event computeDoneA; + cl::Event computeDoneB; + + // Setup FPGA kernels + cl::Kernel writer40gbeKernel(program, "writer_40gbe"); + cl::Kernel writer10gbeKernel(program, "writer_10gbe"); + + // Run FPGA kernels + clog << ">>> Run fpga" << endl; + try { + queues[0].enqueueTask(writer40gbeKernel, nullptr, &computeDoneA); + queues[1].enqueueTask(writer10gbeKernel, nullptr, &computeDoneB); + } catch (cl::Error &error) { + cerr << "Error launching kernel: " << error.what() << endl; + exit(EXIT_FAILURE); + } + + + computeDoneA.wait(); + cl_ulong start = computeDoneA.getProfilingInfo<CL_PROFILING_COMMAND_START>(); + cl_ulong stop = computeDoneA.getProfilingInfo<CL_PROFILING_COMMAND_END>(); + + double milliseconds = (stop - start) / 1e6; + cout << "runtime 40GbE= " << milliseconds << " ms, " << endl; + + computeDoneB.wait(); + start = computeDoneB.getProfilingInfo<CL_PROFILING_COMMAND_START>(); + stop = computeDoneB.getProfilingInfo<CL_PROFILING_COMMAND_END>(); + + milliseconds = (stop - start) / 1e6; + cout << "runtime 10GbE= " << milliseconds << " ms, " << endl; + + +// process 40 GbE output, removing the flag data + const string inputFileA = "kernel_output_40GbE"; + + ifstream fileA(inputFileA); + ostringstream ssA; + ssA << fileA.rdbuf(); + const string& sA = ssA.str(); + vector<char> vecA(sA.begin(), sA.end()); + + // remove flag data + for (size_t i=32; i<vecA.size(); i+=32){ + vecA.erase(vecA.begin()+i); + } + +// process 10GbE output, removing the flag data + const string inputFileB = "kernel_output_10GbE"; + + ifstream fileB(inputFileB); + ostringstream ssB; + ssB << fileB.rdbuf(); + const string& sB = ssB.str(); + vector<char> vecB(sB.begin(), sB.end()); + + // remove flag data + for (size_t i=8; i<vecB.size(); i+=8){ + vecB.erase(vecB.begin()+i); + } + +// Verify that both outputs are the same + if (vecA == vecB) + { + cout << "PASSED" << endl; + } else { + cout << "FAILED: Data from QSFP outputs do not match!" << endl; + } + +// Write clean outputs to file +#if 0 + ofstream output_fileA("clean_kernel_output_40GbE.txt"); + ostream_iterator<char> output_iteratorA(output_fileA, ""); + copy(vecA.begin(), vecA.end(), output_iteratorA); + + ofstream output_fileB("clean_kernel_output_10GbE.txt"); + ostream_iterator<char> output_iteratorB(output_fileB, ""); + copy(vecB.begin(), vecB.end(), output_iteratorB); +#endif + + return EXIT_SUCCESS; +} diff --git a/applications/ta2/designs/ta2_unb2b_qsfp_demo/ta2_unb2b_qsfp_demo.cl b/applications/ta2/designs/ta2_unb2b_qsfp_demo/ta2_unb2b_qsfp_demo.cl index 3cfe03a3ba7012ab494d622e2d359f3c40c075b2..a24b41f316283a10b60f655ba63abbbecbd2175a 100644 --- a/applications/ta2/designs/ta2_unb2b_qsfp_demo/ta2_unb2b_qsfp_demo.cl +++ b/applications/ta2/designs/ta2_unb2b_qsfp_demo/ta2_unb2b_qsfp_demo.cl @@ -24,74 +24,6 @@ channel struct line_40gbe ch_out_40gbe __attribute__((depth(0))) __attribute__( channel struct line_10gbe ch_out_10gbe __attribute__((depth(0))) __attribute__((io("kernel_output_10GbE"))); -#if 0 -struct udp_packet { - struct ethernet_header { - uchar destination_mac[6], source_mac[6]; - ushort ether_type; - } ethernet_header; - - struct ipv4_header { - uchar version_ihl, dscp_ecn; - ushort length; - ushort identification, flags_fragment_offset; - uchar ttl, protocol; - ushort checksum; - uint source_ip_address, destination_ip_address; - } __attribute__((packed)) ipv4_header; - - struct udp_header { - ushort source_port, destination_port; - ushort length, checksum; - } udp_header; - - uchar payload[8192]; -}; - -__constant uchar packet_header[] = { - 0x00, 0x07, 0x43, 0x3b, 0xf6, 0x40, - 0xf4, 0x52, 0x14, 0x94, 0xdc, 0xc1, - 0x08, 0x00, - - 0x45, - 0x00, - (sizeof(struct ipv4_header) + sizeof(struct udp_header) + 8192) / 256, (sizeof(struct ipv4_header) + sizeof(struct udp_header) + 8192) % 256, - 0xc6, 0xd8, - 0x40, 0x00, - 0x40, - 0x11, - 0x00, 0x00, - 0x0a, 0xc4, 0xf8, 0xfe, - 0x0a, 0xc4, 0xf8, 0x02, - - 0x8f, 0x28, - 0x11, 0x5c, - (sizeof(struct udp_header) + 8192) / 256, (sizeof(struct udp_header) + 8192) % 256, - 0x00, 0x00, -}; - - -uint htonl(uint n) -{ -#if defined __ENDIAN_LITTLE__ - return as_uint(as_uchar4(n).wzyx); -#else - return n; -#endif -} - - -ushort htons(ushort n) -{ -#if defined __ENDIAN_LITTLE__ - return as_ushort(as_uchar2(n).yx); -#else - return n; -#endif -} - -#endif - __constant uchar packets[4][8512] __attribute__((aligned(32))) = { { 0x00, 0x07, 0x43, 0x3B, 0xF6, 0x40, 0xF4, 0x52, 0x14, 0x94, 0xDC, 0xC1, 0x08, 0x00, 0x45, 0x00, @@ -2264,19 +2196,33 @@ void write_packet_10gbe(__constant const void *packet, unsigned size) } } - +#ifdef EMULATOR +__attribute__((max_global_work_dim(0))) +#else __attribute__((autorun, max_global_work_dim(0))) +#endif __kernel void writer_40gbe() { +#ifdef EMULATOR + for (int i = 0; i < 4; i++) +#else for (uint2_t i = 0;; i ++) +#endif write_packet_40gbe(packets[i], 8511); } - +#ifdef EMULATOR +__attribute__((max_global_work_dim(0))) +#else __attribute__((autorun, max_global_work_dim(0))) +#endif __kernel void writer_10gbe() { +#ifdef EMULATOR + for (int i = 0; i < 4; i++) +#else for (uint2_t i = 0;; i ++) +#endif write_packet_10gbe(packets[i], 8511); } diff --git a/applications/ta2/doc/README.txt b/applications/ta2/doc/README.txt index d2cfeaaaf7a89eb64ca7d266b0dc4c5f9a0438e9..cd72c208df56aaeb7292664baafbbf80df33b818 100644 --- a/applications/ta2/doc/README.txt +++ b/applications/ta2/doc/README.txt @@ -14,10 +14,10 @@ The ta2 project folder contains 4 sub-folders: SETUP ONCE -- Install Quartus 19.2 with arria10 dependencies and OpenCL dependencies. A later Quartus version may be used for OpenCL compilation. - However, the current BSPs are created for 19.2. +- Install Quartus 19.2 with arria10 dependencies and OpenCL dependencies. A later Quartus version may be used + for OpenCL compilation. However, the current BSPs are created for 19.2. - Aquire the RadioHDL library from GIT or other source. -- Export the following environment variables in your .bashrc and source the setup script as follows: +- Export the following environment variables in your .bashrc and source the radiohdl init script as follows: INTEL_ROOTDIR=/home/software/Altera/19.2 # For example export QUARTUS_ROOTDIR=$INTEL_ROOTDIR/quartus @@ -46,28 +46,62 @@ SETUP ONCE - Make sure the hdl_buildset_unb2b.cfg is correctly configured for this Quartus version. -- Generate all IP by executing generate_ip_libs unb2b - - -COMPILING OPENCL APPLICATION +- Generate all IP by executing: + -> generate_ip_libs unb2b + + +COMPILING EXAMPLE OPENCL APPLICATION FOR EMULATION +The example application used is "ta2_unb2b_qsfp_demo", this application generates UDP packets and outputs them +to 40GbE and 10GbE. In emulation, this design is verified by comparing the 10GbE and 40GbE outputs to check if +the data is identical. If that is the case, the output will state "PASSED". + +- Navigate to $RADIOHDL_WORK/applications/ta2/designs/ta2_unb2b_qsfp_demo +- First we need to compile the OpenCL application for emulation and compile the host code. This can be done by + the provided Makefile. Execute the command: + -> make ta2_unb2b_qsfp_demo +- This creates the executable "host" and the aocx file "ta2_unb2b_qsfp_demo.aocx" located at: + $RADIOHDL_BUILD_DIR/unb2b/OpenCL/ta2_unb2b_qsfp_demo/bin +- First navigate to that directory and then run this executable using the following commands: + -> cd $RADIOHDL_BUILD_DIR/unb2b/OpenCL/ta2_unb2b_qsfp_demo/bin + -> CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 ./host +- At the end of the application output you should see "PASSED". + +COMPILING EXAMPLE OPENCL APPLICATION FOR UNB2B +The example application used is "ta2_unb2b_qsfp_demo", this application generates UDP packets and outputs them +to 40GbE and 10GbE. On hardware, you can verify the design by connecting a PC to the 40GbE (QSFP1) or +10GbE (QSFP0) output and check for incoming packets using tcpdump. +- Navigate to $RADIOHDL_WORK/applications/ta2/designs/ta2_unb2b_qsfp_demo +- This example design uses the "ta2_unb2b_bsp" OpenCL BSP. If you did not initialize the BSP already, + execute the commands below. + -> quartus_config unb2b; run_qsys unb2b name_of_your_BSP board.qsys; +- The OpenCL application can be compiled for UniBoard2b using the provided Makefile, this is done by executing: + -> make ta2_unb2b_qsfp_demo.sof ta2_unb2b_qsfp_demo.rbf +- After a long compilation time (can take hours) you will find the files ta2_unb2b_qsfp_demo.sof and + ta2_unb2b_qsfp_demo.rbf in the current directory. + + +CREATING NEW OPENCL APPLICATION - Start by copying the example application located in $RADIOHDL_WORK/applications/ta2/designs/ta2_unb2b_qsfp_demo - rename the folder and ta2_unb2b_qsfp_demo.cl -- If you need to use a specific OpenCL BSP, open the Makefile and change the BSP Name into the name of the BSP you want to use (see OVERVIEW). +- If you need to use a specific OpenCL BSP, open the Makefile and change the BSP Name into the name of the BSP + you want to use (see OVERVIEW). - If you did not initialize the BSP, execute the commands below. quartus_config unb2b; run_qsys unb2b name_of_your_BSP board.qsys; - Now you can create your application in the *.cl file. -- With the provided makefile you can compile your application by executing: +- With the provided makefile you can compile your application for UniBoard2b by executing: make myApp.sof myApp.rbf where "myApp" is the name of your .cl file - Note that by default your application is build in $RADIOHDL_BUILD_DIR/unb2b/OpenCL/myApp - where "myApp" is the name of your .cl file - + where "myApp" is the name of your design directory file. This directory includes the + Quartus project for analysis using the Quartus GUI. +- For emulation, you need to modify the host code located in host/src/main.cpp to fit your design. + + FLASH SOF TO FPGA -The quickest way to program the FPGA is to use a JTAG connection and program the FPGA with the Quartus programmer, writing the .sof file. +The quickest way to program the FPGA is to use a JTAG connection and program the FPGA with the Quartus +programmer, writing the .sof file. - To configure a jtagserver you can use the command: jtagconfig -addserver <server name> <password> - For using dop36 to program the Uniboard2 in the lab, the command is: - jtagconfig --addserver dop36 BG132V051 - To program the FPGA use the following command: quartus_pgm -c USB-BLASTERII -m jtag -o p\;my_app.sof@1 @@ -83,20 +117,28 @@ quartus_pgm -c USB-BLASTERII -m jtag -o p\;my_app.sof@1 -o p\;my_app.sof@2 -o p\ FLASH RBF TO FPGA -If a JTAG connection is not available, the application can be written using the .rbf file over a 1GbE connection. -This is achieved by running the util_unb2.py script +If a JTAG connection is not available or you want your application to stay in flash, the application can be +written using the .rbf file over a 1GbE connection. This is achieved by running the util_unb2.py peripheral +script in $UPE_GEAR/peripherals CREATING A NEW BSP -- BSPs are located in "$AOCL_BOARD_PACKAGE_ROOT/hardware" which is previously defined as "$RADIOHDL_WORK/applications/ta2/bsp/hardware" -- To create a new BSP it is easiest to copy an existing one. In this example we would make a copy of the folder "ta2_unb2b_bsp" and rename it to example_bsp. +- BSPs are located in "$AOCL_BOARD_PACKAGE_ROOT/hardware" which is defined as + "$RADIOHDL_WORK/applications/ta2/bsp/hardware" +- To create a new BSP it is easiest to copy an existing one. In this example we would make a copy of the + directory "ta2_unb2b_bsp" and rename it to example_bsp. - inside the new folder "example_bsp" we need to change 2 files: board_spec.xml and hdllib.cfg - In board_spec.xml replace the two occurrences of "ta2_unb2b_bsp" into "example_bsp" (lines 2 and 5). - In hdllib.cfg replace "ta2_unb2b_bsp" into "example_bsp" and "ta2_unb2b_bsp_lib" into "example_bsp_lib" - -- Now we can make changes to this BSP by editing top.vhd, this is very similar to normal unb2b top-level files. The difference is that top.vhd uses the "board" component - defined in top_components_pkg.vhd instead of the usual mmm component. You will also find another component "freeze_wrapper" is used which is defined in top_components_pkg.vhd. -- freeze_wrapper is the OpenCL kernel wrapper. which is located at ip/freeze_wrapper.v. freeze_wrapper instantiates pr_region.v which instantiates kernel_system (OpenCL kernel). +- Now we can make changes to this BSP by editing top.vhd, this is very similar to normal unb2b top-level + files. The difference is that top.vhd uses the "board" component defined in top_components_pkg.vhd + instead of the usual mmm component. You will also find another component "freeze_wrapper" is used which + is defined in top_components_pkg.vhd. +- freeze_wrapper is the OpenCL kernel wrapper. which is located at ip/freeze_wrapper.v. "freeze_wrapper" + instantiates pr_region.v which instantiates the generated "kernel_system" (OpenCL kernel). +- IO channels are defined in board_spec.xml between <channels> ... </channels>. These lines define + what in- and outputs the OpenCL kernel will generate. When you change these IO channel definitions, you + need to modify the files ip/pr_region.v, ip/freeze_wrapper.v, top_components_pkg.vhd and top.vhd accordingly.