Presentation is loading. Please wait.

Presentation is loading. Please wait.

Sasa Stojanovic 2/x  E#1: Hello world  E#2: Vector addition  E#3: Type mixing  E#4: Addition of a constant and a vector  E#5: Input/output.

Similar presentations


Presentation on theme: "Sasa Stojanovic 2/x  E#1: Hello world  E#2: Vector addition  E#3: Type mixing  E#4: Addition of a constant and a vector  E#5: Input/output."— Presentation transcript:

1 Sasa Stojanovic

2 2/x  E#1: Hello world  E#2: Vector addition  E#3: Type mixing  E#4: Addition of a constant and a vector  E#5: Input/output control  E#6: Conditional execution  E#7: Moving average 1D  E#8: Moving average 2D  E#9: Array summation  E#10: Optimization of E#9

3 3/x • Write a program that sends the “Hello World!” string to the MAX2 card and has the MAX2 card kernel return it to the host. • To be learned through this example: ◦ How to make the configuration of the accelerator (MAX2 card) using Java: • How to make a simple kernel (description of operations) using Java (the only language), • How to write the standard manager (configuration description based on the kernel(s)) using Java, ◦ How to test the kernel using a test (code + data) written in Java, ◦ How to compile the Java code for MAX2, ◦ How to write simple C code that runs on the host and triggers the kernel: • How to write the C code that streams data to the kernel, • How to write the C code that accepts data from the kernel, ◦ How to simulate and execute an application program in C that runs on the host and periodically calls the accelerator. Example No. 1

4 4/x • One or more kernel files, to define the operations of the application: ◦ {app_name}Kernel[{number}].java • One (or more) Java files for simulating the kernel(s): ◦ {app_name}SimRunner.java • One manager file for transforming the kernel(s) into the configuration of the MAX card (instantiation and connection of kernels): ◦ {app_name}Manager.java • Simulator builder: ◦ {app_name}HostSimBuilder.java • Hardware builder: ◦ {app_name}HWBuilder.java • Application code that uses the MAX card accelerator: ◦ {app_name}HostCode.c • Makefile: ◦ A script file that defines the compilation-related commands. Example No. 1

5 5/x package ind.z1; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class helloKernel extends Kernel { public helloKernel(KernelParameters parameters) { super(parameters); // Input: HWVar x = io.input("x", hwInt(8)); HWVar result = x; // Output: io.output("z", result, hwInt(8)); } Example No. 1 It is possible to substitute the last three lines with: io.output("z", result, hwInt(8));

6 6/x package ind.z1; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class helloSimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager(“helloSim"); helloKernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 1

7 7/x package ind.z1; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,”helloHostSim", BOARDMODEL); Kernel k = new helloKernel(m.makeKernelParameters(“helloKernel")); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } Example No. 1

8 8/x package ind.z1; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHWBuilder { public static void main(String[] args) { Manager m = new Manager(“hello", BOARDMODEL); Kernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } Example No. 1

9 9/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; char data_in1[16] = "Hello world!"; char data_out[16]; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_hello(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); Example No. 1

10 10/x printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, 16 * sizeof(char)), max_output("z", data_out, 16 * sizeof(char)), max_runfor(“helloKernel", 16), max_end()); printf("Checking data read from FPGA.\n"); max_close_device(device); max_destroy(maxfile); return 0; } Example No. 1

11 11/x # Root of the project directory tree BASEDIR=../../.. # Java package name PACKAGE=ind/z1 # Application name APP=example1 # Names of your maxfiles HWMAXFILE=$(APP).max HOSTSIMMAXFILE=$(APP)HostSim.max # Java application builders HWBUILDER=$(APP)HWBuilder.java HOSTSIMBUILDER=$(APP)HostSimBuilder.java SIMRUNNER=$(APP)SimRunner.java # C host code HOSTCODE=$(APP)HostCode.c # Target board BOARD_MODEL=23312 # Include the master makefile.include nullstring := space := $(nullstring) # comment MAXCOMPILERDIR_QUOTE:=$(subst $(space),\,$(MAXCOMPILERDIR)) include $(MAXCOMPILERDIR_QUOTE)/examples/common/Makefile.include Example No. 1

12 12/x package config; import com.maxeler.maxcompiler.v1.managers.MAX2BoardModel; public class BoardModel { public static final MAX2BoardModel BOARDMODEL = MAX2BoardModel.MAX2336B; } Example No. 1

13 13/x Types

14 14/x • Floating point numbers - HWFloat: ◦ hwFloat(exponent_bits, mantissa_bits); ◦ float ~ hwFloat(8,24) ◦ double ~ hwFloat(11,53) • Fixed point numbers - HWFix: ◦ hwFix(integer_bits, fractional_bits, sign_mode) • SignMode.UNSIGNED • SignMode.TWOSCOMPLEMENT • Integers - HWFix: ◦ hwInt(bits) ~ hwFix(bits, 0, SignMode.TWOSCOMPLEMENT) • Unsigned integers - HWFix: ◦ hwUInt(bits) ~ hwFix(bits, 0, SignMode.UNSIGNED) • Boolean - HWFix: ◦ hwBool() ~ hwFix(1, 0, SignMode.UNSIGNED) ◦ 1 ~ true ◦ 0 ~ false • Raw bits - HWRawBits: ◦ hwRawBits(width) Types

15 15/x  Write a program that adds two arrays of floating point numbers.  Program reads the size of arrays, makes two arrays with an arbitrary content (test inputs), and adds them using a MAX card. Example No. 2

16 16/x package ind.z2; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example2Kernel extends Kernel { public example2Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } Example No. 2

17 17/x package ind.z2; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example2SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example2Sim"); example2Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 2

18 18/x package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,"example2HostSim", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters("example2Kernel") ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } Example No. 2

19 19/x package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HWBuilder { public static void main(String[] args) { Manager m = new Manager("example2", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } Example No. 2

20 20/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n"); Example No. 2

21 21/x maxfile = max_maxfile_init_example2(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example2Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++) if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } Example No. 2

22 22/x  Do the same as in the example no 2, with the following modification: one input array contains floating point numbers, and the other one contains integers. Example No. 3

23 23/x • Casting here means moving data from one form to another, without changing their essence. • Type is: ◦ specified for inputs and outputs, ◦ propagated from inputs, down the dataflow graph to outputs, ◦ used to check that the output stream has the correct type. • If a conversion is needed, an explicit conversion (cast) is required. • How to do it? ◦ Use the method cast in class HWVar. • Additional hardware is required (especially for conversion to or from floating point numbers), ◦ which introduces additional latency. • A cast between a floating point number and an integer is done by rounding to the nearest integer! Example No. 3

24 24/x package ind.z3; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example3Kernel extends Kernel { public example3Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwInt(32)); HWVar result = x + y.cast(hwFloat(8,24)); // Output io.output("z", result, hwFloat(8,24)); } Example No. 3

25 25/x package ind.z3; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example3SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example3Sim"); example3Kernel k = new example3Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 3

26 26/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out; int *data_in2; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(int)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n"); Example No. 3

27 27/x maxfile = max_maxfile_init_example3(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(int)), max_output("z", data_out, N * sizeof(float)), max_runfor("example3Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } Example No. 3

28 28/x • Command: ◦ maxRenderGraphs {build_dir} ◦ {build_dir} - directory where the design is compiled • In the virtual machine, the directory “Desktop/MaxCompiler-Builds” contains the build directories. • Example for the application “example2”: ◦ maxRenderGraphs example2HostSim ◦ Renders graphs for the resulting max file Generating Graph

29 29/x Generating Graph

30 30/x Generating Graph

31 31/x • Write a program that adds a constant to an array of floating point numbers. • Program: ◦ reads the size of the array and the constant that will be added to the elements of the array, ◦ fills the array with arbitrary values, and ◦ adds the constant to the array using the MAX card. Example No. 4

32 32/x package ind.z4; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example4Kernel extends Kernel { public example4Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.scalarInput("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } Example No. 4

33 33/x • example4SimRunner.java: ◦ Before the kernel run, invoke: setScalarInput("y", 2); • example4HostCode.c: ◦ Read the constant from standard input, ◦ After the device is opened, but before the run, set the scalar inputs: max_set_scalar_input_f(device, "example4Kernel.y", const_add, FPGA_A); max_upload_runtime_params(device, FPGA_A); Example No. 4

34 34/x  Do the same as in example no 4, with the following modification: use controlled inputs and counters. Example No. 5

35 35/x package ind.z5; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example5Kernel extends Kernel { public example5Kernel(KernelParameters parameters) { super(parameters); HWVar ie = control.count.simpleCounter(32); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24), ie.eq(0)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } Example No. 5

36 36/x package ind.z5; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example5SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example5Sim"); example5Kernel k = new example5Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 4, 5, 6, 7, 8, 9, 10 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 5

37 37/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, data_in2[2], *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu%f",&N, data_in2); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example5(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); Example No. 5

38 38/x printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, 2 * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example5Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + data_in2[0]){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+data_in2[0]), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } Example No. 5

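39 39/x • Translate the following part of code for the Maxeler MAX2 card: for(int i=0; i < N; i++) if (a[i] != b[i]) { c[i] = b[i]-a[i]; d[i] = a[i]*b[i]/c[i]; } else { c[i] = a[i]; d[i] = a[i]+b[i]; } [Note: the loop was cut off after the loop condition in this transcript; the remainder is reconstructed from the example6 kernel and its simulation test data on the following slides.] Example No. 6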

40 40/x package ind.z6; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example6Kernel extends Kernel { public example6Kernel(KernelParameters parameters) { super(parameters); // Input HWVar a = io.input("a", hwFloat(8,24)); HWVar b = io.input("b", hwFloat(8,24)); HWVar c = ~a.eq(b)?b-a:a; HWVar d = ~a.eq(b)?a*b/c:a+b; // Output io.output("c", c, hwFloat(8,24)); io.output("d", d, hwFloat(8,24)); } Example No. 6

41 41/x package ind.z6; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example6SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example6Sim"); example6Kernel k = new example6Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("a", 1, 3); m.setInputData("b", 2, 3); m.setKernelCycles(2); m.runTest(); m.dumpOutput(); double expectedOutputc[] = { 1, 3 }; double expectedOutputd[] = { 2, 6 }; m.checkOutputData("c", expectedOutputc); m.checkOutputData("d", expectedOutputd); m.logMsg("Test passed OK!"); } Example No. 6

42 42/x • Write a program that calculates the moving average over an array: each output element is the average of the corresponding input element and its immediate neighbours (three successive elements in the interior, two at either end). $$\mathrm{avg}[i] = \begin{cases} (a[0]+a[1])/2, & i = 0 \\ (a[i-1]+a[i]+a[i+1])/3, & 0 < i < n-1 \\ (a[n-2]+a[n-1])/2, & i = n-1 \end{cases}$$ Example No. 7

43 43/x package ind.z7; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example7Kernel extends Kernel { public example7Kernel(KernelParameters parameters) { super(parameters); HWVar N = io.scalarInput("N", hwUInt(64)); HWVar count = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar result = ( (count>0?stream.offset(x,-1):0) + x + (count0&count

44 44/x  Write a program that calculates moving average along a 2D matrix of the size MxN.  Transfer the matrix to the MAX2 card through one stream, row by row. Example No. 8

45 45/x package ind.z8; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.Stream.OffsetExpr; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example8Kernel extends Kernel { public example8Kernel(KernelParameters parameters) { super(parameters); HWVar M = io.scalarInput("M", hwUInt(32)); OffsetExpr Nof = stream.makeOffsetParam("Nof", 3, 128); HWVar N = io.scalarInput("N", hwUInt(32)); CounterChain cc = control.count.makeCounterChain(); HWVar j = cc.addCounter(M,1); HWVar i = cc.addCounter(N,1); Example No. 8

46 46/x // Input HWVar mat = io.input("mat", hwFloat(8,24)); // Extract 8 point window around current point HWVar window[] = new HWVar[9]; int ii = 0; for ( int x=-1; x<=1; x++) for ( int y= -1; y<=1; y++) window[ii++] = (i.cast(hwInt(33))+x>=0 & i.cast(hwInt(33))+x = 0 & j.cast(hwInt(33))+y<=M.cast(hwInt(33))-1)?stream.offset(mat, y*Nof+x):0; // Sum points in window and divide by 9 to average HWVar sum = constant.var(hwFloat(8, 24), 0); for ( HWVar hwVar : window) { sum = sum + hwVar; } HWVar divider = i.eq(0)|i.eq(N-1)|j.eq(0)|j.eq(M-1)?((i.eq(0)|i.eq(N-1))&(j.eq(0)|j.eq(M- 1))?constant.var(hwFloat(8,24),4):6):9; HWVar result = sum / divider; // Output io.output("z", result, hwFloat(8,24)); } Example No. 8

47 47/x package ind.z8; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example8SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example8Sim"); example8Kernel k = new example8Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("mat", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15,16); m.setScalarInput("M", 4); m.setScalarInput("N", 4); m.setStreamOffsetParam("Nof",4); m.setKernelCycles(16); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3.5, 4, 5, 5.5, 5.5, 6, 7, 7.5, 9.5, 10, 11, 11.5, 11.5, 12, 13, 13.5 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 8

48 48/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long M, N, i; printf("Enter size of matrix (MxN, max 1024x1024): "); scanf("%lu%lu",&M,&N); data_in1 = malloc(M*N * sizeof(float)); data_out = malloc(M*N * sizeof(float)); for(i = 0; i < M*N; i++){ data_in1[i] = i%10; } printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example8(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); Example No. 8

49 49/x max_set_scalar_input_f(device, "example8Kernel.M", M, FPGA_A); max_set_scalar_input_f(device, "example8Kernel.N", N, FPGA_A); max_set_runtime_param(device, "example8Kernel.Nof", N); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("mat", data_in1, M*N * sizeof(float)), max_output("z", data_out, M*N * sizeof(float)), max_runfor("example8Kernel", M*N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < M*N; i++){ float expected=0, divider = 9; for (int ii = -1; ii<2; ii++) for(int jj = -1; jj =0 && i/N+ii =0 && i%N+jj

50 50/x  Write a program that calculates the sum of n floating point numbers. Example No. 9

51 51/x package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -1); // Output io.output("z", result, hwFloat(8,24)); } Example No. 9 Problem?

52 52/x Example No. 9

53 53/x package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>12?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24)); } Example No. 9 Solution: New offset = Depth of pipeline loop

54 54/x package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 0, 0, 0, 3, 0, 0, 0, 9, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 3); m.setKernelCycles(27); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 9 Unnecessary data: still, we need to send 13 times more data than needed.

55 55/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * 13 * sizeof(float)); data_out = malloc(N * 13 * sizeof(float)); for(i = 0; i < N; i++) for( int j=0; j<13; j++) data_in1[13*i+j] = i%10; printf("Opening and configuring FPGA.\n"); Example No. 9

56 56/x maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device,max_input("x", data_in1, N * 13 * sizeof(float)), max_output("z", data_out, N * 13* sizeof(float)), max_runfor("example9Kernel", N * 13), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += !(i%13) ? i%10 : 0; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } Example No. 9

57 57/x package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter( ,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), depth.eq(0)); } Example No. 9

58 58/x package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3); m.setKernelCycles(27); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } Example No. 9 We still need at least 27 cycles.

59 59/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n"); Example No. 9

60 60/x maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device,max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example9Kernel", N * ), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += i%10; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } Example No. 9

61 61/x  Write an optimized program that calculates the sum of numbers in an input array  First, calculate several parallel/partial sums; then, add them at the end Example No. 10

62 62/x package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example10Kernel1 extends Kernel { public example10Kernel1(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar N = io.scalarInput("N", hwUInt(64)); HWVar x = io.input("x", hwFloat(8,24) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt > N-14); } Example No. 10

63 63/x package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example10Kernel2 extends Kernel { public example10Kernel2(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter(14,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt.eq(12)); } Example No. 10

64 64/x package ind.z10; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example10SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example10Sim"); example10Kernel1 k = new example10Kernel1( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26); m.setKernelCycles(26); m.runTest(); m.dumpOutput(); double exOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39 }; m.checkOutputData("z", exOutput); m.logMsg("Test passed OK!"); } Example No. 10

65 65/x package ind.z10; import com.maxeler.maxcompiler.v1.managers.custom.blocks.KernelBlock; import com.maxeler.maxcompiler.v1.managers.custom.CustomManager; import com.maxeler.maxcompiler.v1.managers.MAXBoardModel; class example10Manager extends CustomManager { public example10Manager(boolean is_simulation, String name, MAXBoardModel board_model ){ super(is_simulation, board_model, name); KernelBlock kb1 = addKernel(new example10Kernel1(makeKernelParameters("example10Kernel1"))); KernelBlock kb2 = addKernel(new example10Kernel2(makeKernelParameters("example10Kernel2"))); kb1.getInput("x") <== addStreamFromHost("x"); kb2.getInput("x") <== kb1.getOutput("z"); addStreamToHost("z") <== kb2.getOutput("z"); } Example No. 10

66 66/x package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.managers.BuildConfig; import com.maxeler.maxcompiler.v1.managers.BuildConfig.Level; public class example10HostSimBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(true,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); m.build(); } Example No. 10

67 67/x package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example10HWBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(false,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); m.build(); } Example No. 10

68 68/x #include int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array (it will be truncated to the firs lower number dividable with 13): "); scanf("%lu",&N); N /= 13; N *= 13; data_in1 = malloc(N * sizeof(float)); data_out = malloc(1 * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; expected += data_in1[i]; } Example No. 10

69 69/x printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example10(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); max_set_scalar_input_f(device, "example10Kernel1.N", N, FPGA_A); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, 2 * sizeof(float)), max_runfor("example10Kernel1", N), max_runfor("example10Kernel2", 13*12+2), max_end()); printf("Checking data read from FPGA.\n"); printf("Expected: %f, returned: %f\n", expected, *data_out); max_close_device(device); max_destroy(maxfile); return 0; } Example No. 10


Download ppt "Sasa Stojanovic 2/x  E#1: Hello world  E#2: Vector addition  E#3: Type mixing  E#4: Addition of a constant and a vector  E#5: Input/output."

Similar presentations


Ads by Google