The following C# file was compiled with the Mono mcs compiler.
//
// Kiwi Scientific Acceleration Example - Simple floating point tests.
// (C) 2014 DJ Greaves, University of Cambridge, Computer Laboratory.
//
using System;
using System.Text;
using KiwiSystem;
using System.Diagnostics;

/// <summary>
/// Small floating-point exercise: phase 0 converts values between
/// double, float and int; phase 1 repeatedly applies the four basic
/// arithmetic operators to elements of a double array. Every intermediate
/// value is printed so that runs can be compared line by line.
/// </summary>
public class test49
{
    // Number of elements in the working array and iterations of phase 0.
    const int problemSize = 6;

    // Working array for phase 1.
    static double [] data = new double [problemSize];

    // This defeats compile-time constant propagation.
    static volatile int volx = 100;

    /// <summary>
    /// For each index, forms a double from (volx + index) * 3330.2, narrows
    /// it to float, forms a second float from 7.12345f * index, and truncates
    /// the narrowed float to int, printing every value.
    /// </summary>
    public static void test49_phase0()
    {
        Console.WriteLine("Kiwi Demo - Test49 phase0 starting.");

        for (int idx = 0; idx < problemSize; idx++)
        {
            // int + int, then multiplied by a double literal.
            double qfp0 = (double)((volx+idx)*3330.2);
            // NOTE(review): Kiwi.Pause() presumably marks a clock-cycle
            // boundary for hardware synthesis - confirm against the Kiwi docs.
            Kiwi.Pause();
            Console.WriteLine("data {0} qfp0={1}", idx, qfp0);

            // double -> float narrowing.
            float qfp1 = (float) qfp0;
            Kiwi.Pause();

            // int -> float conversion followed by a float multiply.
            float qfp2 = 7.12345f * (float) idx;
            Kiwi.Pause();

            // float -> int truncation.
            int qfp3 = (int) qfp1;
            Console.WriteLine(" qfp1={0} qfp2={1} qfp3={2}", qfp1, qfp2, qfp3);
        }
    }

    /// <summary>
    /// Fills the array with 3.1415 (last element 2.71), then performs three
    /// passes that multiply, subtract, divide and add 100.0 on elements
    /// 1 to 4 respectively, printing the whole array after each pass.
    /// </summary>
    public static void test49_phase1()
    {
        Console.WriteLine("Kiwi Demo - Test49 phase1 starting.");
        Kiwi.Pause();

        for (int idx = 0; idx < problemSize; idx++)
        {
            data[idx] = 3.1415;
        }
        data[problemSize-1] = 2.71;

        for (int pass = 0; pass < 3; pass++)
        {
            Kiwi.Pause();

            // One operator per element; elements 0 and 5 are left untouched.
            data[1] *= 100.0;
            data[2] -= 100.0;
            data[3] /= 100.0;
            data[4] += 100.0;

            for (int idx = 0; idx < problemSize; idx++)
            {
                Console.WriteLine("data {0} is {1}", idx, data[idx]);
            }
        }
    }

    /// <summary>
    /// Entry point: runs phase 0 and then phase 1, with a banner before
    /// and after.
    /// </summary>
    [Kiwi.HardwareEntryPoint()]
    public static void Main()
    {
        Console.WriteLine("Kiwi Demo - Test49 starting.");
        Kiwi.Pause();
        test49_phase0();
        test49_phase1();
        Console.WriteLine("Test49 done.");
    }
}
module DUT(input clk, input reset);

  // Narrow a 64-bit double-precision bit pattern to a 32-bit single-precision
  // bit pattern. Result layout: [31] sign, [30:23] exponent, [22:0] mantissa.
  function [31:0] hpr_dbl2flt4;
    input [63:0] arg;
    reg signi;
    reg [10:0] expi;
    reg [51:0] manti;
    reg [7:0] expo;
    reg [22:0] manto;
    reg overflow, scase_inf, scase_zero, scase_nan, fail;
    begin
      // Split the input into its sign, 11-bit exponent and 52-bit mantissa.
      { signi, expi, manti } = arg;

      // Special-case detection on the input fields.
      scase_zero = (arg[62:0] == 63'd0);
      scase_inf = (expi == 11'h7ff) && (manti == 0);
      scase_nan = (expi == 11'h7ff) && (manti != 0);

      // We can report fail on overflow but better to report infinity.
      // (fail is assigned here and not read again inside this function.)
      fail = 0;

      // Asserted when any of exponent bits 9:7 equals bit 10, i.e. the
      // exponent cannot be expressed by keeping only bit 10 and bits 6:0.
      overflow = (expi[10] == expi[9]) ||(expi[10] == expi[8]) ||(expi[10] == expi[7]);

      // Narrowed exponent: top bit plus the low seven bits.
      expo = { expi[10], expi[6:0]};

      // Top 23 mantissa bits; the low 29 bits are discarded without rounding.
      manto = manti[51:51-22];

      // NOTE(review): overflow is also true for an all-zero and an all-ones
      // exponent field, so zero and NaN inputs set scase_inf as well; because
      // scase_inf is tested first below, it takes precedence over scase_nan
      // and scase_zero.
      scase_inf = scase_inf || overflow;

      hpr_dbl2flt4[31] = signi;
      hpr_dbl2flt4[30:23] = (scase_inf)? 8'hff: (scase_nan)? 8'hff: (scase_zero)? 8'd0: expo;
      hpr_dbl2flt4[22:0] = (scase_inf)? 23'd0: (scase_nan)? -23'd1: (scase_zero)? 23'd0: manto;
    end
  endfunction

  // Widen a 32-bit single-precision bit pattern to a 64-bit double-precision
  // bit pattern: the exponent's top bit is kept, its inverse is replicated
  // three times below it, and the mantissa is padded with 29 zero bits.
  // No special-case handling is visible in this function.
  function [63:0] hpr_flt2dbl3;
    input [31:0] darg;
    hpr_flt2dbl3 = {darg[31], darg[30], {3{~darg[30]}}, darg[29:23], darg[22:0], {29{1'b0}}};
  endfunction

... snip ...

module DUT(input clk, input reset);

  // Second listing of the same double-to-single narrowing function.
  function [31:0] hpr_dbl2flt4;
    input [63:0] arg;
    reg signi;
    reg [10:0] expi;
    reg [51:0] manti;
    reg [7:0] expo;
    reg [22:0] manto;
    reg overflow, scase_inf, scase_zero, scase_nan, fail;
    begin
      // Split the input into its sign, 11-bit exponent and 52-bit mantissa.
      { signi, expi, manti } = arg;

      scase_zero = (arg[62:0] == 63'd0);
      scase_inf = (expi == 11'h7ff) && (manti == 0);
      scase_nan = (expi == 11'h7ff) && (manti != 0);
      fail = 0;

      // Asserted when any of exponent bits 9:7 equals bit 10.
      overflow = (expi[10] == expi[9]) ||(expi[10] == expi[8]) ||(expi[10] == expi[7]);

      // Narrowed exponent and truncated mantissa.
      expo = { expi[10], expi[6:0]};
      manto = manti[51:51-22];

      // Out-of-range exponents are folded into the infinity case.
      scase_inf = scase_inf || overflow;

      hpr_dbl2flt4[31] = signi;
      hpr_dbl2flt4[30:23] = (scase_inf)? 8'hff: (scase_nan)? 8'hff: (scase_zero)? 8'd0: expo;
      hpr_dbl2flt4[22:0] = (scase_inf)? 23'd0: (scase_nan)? -23'd1: (scase_zero)? 23'd0: manto;
    end
  endfunction

  // Second listing of the single-to-double widening function.
  function [63:0] hpr_flt2dbl3;
    input [31:0] darg;
    hpr_flt2dbl3 = {darg[31], darg[30], {3{~darg[30]}}, darg[29:23], darg[22:0], {29{1'b0}}};
  endfunction
Full RTL output file: test49.v (Verilog).
//
// Kiwi Scientific Acceleration
// University of Cambridge, Computer Laboratory
//
// vsys.v - A test wrapper for simulating very simple tests with clock and reset.
// (C) 2010-16 DJ Greaves, University of Cambridge.
//
//
//
`timescale 1ns/1ns

// Top-level simulation harness: generates clock and reset, dumps waveforms,
// enforces a simulation timeout and instantiates the design under test.
module SIMSYS();

  reg clk;
  reg reset;

  // Start with reset asserted and the clock high; release reset after 33 ns.
  initial begin
    reset = 1;
    clk = 1;
    #33 reset = 0;
  end

  // 10ns period for clock = 100 MHz (lowish FPGA clock freq!)
  always #5 clk = ~clk;

  // Stop the simulation after 100 * 1000 * 1000 time units if it has not
  // already finished.
  initial begin
    #(100 * 1000 * 1000)
      $display("Finish HDL simulation on timeout %t.", $time);
    $finish();
  end

  // Record all signals to a VCD waveform file.
  initial begin
    $dumpfile("vcd.vcd");
    $dumpvars();
  end

  // Design under test, driven by the clock and reset generated above.
  DUT the_dut(.clk(clk), .reset(reset));

endmodule
The generated RTL above is run on the Icarus Verilog simulator.
iverilog vsys.v test49.v /home/djg11/d320/hprls/kiwipro/kiwic/distro/lib/cvgates.v /home/djg11/d320/hprls/kiwipro/kiwic/distro/lib/cv_fparith.v ./a.out VCD info: dumpfile vcd.vcd opened for output. Kiwi Demo - Test49 starting. Kiwi Demo - Test49 phase0 starting. data 0 qfp0=333020.000000 qfp1=333019.968750 qfp2=0.000000 qfp3=333019 data 1 qfp0=336350.200000 qfp1=336350.187500 qfp2=7.123450 qfp3=336350 data 2 qfp0=339680.400000 qfp1=339680.375000 qfp2=14.246900 qfp3=339680 data 3 qfp0=343010.600000 qfp1=343010.593750 qfp2=21.370348 qfp3=343010 data 4 qfp0=346340.800000 qfp1=346340.781250 qfp2=28.493799 qfp3=346340 data 5 qfp0=349671.000000 qfp1=349670.968750 qfp2=35.617249 qfp3=349670 Kiwi Demo - Test49 phase1 starting. phase1: data 0 is 3.141500 phase1: data 1 is 314.150000 phase1: data 2 is -96.858500 phase1: data 3 is 0.031415 phase1: data 4 is 103.141500 phase1: data 5 is 2.710000 phase1: data 0 is 3.141500 phase1: data 1 is 31415.000000 phase1: data 2 is -196.858500 phase1: data 3 is 0.000802 phase1: data 4 is 203.141500 phase1: data 5 is 2.710000 phase1: data 0 is 3.141500 phase1: data 1 is 3141500.000000 phase1: data 2 is -296.858500 phase1: data 3 is 0.000008 phase1: data 4 is 303.141500 phase1: data 5 is 2.710000 Kiwi Demo - Test49 phase1 finished. Test49 done. cp vcd.vcd ~/Dropbox
MONO_PATH=/home/djg11/d320/hprls/kiwipro/kiwic/distro/support mono test49.exe Kiwi Demo - Test49 starting. Kiwi Demo - Test49 phase0 starting. data 0 qfp0=333020 qfp1=333020 qfp2=0 qfp3=333020 data 1 qfp0=336350.2 qfp1=336350.2 qfp2=7.12345 qfp3=336350 data 2 qfp0=339680.4 qfp1=339680.4 qfp2=14.2469 qfp3=339680 data 3 qfp0=343010.6 qfp1=343010.6 qfp2=21.37035 qfp3=343010 data 4 qfp0=346340.8 qfp1=346340.8 qfp2=28.4938 qfp3=346340 data 5 qfp0=349671 qfp1=349671 qfp2=35.61725 qfp3=349671 Kiwi Demo - Test49 phase1 starting. data 0 is 3.1415 data 1 is 314.15 data 2 is -96.8585 data 3 is 0.031415 data 4 is 103.1415 data 5 is 2.71 data 0 is 3.1415 data 1 is 31415 data 2 is -196.8585 data 3 is 0.00031415 data 4 is 203.1415 data 5 is 2.71 data 0 is 3.1415 data 1 is 3141500 data 2 is -296.8585 data 3 is 3.1415E-06 data 4 is 303.1415 data 5 is 2.71 Test49 done.
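Comparing the two runs, we see that the FPGA results are not correctly rounded in every case: for example, the first qfp1 is 333019.968750 in simulation where Mono prints 333020, which in turn makes qfp3 come out as 333019 instead of 333020. The results are badly wrong in a couple of cases: data 3 is 0.000802 and then 0.000008 in the second and third iterations of phase 1, where Mono prints 0.00031415 and 3.1415E-06. This is being fixed.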
Floating-point implementations on FPGAs have traditionally shown less speed-up and power advantage than integer or bit-level computation. However, FPGA vendors are increasingly adding floating-point support, to the point where FPGAs have become (or are becoming) competitive with ASICs.
Updated April 2016 UP.