The following C# file was compiled with the Mono mcs compiler.
//
// Kiwi Scientific Acceleration Example - Simple floating point tests.
// (C) 2014 DJ Greaves, University of Cambridge, Computer Laboratory.
//
using System;
using System.Text;
using KiwiSystem;
using System.Diagnostics;
/// <summary>
/// Simple floating-point test program: phase 0 exercises conversions between
/// int, float and double; phase 1 exercises double-precision multiply,
/// subtract, divide and add on a small array. Results are printed with
/// Console.WriteLine.
/// </summary>
public class test49
{
// Number of elements in the data array and iteration count of the phase-0 loop.
const int problemSize = 6;
// Working array for phase 1.
static double [] data = new double [problemSize];
static volatile int volx = 100; // This defeats compile-time constant propagation.
/// <summary>
/// For each i in [0, problemSize): computes a double from (volx+i)*3330.2,
/// narrows it to float, computes a float product 7.12345f*i, converts the
/// float back to int, and prints every intermediate value.
/// </summary>
public static void test49_phase0()
{
Console.WriteLine("Kiwi Demo - Test49 phase0 starting.");
for (int i=0; i<problemSize; i++)
{
// int + int, then int -> double promotion and a double multiply.
double qfp0 = (double)((volx+i)*3330.2);
// NOTE(review): Kiwi.Pause() is defined in KiwiSystem, not in this file;
// presumably it marks a clock-cycle boundary in the generated hardware - confirm.
Kiwi.Pause();
Console.WriteLine("data {0} qfp0={1}", i, qfp0);
// double -> float narrowing conversion.
float qfp1 = (float) qfp0;
Kiwi.Pause();
// int -> float conversion followed by a single-precision multiply.
float qfp2 = 7.12345f * (float) i;
Kiwi.Pause();
// float -> int conversion; the C# cast truncates toward zero.
int qfp3 = (int) qfp1;
Console.WriteLine(" qfp1={0} qfp2={1} qfp3={2}", qfp1, qfp2, qfp3);
}
}
/// <summary>
/// Fills data with 3.1415 (last element 2.71), then performs three rounds in
/// which elements 1..4 are respectively multiplied by, decreased by, divided
/// by and increased by 100.0, printing the whole array after each round.
/// </summary>
public static void test49_phase1()
{
Console.WriteLine("Kiwi Demo - Test49 phase1 starting.");
Kiwi.Pause();
for (int i=0; i<problemSize; i++) data[i] = 3.1415;
// Give the last element a distinct value; elements 0 and 5 are never modified below.
data[problemSize-1] = 2.71;
for (int it=0; it<3; it++)
{
Kiwi.Pause();
// One double-precision operation of each kind, each on its own element.
data[1] *= 100.0;
data[2] -= 100.0;
data[3] /= 100.0;
data[4] += 100.0;
for (int i=0; i<problemSize; i++)
{
Console.WriteLine("data {0} is {1}", i, data[i]);
}
}
}
/// <summary>
/// Program entry point: runs phase 0 and then phase 1.
/// </summary>
// NOTE(review): the Kiwi.HardwareEntryPoint attribute comes from KiwiSystem;
// presumably it tells the Kiwi compiler to use this method as the root of the
// generated hardware - confirm against the Kiwi documentation.
[Kiwi.HardwareEntryPoint()]
public static void Main()
{
Console.WriteLine("Kiwi Demo - Test49 starting.");
Kiwi.Pause();
test49_phase0();
test49_phase1();
Console.WriteLine("Test49 done.");
}
}
module DUT(input clk, input reset);
// Convert an IEEE-754 binary64 (double) bit pattern to binary32 (float).
//
//   arg     : 64-bit double, {sign, 11-bit exponent, 52-bit mantissa}.
//   returns : 32-bit float,  {sign, 8-bit exponent, 23-bit mantissa}.
//
// Special cases, in priority order:
//   NaN                         -> NaN (exponent 0xff, mantissa all ones)
//   infinity or too large       -> infinity of the same sign
//   zero, denormal or too small -> zero of the same sign (flush to zero)
// Otherwise the exponent is re-biased and the mantissa is truncated (not
// rounded) to its top 23 bits.
function [31:0] hpr_dbl2flt4;
   input [63:0] arg;
   reg        signi;
   reg [10:0] expi;
   reg [51:0] manti;
   reg [7:0]  expo;
   reg [22:0] manto;
   reg        in_range, overflow, underflow, scase_inf, scase_zero, scase_nan;
   begin
      { signi, expi, manti } = arg; // Deconstruct input arg

      scase_nan = (expi == 11'h7ff) && (manti != 0);

      // Re-biasing is expi - 1023 + 127 = expi - 896.  For 896 <= expi <= 1151
      // that is simply {expi[10], expi[6:0]}, and those are exactly the values
      // whose bits [9:7] are all the complement of bit 10.
      in_range = (expi[10] != expi[9]) && (expi[10] != expi[8]) && (expi[10] != expi[7]);
      expo     = { expi[10], expi[6:0] };

      // Exponent codes 0x00 and 0xff are reserved in the float format, so the
      // two boundary values of the window are out of range as well.
      overflow  =  expi[10] && (!in_range || (expo == 8'hff));
      underflow = !expi[10] && (!in_range || (expo == 8'h00));

      // Truncate the mantissa to the 23 most significant bits.
      manto = manti[51:51-22];

      // We could report a failure on overflow but it is better to return infinity.
      // A double infinity has expi == 11'h7ff and is therefore covered by overflow.
      scase_inf  = overflow && !scase_nan;
      // A double zero or denormal has expi == 0 and is therefore covered by underflow.
      scase_zero = underflow;

      hpr_dbl2flt4[31]    = signi;
      hpr_dbl2flt4[30:23] = (scase_nan)? 8'hff: (scase_inf)? 8'hff: (scase_zero)? 8'd0: expo;
      hpr_dbl2flt4[22:0]  = (scase_nan)? 23'h7fffff: (scase_inf)? 23'd0: (scase_zero)? 23'd0: manto;
   end
endfunction
// Convert an IEEE-754 binary32 (float) bit pattern to binary64 (double).
//
//   darg    : 32-bit float,  {sign, 8-bit exponent, 23-bit mantissa}.
//   returns : 64-bit double, {sign, 11-bit exponent, 52-bit mantissa}.
//
// Normal numbers convert exactly: the exponent is re-biased (+896) and the
// mantissa is padded with 29 zero bits.  The reserved exponent codes are
// handled explicitly:
//   exponent 0x00 (zero / denormal) -> zero of the same sign (denormals are
//                                      flushed to zero)
//   exponent 0xff (infinity / NaN)  -> double exponent 0x7ff, mantissa kept,
//                                      so infinity stays infinity and NaN
//                                      stays NaN
function [63:0] hpr_flt2dbl3;
   input [31:0] darg;
   reg [7:0]  expi;
   reg [22:0] manti;
   begin
      expi  = darg[30:23];
      manti = darg[22:0];
      if (expi == 8'h00)
         hpr_flt2dbl3 = { darg[31], 63'd0 };
      else if (expi == 8'hff)
         hpr_flt2dbl3 = { darg[31], 11'h7ff, manti, {29{1'b0}} };
      else
         // expi + 896: the top bit is kept, the next three bits are its
         // complement and the low seven bits are copied through.
         hpr_flt2dbl3 = { darg[31], darg[30], {3{~darg[30]}}, darg[29:23], manti, {29{1'b0}} };
   end
endfunction
... snip ...
module DUT(input clk, input reset);
// Convert an IEEE-754 binary64 (double) bit pattern to binary32 (float).
//
//   arg     : 64-bit double, {sign, 11-bit exponent, 52-bit mantissa}.
//   returns : 32-bit float,  {sign, 8-bit exponent, 23-bit mantissa}.
//
// Special cases, in priority order:
//   NaN                         -> NaN (exponent 0xff, mantissa all ones)
//   infinity or too large       -> infinity of the same sign
//   zero, denormal or too small -> zero of the same sign (flush to zero)
// Otherwise the exponent is re-biased and the mantissa is truncated (not
// rounded) to its top 23 bits.
function [31:0] hpr_dbl2flt4;
   input [63:0] arg;
   reg        signi;
   reg [10:0] expi;
   reg [51:0] manti;
   reg [7:0]  expo;
   reg [22:0] manto;
   reg        in_range, overflow, underflow, scase_inf, scase_zero, scase_nan;
   begin
      { signi, expi, manti } = arg; // Deconstruct input arg

      scase_nan = (expi == 11'h7ff) && (manti != 0);

      // Re-biasing is expi - 1023 + 127 = expi - 896.  For 896 <= expi <= 1151
      // that is simply {expi[10], expi[6:0]}, and those are exactly the values
      // whose bits [9:7] are all the complement of bit 10.
      in_range = (expi[10] != expi[9]) && (expi[10] != expi[8]) && (expi[10] != expi[7]);
      expo     = { expi[10], expi[6:0] };

      // Exponent codes 0x00 and 0xff are reserved in the float format, so the
      // two boundary values of the window are out of range as well.
      overflow  =  expi[10] && (!in_range || (expo == 8'hff));
      underflow = !expi[10] && (!in_range || (expo == 8'h00));

      // Truncate the mantissa to the 23 most significant bits.
      manto = manti[51:51-22];

      // A double infinity has expi == 11'h7ff and is therefore covered by overflow.
      scase_inf  = overflow && !scase_nan;
      // A double zero or denormal has expi == 0 and is therefore covered by underflow.
      scase_zero = underflow;

      hpr_dbl2flt4[31]    = signi;
      hpr_dbl2flt4[30:23] = (scase_nan)? 8'hff: (scase_inf)? 8'hff: (scase_zero)? 8'd0: expo;
      hpr_dbl2flt4[22:0]  = (scase_nan)? 23'h7fffff: (scase_inf)? 23'd0: (scase_zero)? 23'd0: manto;
   end
endfunction
// Convert an IEEE-754 binary32 (float) bit pattern to binary64 (double).
//
//   darg    : 32-bit float,  {sign, 8-bit exponent, 23-bit mantissa}.
//   returns : 64-bit double, {sign, 11-bit exponent, 52-bit mantissa}.
//
// Normal numbers convert exactly: the exponent is re-biased (+896) and the
// mantissa is padded with 29 zero bits.  The reserved exponent codes are
// handled explicitly:
//   exponent 0x00 (zero / denormal) -> zero of the same sign (denormals are
//                                      flushed to zero)
//   exponent 0xff (infinity / NaN)  -> double exponent 0x7ff, mantissa kept,
//                                      so infinity stays infinity and NaN
//                                      stays NaN
function [63:0] hpr_flt2dbl3;
   input [31:0] darg;
   reg [7:0]  expi;
   reg [22:0] manti;
   begin
      expi  = darg[30:23];
      manti = darg[22:0];
      if (expi == 8'h00)
         hpr_flt2dbl3 = { darg[31], 63'd0 };
      else if (expi == 8'hff)
         hpr_flt2dbl3 = { darg[31], 11'h7ff, manti, {29{1'b0}} };
      else
         // expi + 896: the top bit is kept, the next three bits are its
         // complement and the low seven bits are copied through.
         hpr_flt2dbl3 = { darg[31], darg[30], {3{~darg[30]}}, darg[29:23], manti, {29{1'b0}} };
   end
endfunction
Full RTL output file: test49.v (Verilog).
//
// Kiwi Scientific Acceleration
// University of Cambridge, Computer Laboratory
//
// vsys.v - A test wrapper for simulating very simple tests with clock and reset.
// (C) 2010-16 DJ Greaves, University of Cambridge.
//
//
//
`timescale 1ns/1ns
// Minimal simulation harness: generates clock and reset for a single DUT
// instance, records all signals to a VCD file and stops the run on a timeout.
module SIMSYS();

   reg clk, reset;

   // Reset is asserted from time zero and released after 33 ns.
   initial
     begin
        reset = 1;
        clk = 1;
        #33 reset = 0;
     end

   // Toggle every 5 ns: 10 ns period, i.e. a 100 MHz clock.
   always
     #5 clk = ~clk;

   // Watchdog: end the simulation after 100,000,000 ns.
   initial
     begin
        #100_000_000;
        $display("Finish HDL simulation on timeout %t.", $time);
        $finish();
     end

   // Record every signal in the design for waveform viewing.
   initial
     begin
        $dumpfile("vcd.vcd");
        $dumpvars();
     end

   // Design under test.
   DUT the_dut(.clk(clk), .reset(reset));

endmodule
The generated RTL above is run on the Icarus Verilog simulator.
iverilog vsys.v test49.v /home/djg11/d320/hprls/kiwipro/kiwic/distro/lib/cvgates.v /home/djg11/d320/hprls/kiwipro/kiwic/distro/lib/cv_fparith.v
./a.out
VCD info: dumpfile vcd.vcd opened for output.
Kiwi Demo - Test49 starting.
Kiwi Demo - Test49 phase0 starting.
data 0 qfp0=333020.000000
qfp1=333019.968750 qfp2=0.000000 qfp3=333019
data 1 qfp0=336350.200000
qfp1=336350.187500 qfp2=7.123450 qfp3=336350
data 2 qfp0=339680.400000
qfp1=339680.375000 qfp2=14.246900 qfp3=339680
data 3 qfp0=343010.600000
qfp1=343010.593750 qfp2=21.370348 qfp3=343010
data 4 qfp0=346340.800000
qfp1=346340.781250 qfp2=28.493799 qfp3=346340
data 5 qfp0=349671.000000
qfp1=349670.968750 qfp2=35.617249 qfp3=349670
Kiwi Demo - Test49 phase1 starting.
phase1: data 0 is 3.141500
phase1: data 1 is 314.150000
phase1: data 2 is -96.858500
phase1: data 3 is 0.031415
phase1: data 4 is 103.141500
phase1: data 5 is 2.710000
phase1: data 0 is 3.141500
phase1: data 1 is 31415.000000
phase1: data 2 is -196.858500
phase1: data 3 is 0.000802
phase1: data 4 is 203.141500
phase1: data 5 is 2.710000
phase1: data 0 is 3.141500
phase1: data 1 is 3141500.000000
phase1: data 2 is -296.858500
phase1: data 3 is 0.000008
phase1: data 4 is 303.141500
phase1: data 5 is 2.710000
Kiwi Demo - Test49 phase1 finished.
Test49 done.
cp vcd.vcd ~/Dropbox
MONO_PATH=/home/djg11/d320/hprls/kiwipro/kiwic/distro/support mono test49.exe
Kiwi Demo - Test49 starting.
Kiwi Demo - Test49 phase0 starting.
data 0 qfp0=333020
qfp1=333020 qfp2=0 qfp3=333020
data 1 qfp0=336350.2
qfp1=336350.2 qfp2=7.12345 qfp3=336350
data 2 qfp0=339680.4
qfp1=339680.4 qfp2=14.2469 qfp3=339680
data 3 qfp0=343010.6
qfp1=343010.6 qfp2=21.37035 qfp3=343010
data 4 qfp0=346340.8
qfp1=346340.8 qfp2=28.4938 qfp3=346340
data 5 qfp0=349671
qfp1=349671 qfp2=35.61725 qfp3=349671
Kiwi Demo - Test49 phase1 starting.
data 0 is 3.1415
data 1 is 314.15
data 2 is -96.8585
data 3 is 0.031415
data 4 is 103.1415
data 5 is 2.71
data 0 is 3.1415
data 1 is 31415
data 2 is -196.8585
data 3 is 0.00031415
data 4 is 203.1415
data 5 is 2.71
data 0 is 3.1415
data 1 is 3141500
data 2 is -296.8585
data 3 is 3.1415E-06
data 4 is 303.1415
data 5 is 2.71
Test49 done.
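Comparing the two runs, we see that the FPGA results do not always match the Mono results: the rounding is slightly off in several cases (for example, qfp1 for data 0 is 333019.968750 instead of 333020), and the result is badly wrong in a couple of cases (data 3 in the second and third iterations of phase 1 is 0.000802 and 0.000008 instead of 0.00031415 and 3.1415E-06). This is being fixed.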
Floating point implementation on FPGA has, traditionally, shown less speed-up and power advantage than integer or bit-level computation. But FPGA vendors are increasingly adding floating point support to the point where FPGA has become (or is becoming) competitive with ASIC.
Updated April 2016 UP.