
// Edited by DJ Greaves for Kiwi Scientific Acceleration Benchmarking

// James Riordan deleted the author's names here.
// James, please tell us where this code came from!


using System;
using KiwiSystem;

class AesPlain
{

// The number of rounds in AES Cipher.
// AES128_encrypt runs Nr-1 identical rounds in its loop plus one final round, and
// AES128_decrypt first steps the key schedule forward Nr times before undoing the rounds.
  public const int Nr = 10;

  // A 16-byte AES block (state or key) held as sixteen named byte fields instead of an array.
  // Field s_c_r holds byte number 4*c + r of the flat 16-byte form (see the constructor),
  // so the first index selects a 4-byte word and the second selects the byte inside that word.
  // The fields are public and mutable because the cipher routines update them in place.
  public struct kw16  : IEquatable<kw16> {
    public byte s_0_0;
    public byte s_0_1;
    public byte s_0_2;
    public byte s_0_3;
    public byte s_1_0;
    public byte s_1_1;
    public byte s_1_2;
    public byte s_1_3;
    public byte s_2_0;
    public byte s_2_1;
    public byte s_2_2;
    public byte s_2_3;
    public byte s_3_0;
    public byte s_3_1;
    public byte s_3_2;
    public byte s_3_3;

    // Loads the sixteen fields from argb[0] .. argb[15], in order.
    // argb must hold at least 16 bytes; no length check is made here.
    public kw16(byte [] argb) {
      s_0_0 = argb[0];
      s_0_1 = argb[1];
      s_0_2 = argb[2];
      s_0_3 = argb[3];

      s_1_0 = argb[4];
      s_1_1 = argb[5];
      s_1_2 = argb[6];
      s_1_3 = argb[7];

      s_2_0 = argb[8];
      s_2_1 = argb[9];
      s_2_2 = argb[10];
      s_2_3 = argb[11];

      s_3_0 = argb[12];
      s_3_1 = argb[13];
      s_3_2 = argb[14];
      s_3_3 = argb[15];
    }

    // True when other is a boxed kw16 whose sixteen bytes all match this value.
    public override bool Equals(object other) { return other is kw16 && ((kw16)other) == this; }

    // Byte-wise comparison of all sixteen fields.
    public bool Equals(kw16 other)
    {
      return
        s_0_0 == other.s_0_0 &&
        s_0_1 == other.s_0_1 &&
        s_0_2 == other.s_0_2 &&
        s_0_3 == other.s_0_3 &&
        s_1_0 == other.s_1_0 &&
        s_1_1 == other.s_1_1 &&
        s_1_2 == other.s_1_2 &&
        s_1_3 == other.s_1_3 &&
        s_2_0 == other.s_2_0 &&
        s_2_1 == other.s_2_1 &&
        s_2_2 == other.s_2_2 &&
        s_2_3 == other.s_2_3 &&
        s_3_0 == other.s_3_0 &&
        s_3_1 == other.s_3_1 &&
        s_3_2 == other.s_3_2 &&
        s_3_3 == other.s_3_3;
    }
    public static bool operator !=(kw16 me, kw16 other) { return !me.Equals(other); }
    public static bool operator ==(kw16 me, kw16 other) { return me.Equals(other); }


    // Hash built from all sixteen bytes so that values which compare equal hash equally.
    // (This previously called itself and recursed until the stack overflowed.)
    public override int GetHashCode()
    {
      unchecked
        {
          // Pack each 4-byte word into one int, then mix the four words.
          int w0 = s_0_0 | (s_0_1 << 8) | (s_0_2 << 16) | (s_0_3 << 24);
          int w1 = s_1_0 | (s_1_1 << 8) | (s_1_2 << 16) | (s_1_3 << 24);
          int w2 = s_2_0 | (s_2_1 << 8) | (s_2_2 << 16) | (s_2_3 << 24);
          int w3 = s_3_0 | (s_3_1 << 8) | (s_3_2 << 16) | (s_3_3 << 24);

          int hash = 17;
          hash = hash * 31 + w0;
          hash = hash * 31 + w1;
          hash = hash * 31 + w2;
          hash = hash * 31 + w3;
          return hash;
        }
    }


  };
  
  // Four-byte word. Produced by rotWord/subword for the key-schedule step and by rowxor
  // for the per-word XOR sums that staterowxor consumes.
  public struct row {
    public byte r_0;
    public byte r_1;
    public byte r_2;
    public byte r_3;
  };

  // Forward byte-substitution table with 256 entries, indexed by the byte being replaced.
  // Read through getSBoxValue, which subbytes and subword use.
  static readonly byte[]  sbox = new byte [] {
  //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };

  // Inverse byte-substitution table with 256 entries, indexed by the substituted byte.
  // Read through getSBoxInvert, which invsubbytes uses on the decryption path.
  static readonly byte[] rsbox = new byte [] {
  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };


// The round constant word array, Rcon[i], contains the values given by
// x to the power (i-1), being powers of x (x is denoted as {02}) in the field GF(2^8).
// Note that i starts at 1, not 0, so entry 0 is never used as a round constant.
// The routines in this file only index entries 1 .. Nr.
static readonly byte[]  Rcon = new byte [] {
  0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 
  0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 
  0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 
  0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 
  0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 
  0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 
  0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 
  0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 
  0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 
  0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 
  0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 
  0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 
  0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 
  0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 
  0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 
  0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb  };


 // Returns entry number num of the sbox table.
 byte getSBoxValue(uint num)
 {
   byte substituted = sbox[num];
   return substituted;
 }

 // Returns entry number num of the rsbox table.
 byte getSBoxInvert(uint num)
 {
   byte restored = rsbox[num];
   return restored;
 }

 // Builds a row from four bytes rotated by one position: the result is (k1, k2, k3, k0).
 row rotWord(byte k0, byte k1, byte k2, byte k3)
 {
   row rotated;
   rotated.r_0 = k1;
   rotated.r_1 = k2;
   rotated.r_2 = k3;
   rotated.r_3 = k0;   // the first byte wraps round to the end
   return rotated;
 }

 // Replaces each of the four bytes of k by its getSBoxValue lookup.
 row subword(row k)
 {
   row substituted;
   substituted.r_0 = getSBoxValue(k.r_0);
   substituted.r_1 = getSBoxValue(k.r_1);
   substituted.r_2 = getSBoxValue(k.r_2);
   substituted.r_3 = getSBoxValue(k.r_3);
   return substituted;
 }

 // Returns a copy of s in which every byte has been replaced by its sbox entry.
 kw16 subbytes(kw16 s)
 {
   kw16 substituted;

   // Word 0
   substituted.s_0_0 = sbox[s.s_0_0];
   substituted.s_0_1 = sbox[s.s_0_1];
   substituted.s_0_2 = sbox[s.s_0_2];
   substituted.s_0_3 = sbox[s.s_0_3];

   // Word 1
   substituted.s_1_0 = sbox[s.s_1_0];
   substituted.s_1_1 = sbox[s.s_1_1];
   substituted.s_1_2 = sbox[s.s_1_2];
   substituted.s_1_3 = sbox[s.s_1_3];

   // Word 2
   substituted.s_2_0 = sbox[s.s_2_0];
   substituted.s_2_1 = sbox[s.s_2_1];
   substituted.s_2_2 = sbox[s.s_2_2];
   substituted.s_2_3 = sbox[s.s_2_3];

   // Word 3
   substituted.s_3_0 = sbox[s.s_3_0];
   substituted.s_3_1 = sbox[s.s_3_1];
   substituted.s_3_2 = sbox[s.s_3_2];
   substituted.s_3_3 = sbox[s.s_3_3];

   return substituted;
 }
 
  // Returns a copy of s in which every byte has been replaced by its rsbox entry.
  kw16 invsubbytes(kw16 s)
  {
    kw16 restored;

    // Word 0
    restored.s_0_0 = rsbox[s.s_0_0];
    restored.s_0_1 = rsbox[s.s_0_1];
    restored.s_0_2 = rsbox[s.s_0_2];
    restored.s_0_3 = rsbox[s.s_0_3];

    // Word 1
    restored.s_1_0 = rsbox[s.s_1_0];
    restored.s_1_1 = rsbox[s.s_1_1];
    restored.s_1_2 = rsbox[s.s_1_2];
    restored.s_1_3 = rsbox[s.s_1_3];

    // Word 2
    restored.s_2_0 = rsbox[s.s_2_0];
    restored.s_2_1 = rsbox[s.s_2_1];
    restored.s_2_2 = rsbox[s.s_2_2];
    restored.s_2_3 = rsbox[s.s_2_3];

    // Word 3
    restored.s_3_0 = rsbox[s.s_3_0];
    restored.s_3_1 = rsbox[s.s_3_1];
    restored.s_3_2 = rsbox[s.s_3_2];
    restored.s_3_3 = rsbox[s.s_3_3];

    return restored;
  }

  // Byte position r of each result word is taken from the word r places further on
  // (wrapping after word 3); byte position 0 therefore stays where it is.
  kw16 shiftrows(kw16 s)
  {
    kw16 shifted;

    // Byte 0 of every word: unchanged.
    shifted.s_0_0 = s.s_0_0;
    shifted.s_1_0 = s.s_1_0;
    shifted.s_2_0 = s.s_2_0;
    shifted.s_3_0 = s.s_3_0;

    // Byte 1: taken from the next word.
    shifted.s_0_1 = s.s_1_1;
    shifted.s_1_1 = s.s_2_1;
    shifted.s_2_1 = s.s_3_1;
    shifted.s_3_1 = s.s_0_1;

    // Byte 2: taken from two words along.
    shifted.s_0_2 = s.s_2_2;
    shifted.s_1_2 = s.s_3_2;
    shifted.s_2_2 = s.s_0_2;
    shifted.s_3_2 = s.s_1_2;

    // Byte 3: taken from three words along.
    shifted.s_0_3 = s.s_3_3;
    shifted.s_1_3 = s.s_0_3;
    shifted.s_2_3 = s.s_1_3;
    shifted.s_3_3 = s.s_2_3;

    return shifted;
  }

  // Undoes shiftrows: byte position r of each result word is taken from the word
  // r places earlier (wrapping before word 0); byte position 0 stays where it is.
  kw16 invshiftrows(kw16 s)
  {
    kw16 unshifted;

    // Byte 0 of every word: unchanged.
    unshifted.s_0_0 = s.s_0_0;
    unshifted.s_1_0 = s.s_1_0;
    unshifted.s_2_0 = s.s_2_0;
    unshifted.s_3_0 = s.s_3_0;

    // Byte 1: taken from the previous word.
    unshifted.s_0_1 = s.s_3_1;
    unshifted.s_1_1 = s.s_0_1;
    unshifted.s_2_1 = s.s_1_1;
    unshifted.s_3_1 = s.s_2_1;

    // Byte 2: taken from two words back.
    unshifted.s_0_2 = s.s_2_2;
    unshifted.s_1_2 = s.s_3_2;
    unshifted.s_2_2 = s.s_0_2;
    unshifted.s_3_2 = s.s_1_2;

    // Byte 3: taken from three words back.
    unshifted.s_0_3 = s.s_1_3;
    unshifted.s_1_3 = s.s_2_3;
    unshifted.s_2_3 = s.s_3_3;
    unshifted.s_3_3 = s.s_0_3;

    return unshifted;
  }

  // Within each 4-byte word, XORs every byte with its successor; the last byte of the
  // word is paired with the first one.
  kw16 pairwisexor(kw16 s)
  {
    kw16 pairs;

    // Word 0
    pairs.s_0_0 = (byte)(s.s_0_0 ^ s.s_0_1);
    pairs.s_0_1 = (byte)(s.s_0_1 ^ s.s_0_2);
    pairs.s_0_2 = (byte)(s.s_0_2 ^ s.s_0_3);
    pairs.s_0_3 = (byte)(s.s_0_3 ^ s.s_0_0);

    // Word 1
    pairs.s_1_0 = (byte)(s.s_1_0 ^ s.s_1_1);
    pairs.s_1_1 = (byte)(s.s_1_1 ^ s.s_1_2);
    pairs.s_1_2 = (byte)(s.s_1_2 ^ s.s_1_3);
    pairs.s_1_3 = (byte)(s.s_1_3 ^ s.s_1_0);

    // Word 2
    pairs.s_2_0 = (byte)(s.s_2_0 ^ s.s_2_1);
    pairs.s_2_1 = (byte)(s.s_2_1 ^ s.s_2_2);
    pairs.s_2_2 = (byte)(s.s_2_2 ^ s.s_2_3);
    pairs.s_2_3 = (byte)(s.s_2_3 ^ s.s_2_0);

    // Word 3
    pairs.s_3_0 = (byte)(s.s_3_0 ^ s.s_3_1);
    pairs.s_3_1 = (byte)(s.s_3_1 ^ s.s_3_2);
    pairs.s_3_2 = (byte)(s.s_3_2 ^ s.s_3_3);
    pairs.s_3_3 = (byte)(s.s_3_3 ^ s.s_3_0);

    return pairs;
  }

  // Shifts x left by one bit and XORs in 0x1b when the top bit of x was set;
  // the result is truncated to eight bits.
  byte xtime(byte x)
  {
    int doubled = x << 1;
    int topBit = (x >> 7) & 1;
    int reduction = topBit * 0x1b;
    return (byte)(doubled ^ reduction);
  }
  
  // Convenience overload: narrows x to a byte and forwards.
  byte multiply(int x, int y)
  {
    return multiply((byte)x, y);
  }

  // Convenience overload: narrows y to a byte and forwards.
  byte multiply(byte x, int y)
  {
    return multiply(x, (byte)y);
  }

  // Shift-and-add product built on xtime: for each of the five low bits of y that is
  // set, the matching repeated-xtime multiple of x is XORed into the result.
  // Bits 5..7 of y are ignored.
  byte multiply(byte x, byte y)
  {
    // x after one, two, three and four applications of xtime.
    byte times2 = xtime(x);
    byte times4 = xtime(times2);
    byte times8 = xtime(times4);
    byte times16 = xtime(times8);

    int product = (y & 1) * x;
    product ^= ((y >> 1) & 1) * times2;
    product ^= ((y >> 2) & 1) * times4;
    product ^= ((y >> 3) & 1) * times8;
    product ^= ((y >> 4) & 1) * times16;
    return (byte)product;
  }
    
  // Reduces each 4-byte word of s to the XOR of its four bytes; r_c is the sum for word c.
  row rowxor(kw16 s)
  {
    row sums;
    sums.r_0 = (byte)(s.s_0_0 ^ s.s_0_1 ^ s.s_0_2 ^ s.s_0_3);
    sums.r_1 = (byte)(s.s_1_0 ^ s.s_1_1 ^ s.s_1_2 ^ s.s_1_3);
    sums.r_2 = (byte)(s.s_2_0 ^ s.s_2_1 ^ s.s_2_2 ^ s.s_2_3);
    sums.r_3 = (byte)(s.s_3_0 ^ s.s_3_1 ^ s.s_3_2 ^ s.s_3_3);
    return sums;
  }

  // XORs k.r_c into all four bytes of word c of s, for c = 0 .. 3.
  kw16 staterowxor(kw16 s, row k)
  {
    kw16 masked;

    // Word 0 with k.r_0
    masked.s_0_0 = (byte)(s.s_0_0 ^ k.r_0);
    masked.s_0_1 = (byte)(s.s_0_1 ^ k.r_0);
    masked.s_0_2 = (byte)(s.s_0_2 ^ k.r_0);
    masked.s_0_3 = (byte)(s.s_0_3 ^ k.r_0);

    // Word 1 with k.r_1
    masked.s_1_0 = (byte)(s.s_1_0 ^ k.r_1);
    masked.s_1_1 = (byte)(s.s_1_1 ^ k.r_1);
    masked.s_1_2 = (byte)(s.s_1_2 ^ k.r_1);
    masked.s_1_3 = (byte)(s.s_1_3 ^ k.r_1);

    // Word 2 with k.r_2
    masked.s_2_0 = (byte)(s.s_2_0 ^ k.r_2);
    masked.s_2_1 = (byte)(s.s_2_1 ^ k.r_2);
    masked.s_2_2 = (byte)(s.s_2_2 ^ k.r_2);
    masked.s_2_3 = (byte)(s.s_2_3 ^ k.r_2);

    // Word 3 with k.r_3
    masked.s_3_0 = (byte)(s.s_3_0 ^ k.r_3);
    masked.s_3_1 = (byte)(s.s_3_1 ^ k.r_3);
    masked.s_3_2 = (byte)(s.s_3_2 ^ k.r_3);
    masked.s_3_3 = (byte)(s.s_3_3 ^ k.r_3);

    return masked;
  }

  // Byte-wise XOR of two 16-byte blocks.
  kw16 statestatexor(kw16 s, kw16 t)
  {
    kw16 combined;

    // Word 0
    combined.s_0_0 = (byte)(s.s_0_0 ^ t.s_0_0);
    combined.s_0_1 = (byte)(s.s_0_1 ^ t.s_0_1);
    combined.s_0_2 = (byte)(s.s_0_2 ^ t.s_0_2);
    combined.s_0_3 = (byte)(s.s_0_3 ^ t.s_0_3);

    // Word 1
    combined.s_1_0 = (byte)(s.s_1_0 ^ t.s_1_0);
    combined.s_1_1 = (byte)(s.s_1_1 ^ t.s_1_1);
    combined.s_1_2 = (byte)(s.s_1_2 ^ t.s_1_2);
    combined.s_1_3 = (byte)(s.s_1_3 ^ t.s_1_3);

    // Word 2
    combined.s_2_0 = (byte)(s.s_2_0 ^ t.s_2_0);
    combined.s_2_1 = (byte)(s.s_2_1 ^ t.s_2_1);
    combined.s_2_2 = (byte)(s.s_2_2 ^ t.s_2_2);
    combined.s_2_3 = (byte)(s.s_2_3 ^ t.s_2_3);

    // Word 3
    combined.s_3_0 = (byte)(s.s_3_0 ^ t.s_3_0);
    combined.s_3_1 = (byte)(s.s_3_1 ^ t.s_3_1);
    combined.s_3_2 = (byte)(s.s_3_2 ^ t.s_3_2);
    combined.s_3_3 = (byte)(s.s_3_3 ^ t.s_3_3);

    return combined;
  }

  // Applies xtime independently to each of the sixteen bytes of s.
  kw16 mapxtime(kw16 s)
  {
    kw16 doubled;

    // Word 0
    doubled.s_0_0 = xtime(s.s_0_0);
    doubled.s_0_1 = xtime(s.s_0_1);
    doubled.s_0_2 = xtime(s.s_0_2);
    doubled.s_0_3 = xtime(s.s_0_3);

    // Word 1
    doubled.s_1_0 = xtime(s.s_1_0);
    doubled.s_1_1 = xtime(s.s_1_1);
    doubled.s_1_2 = xtime(s.s_1_2);
    doubled.s_1_3 = xtime(s.s_1_3);

    // Word 2
    doubled.s_2_0 = xtime(s.s_2_0);
    doubled.s_2_1 = xtime(s.s_2_1);
    doubled.s_2_2 = xtime(s.s_2_2);
    doubled.s_2_3 = xtime(s.s_2_3);

    // Word 3
    doubled.s_3_0 = xtime(s.s_3_0);
    doubled.s_3_1 = xtime(s.s_3_1);
    doubled.s_3_2 = xtime(s.s_3_2);
    doubled.s_3_3 = xtime(s.s_3_3);

    return doubled;
  }

  // Inverse column mixing. Each 4-byte word (b0, b1, b2, b3) of s is replaced by
  //   (0e*b0 ^ 0b*b1 ^ 0d*b2 ^ 09*b3,
  //    09*b0 ^ 0e*b1 ^ 0b*b2 ^ 0d*b3,
  //    0d*b0 ^ 09*b1 ^ 0e*b2 ^ 0b*b3,
  //    0b*b0 ^ 0d*b1 ^ 09*b2 ^ 0e*b3)
  // where '*' is the multiply routine of this class.
  kw16 invmixcolumns(kw16 s)
  {
    kw16 unmixed;
    byte b0, b1, b2, b3;

    // Word 0
    b0 = s.s_0_0; b1 = s.s_0_1; b2 = s.s_0_2; b3 = s.s_0_3;
    unmixed.s_0_0 = (byte)(multiply(b0, 0x0e) ^ multiply(b1, 0x0b) ^ multiply(b2, 0x0d) ^ multiply(b3, 0x09));
    unmixed.s_0_1 = (byte)(multiply(b0, 0x09) ^ multiply(b1, 0x0e) ^ multiply(b2, 0x0b) ^ multiply(b3, 0x0d));
    unmixed.s_0_2 = (byte)(multiply(b0, 0x0d) ^ multiply(b1, 0x09) ^ multiply(b2, 0x0e) ^ multiply(b3, 0x0b));
    unmixed.s_0_3 = (byte)(multiply(b0, 0x0b) ^ multiply(b1, 0x0d) ^ multiply(b2, 0x09) ^ multiply(b3, 0x0e));

    // Word 1
    b0 = s.s_1_0; b1 = s.s_1_1; b2 = s.s_1_2; b3 = s.s_1_3;
    unmixed.s_1_0 = (byte)(multiply(b0, 0x0e) ^ multiply(b1, 0x0b) ^ multiply(b2, 0x0d) ^ multiply(b3, 0x09));
    unmixed.s_1_1 = (byte)(multiply(b0, 0x09) ^ multiply(b1, 0x0e) ^ multiply(b2, 0x0b) ^ multiply(b3, 0x0d));
    unmixed.s_1_2 = (byte)(multiply(b0, 0x0d) ^ multiply(b1, 0x09) ^ multiply(b2, 0x0e) ^ multiply(b3, 0x0b));
    unmixed.s_1_3 = (byte)(multiply(b0, 0x0b) ^ multiply(b1, 0x0d) ^ multiply(b2, 0x09) ^ multiply(b3, 0x0e));

    // Word 2
    b0 = s.s_2_0; b1 = s.s_2_1; b2 = s.s_2_2; b3 = s.s_2_3;
    unmixed.s_2_0 = (byte)(multiply(b0, 0x0e) ^ multiply(b1, 0x0b) ^ multiply(b2, 0x0d) ^ multiply(b3, 0x09));
    unmixed.s_2_1 = (byte)(multiply(b0, 0x09) ^ multiply(b1, 0x0e) ^ multiply(b2, 0x0b) ^ multiply(b3, 0x0d));
    unmixed.s_2_2 = (byte)(multiply(b0, 0x0d) ^ multiply(b1, 0x09) ^ multiply(b2, 0x0e) ^ multiply(b3, 0x0b));
    unmixed.s_2_3 = (byte)(multiply(b0, 0x0b) ^ multiply(b1, 0x0d) ^ multiply(b2, 0x09) ^ multiply(b3, 0x0e));

    // Word 3
    b0 = s.s_3_0; b1 = s.s_3_1; b2 = s.s_3_2; b3 = s.s_3_3;
    unmixed.s_3_0 = (byte)(multiply(b0, 0x0e) ^ multiply(b1, 0x0b) ^ multiply(b2, 0x0d) ^ multiply(b3, 0x09));
    unmixed.s_3_1 = (byte)(multiply(b0, 0x09) ^ multiply(b1, 0x0e) ^ multiply(b2, 0x0b) ^ multiply(b3, 0x0d));
    unmixed.s_3_2 = (byte)(multiply(b0, 0x0d) ^ multiply(b1, 0x09) ^ multiply(b2, 0x0e) ^ multiply(b3, 0x0b));
    unmixed.s_3_3 = (byte)(multiply(b0, 0x0b) ^ multiply(b1, 0x0d) ^ multiply(b2, 0x09) ^ multiply(b3, 0x0e));

    return unmixed;
  }

  // Writes str followed by the sixteen bytes of s as two-digit lower-case hex on one
  // console line, in field order s_0_0 .. s_3_3, with wider gaps between 4-byte words.
  public static void printstate(string str, kw16 s)
  {
    // Caller-supplied label first.
    Console.Write("{0}", str);

    // Word 0
    Console.Write(" {0:x2} {1:x2}",  s.s_0_0, s.s_0_1);
    Console.Write(" {0:x2} {1:x2}",  s.s_0_2, s.s_0_3);

    // Word 1
    Console.Write("   {0:x2} {1:x2}",  s.s_1_0, s.s_1_1);
    Console.Write(" {0:x2} {1:x2}",  s.s_1_2, s.s_1_3);

    // Word 2
    Console.Write("   {0:x2} {1:x2} ",  s.s_2_0, s.s_2_1);
    Console.Write(" {0:x2} {1:x2}", s.s_2_2, s.s_2_3);

    // Word 3
    Console.Write("   {0:x2} {1:x2}", s.s_3_0, s.s_3_1);
    Console.Write(" {0:x2} {1:x2}",   s.s_3_2, s.s_3_3 );

    // Terminate the line.
    Console.WriteLine();
  }
  
  // struct state AES128_encrypt(struct state in, struct state key);
  // struct state AES128_decrypt(struct state in, struct state key);
  
  // Encrypts the 16-byte block din under the 16-byte key and returns the result.
  // Round keys are not stored: the local copy of key is advanced by one key-schedule
  // step inside every round. Both arguments are structs passed by value, so the
  // caller's copies are not modified. Kiwi.Pause() is called at the start of every
  // loop round, before the final round, and before returning.
  public kw16 AES128_encrypt(kw16 din, kw16 key)
  {
    byte round = 0;

    // Initial key addition: XOR the supplied key into the input block.
    din = statestatexor(din, key);

    // Rounds 1 .. Nr-1 are identical.
    for (round = 1; round < Nr; round++)
      {
        Kiwi.Pause();

        // Byte substitution followed by the row shift.
        kw16 shifted = shiftrows(subbytes(din));

        // Column mixing: every byte is XORed with the xtime of (itself ^ its successor)
        // and with the XOR of all four bytes of its word.
        kw16 doubledPairs = mapxtime(pairwisexor(shifted));
        kw16 mixMask = staterowxor(doubledPairs, rowxor(shifted));
        kw16 mixed = statestatexor(shifted, mixMask);

        // Advance the key by one schedule step: the last word is rotated, substituted
        // and combined with the round constant, then XORed along the four words.
        row core = subword(rotWord(key.s_3_0, key.s_3_1, key.s_3_2, key.s_3_3));
        core.r_0 ^= Rcon[round];

        key.s_0_0 ^= core.r_0;
        key.s_0_1 ^= core.r_1;
        key.s_0_2 ^= core.r_2;
        key.s_0_3 ^= core.r_3;

        key.s_1_0 ^= key.s_0_0;
        key.s_1_1 ^= key.s_0_1;
        key.s_1_2 ^= key.s_0_2;
        key.s_1_3 ^= key.s_0_3;

        key.s_2_0 ^= key.s_1_0;
        key.s_2_1 ^= key.s_1_1;
        key.s_2_2 ^= key.s_1_2;
        key.s_2_3 ^= key.s_1_3;

        key.s_3_0 ^= key.s_2_0;
        key.s_3_1 ^= key.s_2_1;
        key.s_3_2 ^= key.s_2_2;
        key.s_3_3 ^= key.s_2_3;

        // Add the freshly derived round key.
        din = statestatexor(mixed, key);
      }

    Kiwi.Pause();

    // Final round (round == Nr here): the same steps without the column mixing.
    kw16 lastShifted = shiftrows(subbytes(din));

    row lastCore = subword(rotWord(key.s_3_0, key.s_3_1, key.s_3_2, key.s_3_3));
    lastCore.r_0 ^= Rcon[round];

    key.s_0_0 ^= lastCore.r_0;
    key.s_0_1 ^= lastCore.r_1;
    key.s_0_2 ^= lastCore.r_2;
    key.s_0_3 ^= lastCore.r_3;

    key.s_1_0 ^= key.s_0_0;
    key.s_1_1 ^= key.s_0_1;
    key.s_1_2 ^= key.s_0_2;
    key.s_1_3 ^= key.s_0_3;

    key.s_2_0 ^= key.s_1_0;
    key.s_2_1 ^= key.s_1_1;
    key.s_2_2 ^= key.s_1_2;
    key.s_2_3 ^= key.s_1_3;

    key.s_3_0 ^= key.s_2_0;
    key.s_3_1 ^= key.s_2_1;
    key.s_3_2 ^= key.s_2_2;
    key.s_3_3 ^= key.s_2_3;

    kw16 cipher = statestatexor(lastShifted, key);
    Kiwi.Pause();
    return cipher;
  }
  
  // Decrypts the 16-byte block din under the 16-byte key and returns the result.
  // The local copy of key is first stepped forward Nr times to reach the last round
  // key, then stepped backwards by one schedule step per round while the rounds are
  // undone. Both arguments are structs passed by value, so the caller's copies are
  // not modified.
  public kw16 AES128_decrypt(kw16 din, kw16 key)
  {
    byte round = 0;

    // Run the key schedule forward to the final round key.
    for (round = 1; round <= Nr; round++)
      {
        row fwdCore = subword(rotWord(key.s_3_0, key.s_3_1, key.s_3_2, key.s_3_3));
        fwdCore.r_0 ^= Rcon[round];

        key.s_0_0 ^= fwdCore.r_0;
        key.s_0_1 ^= fwdCore.r_1;
        key.s_0_2 ^= fwdCore.r_2;
        key.s_0_3 ^= fwdCore.r_3;

        key.s_1_0 ^= key.s_0_0;
        key.s_1_1 ^= key.s_0_1;
        key.s_1_2 ^= key.s_0_2;
        key.s_1_3 ^= key.s_0_3;

        key.s_2_0 ^= key.s_1_0;
        key.s_2_1 ^= key.s_1_1;
        key.s_2_2 ^= key.s_1_2;
        key.s_2_3 ^= key.s_1_3;

        key.s_3_0 ^= key.s_2_0;
        key.s_3_1 ^= key.s_2_1;
        key.s_3_2 ^= key.s_2_2;
        key.s_3_3 ^= key.s_2_3;
      }

    // Remove the last round key from the input block.
    din = statestatexor(din, key);

    // Undo rounds Nr-1 .. 1; these all have the same shape.
    for (round = Nr - 1; round > 0; round--)
      {
        // Inverse row shift, then inverse byte substitution.
        kw16 unsubbed = invsubbytes(invshiftrows(din));

        // Step the key back by one schedule step. Words 3, 2, 1 are restored first
        // (in that order), so that the restored word 3 can feed the word-0 recovery.
        key.s_3_0 ^= key.s_2_0;
        key.s_3_1 ^= key.s_2_1;
        key.s_3_2 ^= key.s_2_2;
        key.s_3_3 ^= key.s_2_3;

        key.s_2_0 ^= key.s_1_0;
        key.s_2_1 ^= key.s_1_1;
        key.s_2_2 ^= key.s_1_2;
        key.s_2_3 ^= key.s_1_3;

        key.s_1_0 ^= key.s_0_0;
        key.s_1_1 ^= key.s_0_1;
        key.s_1_2 ^= key.s_0_2;
        key.s_1_3 ^= key.s_0_3;

        row backCore = subword(rotWord(key.s_3_0, key.s_3_1, key.s_3_2, key.s_3_3));
        backCore.r_0 ^= Rcon[round + 1];

        key.s_0_0 ^= backCore.r_0;
        key.s_0_1 ^= backCore.r_1;
        key.s_0_2 ^= backCore.r_2;
        key.s_0_3 ^= backCore.r_3;

        // Remove this round's key, then undo the column mixing.
        din = invmixcolumns(statestatexor(unsubbed, key));
      }

    // Last step (round == 0 here): as above but with no inverse column mixing.
    kw16 lastUnsubbed = invsubbytes(invshiftrows(din));

    key.s_3_0 ^= key.s_2_0;
    key.s_3_1 ^= key.s_2_1;
    key.s_3_2 ^= key.s_2_2;
    key.s_3_3 ^= key.s_2_3;

    key.s_2_0 ^= key.s_1_0;
    key.s_2_1 ^= key.s_1_1;
    key.s_2_2 ^= key.s_1_2;
    key.s_2_3 ^= key.s_1_3;

    key.s_1_0 ^= key.s_0_0;
    key.s_1_1 ^= key.s_0_1;
    key.s_1_2 ^= key.s_0_2;
    key.s_1_3 ^= key.s_0_3;

    row firstCore = subword(rotWord(key.s_3_0, key.s_3_1, key.s_3_2, key.s_3_3));
    firstCore.r_0 ^= Rcon[round + 1];

    key.s_0_0 ^= firstCore.r_0;
    key.s_0_1 ^= firstCore.r_1;
    key.s_0_2 ^= firstCore.r_2;
    key.s_0_3 ^= firstCore.r_3;

    // The key is back to the caller-supplied value; removing it yields the plain text.
    return statestatexor(lastUnsubbed, key);
  }
   
}

class TestBench
{
   
  // Fields tagged with Kiwi port attributes. Within this file only 'running' (toggled
  // once per loop pass in test_encrypt_ecb) and douthi/doutlo (written by sendout) are
  // used; keyflag, start and the din*/kin* inputs are declared but not referenced here.
  [Kiwi.OutputBitPort("keyflag")]  static bool keyflag;
  [Kiwi.OutputBitPort("running")]  static bool running;
  [Kiwi.InputBitPort("start")]  static bool start; // We emulate Riordan's start/stop directing here rather than use Kiwi-preferred style.


  // Data input, split into two 64-bit words.
  [Kiwi.InputWordPort("dinhi")]  static ulong dinhi;
  [Kiwi.InputWordPort("dinlo")]  static ulong dinlo;

  // Key input, split into two 64-bit words.
  [Kiwi.InputWordPort("kinhi")]  static ulong kinhi;
  [Kiwi.InputWordPort("kinlo")]  static ulong kinlo;

  // Data output, split into two 64-bit words; see sendout for how the halves are filled.
  [Kiwi.OutputWordPort("douthi")]  static ulong douthi;
  [Kiwi.OutputWordPort("doutlo")]  static ulong doutlo;



   // Prints four fixed 16-byte plain-text blocks and the fixed key, then encrypts each
   // block independently with that key and prints the four cipher-text blocks.
   // Nothing is compared against expected values; the output is for inspection.
   static void test_encrypt_ecb_verbose()
   {
      AesPlain dut = new AesPlain();

      // 128-bit key.
      AesPlain.kw16 key = new AesPlain.kw16(new byte [] { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c });

      // 512 bits of plain text, as four 128-bit blocks.
      AesPlain.kw16 [] blocks = new AesPlain.kw16 []
        {
          new AesPlain.kw16(new byte[] { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a}),
          new AesPlain.kw16(new byte[] { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51}),
          new AesPlain.kw16(new byte[] { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef}),
          new AesPlain.kw16(new byte[] { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10})
        };

      // Show the inputs: the plain-text blocks, then the key.
      Console.WriteLine("ECB encrypt verbose:\n\n");
      Console.WriteLine("plain text:\n");
      foreach (AesPlain.kw16 block in blocks)
        {
          AesPlain.printstate("", block);
        }
      Console.WriteLine();
      AesPlain.printstate("key:\n",key);
      Console.WriteLine("\n");

      // Show the resulting cipher text, one 16-byte block per line.
      Console.WriteLine("ciphertext:\n");
      foreach (AesPlain.kw16 block in blocks)
        {
          AesPlain.kw16 cipher = dut.AES128_encrypt(block, key);
          AesPlain.printstate("", cipher);
        }
      Console.WriteLine();
   }





  
  // Drives the doutlo/douthi output words from a 16-byte block.
  // Each 4-byte word of dd is packed into 32 bits with byte s_c_0 in bits 7..0,
  // s_c_1 in bits 15..8, s_c_2 in bits 23..16 and s_c_3 in bits 31..24.
  // doutlo carries word 0 (low half) and word 1 (high half); douthi carries word 2
  // (low half) and word 3 (high half).
  //
  // The bytes were previously combined with '*8', '*16' and '*24', which adds small
  // multiples instead of placing each byte in its own lane, so different blocks could
  // yield the same output word. Shifts are used now.
  static void sendout(AesPlain.kw16 dd)
  {
    uint d0 = (uint)dd.s_0_0 | ((uint)dd.s_0_1 << 8) | ((uint)dd.s_0_2 << 16) | ((uint)dd.s_0_3 << 24);
    uint d1 = (uint)dd.s_1_0 | ((uint)dd.s_1_1 << 8) | ((uint)dd.s_1_2 << 16) | ((uint)dd.s_1_3 << 24);
    uint d2 = (uint)dd.s_2_0 | ((uint)dd.s_2_1 << 8) | ((uint)dd.s_2_2 << 16) | ((uint)dd.s_2_3 << 24);
    uint d3 = (uint)dd.s_3_0 | ((uint)dd.s_3_1 << 8) | ((uint)dd.s_3_2 << 16) | ((uint)dd.s_3_3 << 24);
    doutlo = ((ulong)(d1) << 32) | ((ulong)(d0) << 0);
    douthi = ((ulong)(d3) << 32) | ((ulong)(d2) << 0);
  }



  // Hardware entry point. Encrypts four variants of a fixed plain-text block with a
  // fixed key, driving the output ports before and after each encryption, and finally
  // checks the last cipher text against the expected value.
  //
  // The loop counts ii = 3, 2, 1, 0 and adds ii to the first plain-text byte, so only
  // the final pass (ii == 0) encrypts the unmodified block; that is the pass whose
  // result is compared with dout.
  [Kiwi.HardwareEntryPoint()]
  static void test_encrypt_ecb()
  {
       Console.WriteLine("Starting aesplain.cs");
       AesPlain dut = new AesPlain();
  
    AesPlain.kw16 key = new AesPlain.kw16(new byte [] { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c });
	  
    AesPlain.kw16 din0  = new AesPlain.kw16(new byte [] {0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a});
    // Expected cipher text for din0 under key.
    AesPlain.kw16 dout = new AesPlain.kw16(new byte [] {0x3a, 0xd7, 0x7b, 0xb4, 0x0d, 0x7a, 0x36, 0x60, 0xa8, 0x9e, 0xca, 0xf3, 0x24, 0x66, 0xef, 0x97});

    Kiwi.Pause(); // End Of Elaboration

    // Initialized explicitly: the only other assignment is inside the loop below, and
    // the compiler cannot prove that the loop body runs, so reading 'result' after the
    // loop would otherwise be a use of an unassigned local.
    AesPlain.kw16 result = new AesPlain.kw16();

    for (int ii=3; ii>=0; ii--)
      {
       AesPlain.printstate(" Loop din0:", din0);
       AesPlain.printstate(" Loop key:", key);
        AesPlain.kw16 din = din0;
        running = !running;            // toggle the 'running' port once per pass
        din.s_0_0 += (byte)ii;         // perturb the first byte; unchanged when ii == 0
        sendout (din);                 // present the plain text on the output ports
        Kiwi.Pause();
        Console.WriteLine("ECB encrypt: {0} start", ii);
        result = dut.AES128_encrypt(din, key);
        sendout (result);              // present the cipher text on the output ports
        Console.WriteLine("ECB encrypt: {0} done at {1}", ii, Kiwi.tnow);
        AesPlain.printstate("", result);
      }
    
    // 'result' now holds the cipher text of the unmodified block (ii == 0 pass).
    if (dout == result) 
      {
          Console.WriteLine("SUCCESS!\n");
      }
    else
      {
        Console.WriteLine("FAILURE!\n");
        AesPlain.printstate("Expected: ", dout);
        AesPlain.printstate("Result: ", result);
      }
  }

   
 // Decrypts one fixed cipher-text block with the fixed key and reports whether the
 // result equals the expected plain text. On a mismatch both the expected and the
 // actual block are printed, as test_encrypt_ecb does.
 static void test_decrypt_ecb()
 {
    AesPlain dut = new AesPlain();

    AesPlain.kw16 key = new AesPlain.kw16(new byte [] { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c });

    // Cipher text to decrypt.
    AesPlain.kw16 din  = new AesPlain.kw16(new byte [] {0x3a, 0xd7, 0x7b, 0xb4, 0x0d, 0x7a, 0x36, 0x60, 0xa8, 0x9e, 0xca, 0xf3, 0x24, 0x66, 0xef, 0x97});

    // Expected plain text.
    AesPlain.kw16 dout = new AesPlain.kw16(new byte [] {0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a});
    
    AesPlain.kw16 result = dut.AES128_decrypt(din, key);
    
    Console.WriteLine("ECB decrypt: ");
    
    if(dout == result)
      {
         Console.WriteLine("SUCCESS!\n");
      }
    else
     {
       Console.WriteLine("FAILURE!\n");
       // The expected-value print had been disabled because it named a variable 'out',
       // which is a C# keyword; the expected block is called 'dout' here.
       AesPlain.printstate("Expected:\n", dout);
       AesPlain.printstate("Result:\n", result);
     }
 }
 
 // Program entry point: runs the encrypt check, the decrypt check and the verbose
 // encrypt dump, in that order, and always reports exit code 0.
 static int Main()
 {
   const int exitCode = 0;

   test_encrypt_ecb();
   test_decrypt_ecb();
   test_encrypt_ecb_verbose();

   return exitCode;
 }

}
// eof
