/**
 * A test of the utility of software prefetching when varying the number of
 * arrays to indirect through and the number of prefetches to generate.  This
 * code is based on is.c from the NAS Parallel Benchmarks suite and is released
 * under the same licence, as provided below.
 *
 * Timothy M. Jones and Sam Ainsworth
 */

/*--------------------------------------------------------------------
  
  NAS Parallel Benchmarks 2.3 OpenACC C versions - IS

  This benchmark is an OpenACC C version of the NPB IS code.
  
  The OpenACC C versions are derived from the serial C versions in
  "NPB 3.3-serial" and OpenMP C versions in "NPB 2.3-omp" 
  developed by NAS.

  Permission to use, copy, distribute and modify this software for any
  purpose with or without fee is hereby granted.
  This software is provided "as is" without express or implied warranty.
  
  Information on NAS Parallel Benchmarks 2.3 is available at:
  
           http://www.nas.nasa.gov/NAS/NPB/

--------------------------------------------------------------------*/
/*--------------------------------------------------------------------

  Author: M. Yarrow
          H. Jin 

  OpenMP C version: S. Satoh
  OpenACC C version: P. Makpaisit
  
--------------------------------------------------------------------*/

#include <stdlib.h>
#include <stdio.h>

/*************/
/*  CLASS B  */
/*************/
/* Base-2 logarithms of the problem dimensions. */
#define  TOTAL_KEYS_LOG_2    25
#define  MAX_KEY_LOG_2       21
#define  NUM_BUCKETS_LOG_2   10


/* Number of elements in every array of the indirection chain. */
#define  TOTAL_KEYS          (1 << TOTAL_KEYS_LOG_2)
/* Scale of the generated values: create_seq() multiplies by MAX_KEY/4, and
   rank() zeroes MAX_KEY entries of the last array before each pass. */
#define  MAX_KEY             (1 << MAX_KEY_LOG_2)
/* Not referenced anywhere else in this file. */
#define  NUM_BUCKETS         (1 << NUM_BUCKETS_LOG_2)
#define  NUM_KEYS            TOTAL_KEYS
#define  SIZE_OF_BUFFERS     NUM_KEYS  
/* Length of each *_test_index_array / *_test_rank_array table. */
#define  TEST_ARRAY_SIZE     5
                                           

/* Iteration count main() starts from; it is halved once for every array
   beyond the second and never drops below 2.  rank() also uses it as an
   offset into array1. */
#define  MAX_ITERATIONS      32


/* Number of arrays in the indirection chain (array1 .. array<NARRAYS>).
   Can be overridden on the compiler command line, e.g. -DNARRAYS=4. */
#ifndef NARRAYS
#define NARRAYS 8
#endif

/* Number of arrays, counted from array1, for which rank() issues software
   prefetches.  0 disables prefetching altogether. */
#ifndef NSWPF
#define NSWPF 0
#endif

/* The code below only provides array1..array10 and selects the last array
   with "#if NARRAYS == k" chains covering 2..10, so reject anything else
   here instead of failing later with an obscure syntax error. */
#if NARRAYS < 2 || NARRAYS > 10
#error "NARRAYS must be between 2 and 10"
#endif

#if NSWPF < 0
#error "NSWPF must not be negative"
#endif

#if NSWPF > NARRAYS
#error "Prefetching more arrays than exist"
#endif

/* Look-ahead distance, in iterations of the rank() loop, for the prefetch
   into array number `arr` (1-based).  The distance is 64/NSWPF for the
   deepest prefetched array and grows by that same step for each array
   closer to the start of the chain.  Must only be expanded when NSWPF > 0,
   otherwise it divides by zero.  Arguments are fully parenthesized so that
   expressions such as PFDIST(n + 1) expand correctly. */
#define PFDIST(arr) ((64/(NSWPF))*((NSWPF)+1-(arr)))


/* The chain of arrays that rank() indirects through: array1 is indexed by
   the loop counter, and each later array is indexed by the value read from
   the one before it.  array1 and array2 always exist; array3..array10 are
   only defined when NARRAYS is large enough to need them.  main() fills
   every array except the last with create_seq(); the last one is zeroed and
   incremented by rank(). */
int array1[SIZE_OF_BUFFERS];
int array2[SIZE_OF_BUFFERS];
#if NARRAYS > 2
int array3[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 3
int array4[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 4
int array5[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 5
int array6[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 6
int array7[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 7
int array8[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 8
int array9[SIZE_OF_BUFFERS];
#endif
#if NARRAYS > 9
int array10[SIZE_OF_BUFFERS];
#endif

/* Output buffer written by full_verify(). */
int key_array[SIZE_OF_BUFFERS];



/********************/
/* Some global info */
/********************/
int *key_buff_ptr_global;         /* used by full_verify to get */
                                       /* copies of rank info        */
                                       /* NOTE(review): no code in   */
                                       /* this file assigns it       */

/* Incremented by full_verify() when it finds no keys out of order. */
int      passed_verification;

/* Index/rank tables, one pair per prefix (S, W, A, B) plus an unprefixed
   pair without initializers.  None of them is referenced elsewhere in this
   file; presumably the prefixes are NPB problem classes carried over from
   the original is.c -- TODO confirm. */
int test_index_array[TEST_ARRAY_SIZE],
         test_rank_array[TEST_ARRAY_SIZE],

         S_test_index_array[TEST_ARRAY_SIZE] = 
                             {48427,17148,23627,62548,4431},
         S_test_rank_array[TEST_ARRAY_SIZE] = 
                             {0,18,346,64917,65463},

         W_test_index_array[TEST_ARRAY_SIZE] = 
                             {357773,934767,875723,898999,404505},
         W_test_rank_array[TEST_ARRAY_SIZE] = 
                             {1249,11698,1039987,1043896,1048018},

         A_test_index_array[TEST_ARRAY_SIZE] = 
                             {2112377,662041,5336171,3642833,4250760},
         A_test_rank_array[TEST_ARRAY_SIZE] = 
                             {104,17523,123928,8288932,8388264},

         B_test_index_array[TEST_ARRAY_SIZE] = 
                             {41869,812306,5102857,18232239,26860214},
         B_test_rank_array[TEST_ARRAY_SIZE] = 
                             {33422937,10244,59149,33135281,99};
                                 


/* Defined further down; not called anywhere in this file. */
void full_verify( void );

/*****************************************************************/
/*************           R  A  N  D  L  C             ************/
/*************                                        ************/
/*************    portable random number generator    ************/
/*****************************************************************/

/*
 * randlc: advance the generator state and return the next pseudo-random
 * number.
 *
 *   X  in/out  current state; replaced by (*A * *X) mod 2^46
 *   A  in      multiplier; not modified
 *
 * Returns the new state scaled by 2^-46.
 *
 * Both operands are split into 23-bit halves so that every intermediate
 * product stays exactly representable in a double.
 */
double	randlc( double *X, double *A )
{
      /* Exact powers of two used to split and recombine the operands. */
      const double two_m23 = 1.0 / 8388608.0;            /* 2^-23 */
      const double two_p23 = 8388608.0;                  /* 2^23  */
      const double two_m46 = 1.0 / 70368744177664.0;     /* 2^-46 */
      const double two_p46 = 70368744177664.0;           /* 2^46  */

      /* A = 2^23 * a_hi + a_lo */
      const double a_hi = (double) (int) (two_m23 * *A);
      const double a_lo = *A - two_p23 * a_hi;

      /* X = 2^23 * x_hi + x_lo */
      const double x_hi = (double) (int) (two_m23 * *X);
      const double x_lo = *X - two_p23 * x_hi;

      /* z = (a_hi * x_lo + a_lo * x_hi) mod 2^23 */
      const double cross = a_hi * x_lo + a_lo * x_hi;
      const double z     = cross - two_p23 * (double) (int) (two_m23 * cross);

      /* X = (2^23 * z + a_lo * x_lo) mod 2^46 */
      const double full  = two_p23 * z + a_lo * x_lo;

      *X = full - two_p46 * (double) (int) (two_m46 * full);
      return two_m46 * *X;
}




/*****************************************************************/
/*************      C  R  E  A  T  E  _  S  E  Q      ************/
/*****************************************************************/

/*
 * create_seq: fill key_array[0 .. NUM_KEYS-1] with pseudo-random integers.
 *
 *   seed       initial state handed to randlc()
 *   a          multiplier handed to randlc()
 *   key_array  destination buffer; must hold at least NUM_KEYS ints
 *
 * Each element is the sum of four consecutive randlc() results multiplied
 * by MAX_KEY/4 and truncated to int.
 */
void	create_seq( double seed, double a, int *key_array )
{
	const int scale = MAX_KEY/4;
	int       idx;

	for (idx = 0; idx < NUM_KEYS; idx++)
	{
	    double sum = 0.0;
	    int    draw;

	    /* Accumulate four successive draws from the generator. */
	    for (draw = 0; draw < 4; draw++)
	        sum += randlc(&seed, &a);

	    key_array[idx] = scale*sum;
	}
}


/*****************************************************************/
/*************    F  U  L  L  _  V  E  R  I  F  Y     ************/
/*****************************************************************/


/*
 * full_verify: scatter the contents of the last array of the chain into
 * key_array using the positions held in key_buff_ptr_global, then check
 * that key_array is in non-decreasing order.
 *
 * For every element v of the last array, the entry key_buff_ptr_global[v]
 * is decremented and v is stored in key_array at the resulting position.
 * If no adjacent pair of key_array is out of order, passed_verification is
 * incremented; otherwise the number of out-of-order pairs is printed.
 *
 * Nothing in this file assigns key_buff_ptr_global, so a call made before
 * it has been set would dereference a null pointer.  That case is reported
 * and the function returns without touching key_array or
 * passed_verification.
 */
void full_verify( void )
{
    int    i;
    int    out_of_order;

    /* Start of the last array in the chain (array<NARRAYS>). */
    int *const last = &
#if NARRAYS == 2
    array2
#elif NARRAYS == 3
    array3
#elif NARRAYS == 4
    array4
#elif NARRAYS == 5
    array5
#elif NARRAYS == 6
    array6
#elif NARRAYS == 7
    array7
#elif NARRAYS == 8
    array8
#elif NARRAYS == 9
    array9
#elif NARRAYS == 10
    array10
#endif
    [0];

    if( key_buff_ptr_global == NULL )
    {
        printf( "Full_verify: rank information unavailable, verification skipped\n" );
        return;
    }

/*  Now, finally, sort the keys:  */
    for( i=0; i<NUM_KEYS; i++ )
        key_array[--key_buff_ptr_global[last[i]]] = last[i];

/*  Confirm keys correctly sorted: count incorrectly sorted keys, if any */
    out_of_order = 0;
    for( i=1; i<NUM_KEYS; i++ )
    {
        if( key_array[i-1] > key_array[i] )
            out_of_order++;
    }

    if( out_of_order != 0 )
    {
        printf( "Full_verify: number of keys out of sort: %d\n",
                out_of_order );
    }
    else
    {
        passed_verification++;
    }
}


/*****************************************************************/
/*************             R  A  N  K             ****************/
/*****************************************************************/

/*
 * rank: one pass of the indirect-access kernel that this benchmark times.
 *
 *   iteration  pass number; used to alter two entries of array1 so that
 *              successive passes do not see identical input
 *
 * The pass zeroes the first MAX_KEY entries of the last array in the chain
 * (array<NARRAYS>) and then, for every i in [0, NUM_KEYS), increments the
 * element of that array reached by following
 *     array1[i] -> array2[...] -> ... -> array<NARRAYS>[...].
 *
 * When NSWPF > 0, each loop iteration first issues software prefetches for
 * the first NSWPF arrays of the chain.  The prefetch into array k looks
 * PFDIST(k) iterations ahead; PFDIST decreases as k grows, so the deeper a
 * level is in the chain, the closer to the current iteration its prefetch
 * is issued.
 */
void rank( int iteration )
{
    int i;

    /* Alter two entries of array1 as a function of the pass number. */
    array1[iteration] = iteration;
    array1[iteration+MAX_ITERATIONS] = MAX_KEY - iteration;

/*  Clear the work array, i.e. the last array of the chain */
    for( i=0; i<MAX_KEY; i++ )
#if NARRAYS == 2
    array2
#elif NARRAYS == 3
    array3
#elif NARRAYS == 4
    array4
#elif NARRAYS == 5
    array5
#elif NARRAYS == 6
    array6
#elif NARRAYS == 7
    array7
#elif NARRAYS == 8
    array8
#elif NARRAYS == 9
    array9
#elif NARRAYS == 10
    array10
#endif
        [i] = 0;

/*  Ranking of all keys occurs in this section:                 */
    for( i=0; i<NUM_KEYS; i++ ) {

        /* Prefetches - array1 */
        /* No bounds check here: only an address is formed and handed to the
           prefetch builtin, nothing is loaded from array1 by this line. */
#if NSWPF > 0
        __builtin_prefetch(&array1[i+PFDIST(1)]);
#endif

        /* Prefetches - array2 */
        /* From here on the address computation really loads from the earlier
           arrays, so each level is guarded against reading past NUM_KEYS. */
#if NSWPF > 1
        if (i+PFDIST(2) < NUM_KEYS)
            __builtin_prefetch(&array2[array1[i+PFDIST(2)]]);
#endif

        /* Prefetches - array3 */
#if NSWPF > 2
        if (i+PFDIST(3) < NUM_KEYS)
            __builtin_prefetch(&array3[array2[array1[i+PFDIST(3)]]]);
#endif

        /* Prefetches - array4 */
#if NSWPF > 3
        if (i+PFDIST(4) < NUM_KEYS)
          __builtin_prefetch(&array4[array3[array2[array1[i+PFDIST(4)]]]]);
#endif

        /* Prefetches - array5 */
#if NSWPF > 4
        if (i+PFDIST(5) < NUM_KEYS)
          __builtin_prefetch(&array5[array4[array3[array2[array1[i+PFDIST(5)]]]]]);
#endif

        /* Prefetches - array6 */
#if NSWPF > 5
        if (i+PFDIST(6) < NUM_KEYS)
          __builtin_prefetch(&array6[array5[array4[array3[array2[array1[i+PFDIST(6)]]]]]]);
#endif

        /* Prefetches - array7 */
#if NSWPF > 6
        if (i+PFDIST(7) < NUM_KEYS)
          __builtin_prefetch(&array7[array6[array5[array4[array3[array2[array1[i+PFDIST(7)]]]]]]]);
#endif

        /* Prefetches - array8 */
#if NSWPF > 7
        if (i+PFDIST(8) < NUM_KEYS)
          __builtin_prefetch(&array8[array7[array6[array5[array4[array3[array2[array1[i+PFDIST(8)]]]]]]]]);
#endif

        /* Prefetches - array9 */
#if NSWPF > 8
        if (i+PFDIST(9) < NUM_KEYS)
          __builtin_prefetch(&array9[array8[array7[array6[array5[array4[array3[array2[array1[i+PFDIST(9)]]]]]]]]]);
#endif

        /* Prefetches - array10 */
#if NSWPF > 9
        if (i+PFDIST(10) < NUM_KEYS)
          __builtin_prefetch(&array10[array9[array8[array7[array6[array5[array4[array3[array2[array1[i+PFDIST(10)]]]]]]]]]]);
#endif

        /* The real work */
        /* The conditionals below assemble a single expression,
           array<NARRAYS>[ ... array2[array1[i]] ... ]++,
           opening one subscript per array present and closing them again
           in reverse order. */
#if NARRAYS > 9
        array10[
#endif
#if NARRAYS > 8
                array9[
#endif
#if NARRAYS > 7
                       array8[
#endif
#if NARRAYS > 6
                              array7[
#endif
#if NARRAYS > 5
                                     array6[
#endif
#if NARRAYS > 4
                                            array5[
#endif
#if NARRAYS > 3
                                                   array4[
#endif
#if NARRAYS > 2
                                                          array3[
#endif
                                                                 array2[
                                                                        array1[i]
                                                                        ]
#if NARRAYS > 2
                                                                 ]
#endif
#if NARRAYS > 3
                                                          ]
#endif
#if NARRAYS > 4
                                                   ]
#endif
#if NARRAYS > 5
                                            ]
#endif
#if NARRAYS > 6
                                     ]
#endif
#if NARRAYS > 7
                              ]
#endif
#if NARRAYS > 8
                       ]
#endif
#if NARRAYS > 9
                ]
#endif
          ++;
    }
}


#include <time.h>
     
     /* clock() readings taken by main() immediately before and after the
        timed loop of rank() calls. */
     clock_t start, end;
     /* (end - start) converted to seconds by main() and printed there. */
     double cpu_time_used;


/*****************************************************************/
/*************             M  A  I  N             ****************/
/*****************************************************************/

/*
 * main: benchmark driver.
 *
 * Prints the configuration, fills every array of the chain except the last
 * with pseudo-random values, runs one untimed warm-up pass of rank(), then
 * times `maxiterations` further passes with clock() and prints the elapsed
 * CPU time in seconds.
 *
 * The iteration count starts at MAX_ITERATIONS and is halved once for every
 * array beyond the second, but never drops below 2.
 *
 * Command-line arguments are ignored.  Returns 0.
 */
int main( int argc, char **argv )
{
    int             i, iteration, maxiterations;

    (void) argc;
    (void) argv;

    maxiterations = MAX_ITERATIONS;
    for (i = 2; i < NARRAYS; ++i) {
      maxiterations >>= 1;
    }
    maxiterations = maxiterations > 2 ? maxiterations : 2;

    printf("\n\n Testing software prefetching large numbers of arrays\n\n");
    printf(" Size: %d\n", TOTAL_KEYS);
    printf(" Arrays: %d\n", NARRAYS);
    printf(" Iterations: %d\n", maxiterations);

/*  Generate a random number sequence for every array except the last one;
    the last array is zeroed by rank() and used as the array of counters.  */
    create_seq( 314159265.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array1);
#if NARRAYS > 2
    create_seq( 314159267.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array2);
#endif
#if NARRAYS > 3
    create_seq( 314159269.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array3);
#endif
#if NARRAYS > 4
    create_seq( 314159271.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array4);
#endif
#if NARRAYS > 5
    create_seq( 314159273.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array5);
#endif
#if NARRAYS > 6
    create_seq( 314159275.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array6);
#endif
#if NARRAYS > 7
    create_seq( 314159279.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array7);
#endif

#if NARRAYS > 8
    create_seq( 314159281.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array8);
#endif

#if NARRAYS > 9
    create_seq( 314159283.00,                    /* Random number gen seed */
                1220703125.00,                   /* Random number gen mult */
                array9);
#endif

/*  Do one iteration for free (i.e., untimed) to guarantee initialization of
    all data and code pages and respective tables */
    rank( 1 );

    printf( "\n   iteration\n" );

/*  Start of the timed region  */
    start = clock();

/*  This is the main iteration */
    for( iteration=1; iteration<=maxiterations; iteration++ )
    {
        printf( "        %d\n", iteration );

        rank( iteration );
    }

/*  End of the timed region  */
    end = clock();
    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;

    printf("time : %f\n",cpu_time_used );

    return 0;
}
