// $Id: Utils.cuh 908 2010-03-14 22:19:32Z cr333 $

#ifndef UTIL_CUH
#define UTIL_CUH

#include <stdio.h>
#include "cudatemplates/devicememory.hpp"

/*#define PRINT_GRID(grid)            printf("Grid  : %3d x %3d       blocks  (%d in total)\n", grid.x, grid.y, grid.x * grid.y);
#define PRINT_BLOCK(block)          printf("Block : %3d x %3d x %3d threads (%d in total)\n", block.x, block.y, block.z, block.x * block.y * block.z);
#define PRINT_THREADS(grid, block)  printf("Total number of threads: %d\n", grid.x * grid.y * block.x * block.y * block.z);
#define PRINT_GRID_BLOCK_INFO(grid, block) { PRINT_GRID(grid); PRINT_BLOCK(block); PRINT_THREADS(grid, block); }*/

// Euclidean (L2) distance between two points given as float3 values.
// Device-only helper.
//   x1, x2 : the two points
//   returns: sqrt of the sum of squared per-component differences
inline __device__ float Euclidean(const float3 x1, const float3 x2)
{
	// per-axis differences
	const float dx = x1.x - x2.x;
	const float dy = x1.y - x2.y;
	const float dz = x1.z - x2.z;

	return sqrtf(dx * dx + dy * dy + dz * dz);
}

// Returns the largest power of two that is less than or equal to n (host-only).
// Note: for n == 0 the result is 1, because the final step always adds one.
inline unsigned int floorPow2(const unsigned int n)
{
    // adapted from <http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn>
    unsigned int smeared = n;

    // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16).
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    // smeared is now 2^(k+1) - 1 where k is the index of the top set bit of n;
    // halving and adding one yields 2^k.
    return (smeared >> 1) + 1;
}

//== data conversion utils ========================================================================

// Splits a 32-bit value into its four bytes, least significant byte first:
// bits 0..7 -> x, 8..15 -> y, 16..23 -> z, 24..31 -> w.
inline __device__ __host__ uchar4 int_to_uchar4(const unsigned int i)
{
    const unsigned int byte0 = i & 0xff;
    const unsigned int byte1 = (i >> 8) & 0xff;
    const unsigned int byte2 = (i >> 16) & 0xff;
    const unsigned int byte3 = (i >> 24) & 0xff;

    return make_uchar4(byte0, byte1, byte2, byte3);
}

// Converts the x/y/z components of a uchar4 (each 0..255) into a float3 with
// components in 0..1 by dividing by 255. The w component is ignored.
inline __device__ __host__ float3 uchar4_to_float3(const uchar4 c)
{
	const float maxByte = 255.0f;

	float3 result;
	result.x = static_cast<float>(c.x) / maxByte;
	result.y = static_cast<float>(c.y) / maxByte;
	result.z = static_cast<float>(c.z) / maxByte;
	return result;
}

// new-style utils ----------------

// Packs the low 8 bits of each of i.x, i.y, i.z and i.w into one 32-bit word:
// x occupies bits 0..7, y bits 8..15, z bits 16..23 and w bits 24..31.
// T is any type with x/y/z/w members convertible to unsigned int.
template<typename T> inline __device__ __host__ unsigned int pack_xyzw(const T i)
{
	const unsigned int byteX = (unsigned int)i.x & 0xff;
	const unsigned int byteY = (unsigned int)i.y & 0xff;
	const unsigned int byteZ = (unsigned int)i.z & 0xff;
	const unsigned int byteW = (unsigned int)i.w & 0xff;

	// The four bytes occupy disjoint bit ranges, so OR-ing them equals summing them.
	return byteX | (byteY << 8) | (byteZ << 16) | (byteW << 24);
}

// Copies the x, y and z members of i into a new value of type T2, converting
// each component with the usual implicit conversion.
//   T1 : source type with x/y/z members
//   T2 : destination type with assignable x/y/z members
// Any further members of T2 (e.g. w of a 4-component vector) are zeroed:
// the result is value-initialised first so no member is returned indeterminate.
template<typename T1, typename T2> inline __device__ __host__ T2 select_xyz(const T1 i)
{
	T2 t = T2(); // value-initialise so members other than x/y/z are well-defined
	t.x = i.x;
	t.y = i.y;
	t.z = i.z;
	return t;
}

// Inverse of a byte-wise pack: splits a 32-bit word into four bytes and stores
// them in the x, y, z and w members of a T (x = bits 0..7, y = bits 8..15,
// z = bits 16..23, w = bits 24..31).
// T is any default-constructible type with assignable x/y/z/w members.
template<typename T> inline __device__ __host__ T unpack_xyzw(const unsigned int i)
{
	unsigned int remaining = i;
	T unpacked;

	// Peel off the lowest byte, then shift the next one into place.
	unpacked.x = remaining & 0xff;
	remaining >>= 8;
	unpacked.y = remaining & 0xff;
	remaining >>= 8;
	unpacked.z = remaining & 0xff;
	remaining >>= 8;
	unpacked.w = remaining & 0xff;

	return unpacked;
}


// Builds a cudaPitchedPtr describing the buffer held by a Cuda::DeviceMemory object.
//   ptr   <- devMemory.getBuffer()
//   pitch <- devMemory.getPitch()
//   xsize <- devMemory.getPitch()  (the pitch, not a logical row width)
//   ysize <- devMemory.size[1]
// NOTE(review): xsize is deliberately kept equal to the pitch as in the original code;
// confirm that callers do not expect the logical row width here.
// NOTE(review): reads size[1], so this presumably requires Dim >= 2 -- TODO confirm.
template <typename Type, unsigned int Dim> inline cudaPitchedPtr toPitchedPtr(const Cuda::DeviceMemory<Type, Dim>& devMemory)
{
    cudaPitchedPtr result;

    result.ptr = (void*)devMemory.getBuffer();

    // the same value is used for both the pitch and the x extent
    result.pitch = devMemory.getPitch();
    result.xsize = result.pitch;

    result.ysize = devMemory.size[1];

    return result;
}

// Pointer convenience overload: dereferences devMemory and forwards to the
// reference overload of toPitchedPtr. No null check is performed, so
// devMemory must point to a valid object.
template <typename Type, unsigned int Dim> inline cudaPitchedPtr toPitchedPtr(const Cuda::DeviceMemory<Type, Dim>* devMemory)
{
    const Cuda::DeviceMemory<Type, Dim>& memoryRef = *devMemory;
    return toPitchedPtr<Type, Dim>(memoryRef);
}

//== colour conversion utils ======================================================================

// sRGB (0..1) to linear RGB (0..1), applied independently to each channel:
// values up to 0.04045 use the linear segment (v / 12.92), larger values use
// the power segment ((v + 0.055) / 1.055)^2.4.
// Device-only; the power is evaluated with the __powf intrinsic.
inline __device__ float3 srgb2rgb(const float3 c)
{
	float3 linear;

	if (c.x <= 0.04045f)
		linear.x = c.x / 12.92f;
	else
		linear.x = __powf((c.x + 0.055f) / 1.055f, 2.4f);

	if (c.y <= 0.04045f)
		linear.y = c.y / 12.92f;
	else
		linear.y = __powf((c.y + 0.055f) / 1.055f, 2.4f);

	if (c.z <= 0.04045f)
		linear.z = c.z / 12.92f;
	else
		linear.z = __powf((c.z + 0.055f) / 1.055f, 2.4f);

	return linear;
}

// linear RGB (0..1) to XYZ (0..1) using sRGB primaries.
// Each output component is a fixed weighted sum of the r (c.x), g (c.y) and
// b (c.z) inputs; the three rows below are the rows of the 3x3 conversion matrix.
inline __device__ float3 rgb2xyz(const float3 c)
{
	const float X = 0.4124564f * c.x + 0.3575761f * c.y + 0.1804375f * c.z;
	const float Y = 0.2126729f * c.x + 0.7151522f * c.y + 0.0721750f * c.z;
	const float Z = 0.0193339f * c.x + 0.1191920f * c.y + 0.9503041f * c.z;

	return make_float3(X, Y, Z);
}

// linear RGB (0..1) to HSL (0..1).
// Returns make_float3(h, s, l):
//   h : hue as a fraction of a full turn, wrapped into [0, 1)
//   s : saturation
//   l : lightness, the mean of the smallest and largest component
// When all three components are equal (grey), h and s are both 0.
inline __device__ float3 rgb2hsl(const float3& c)
{
	const float lo = fminf(c.x, fminf(c.y, c.z));
	const float hi = fmaxf(c.x, fmaxf(c.y, c.z));

	const float lightness = 0.5f * (lo + hi);

	// achromatic: hue and saturation are defined as zero
	if (lo == hi)
	{
		return make_float3(0.0f, 0.0f, lightness);
	}

	const float chroma = hi - lo;

	// The hue sector depends on which channel is the maximum.
	float hue;
	if (hi == c.x)
	{
		// +6 keeps the value positive; wrap back below 1 afterwards
		hue = ((c.y - c.z) / chroma + 6.0f) / 6.0f;
		if (hue >= 1.0f)
		{
			hue -= 1.0f;
		}
	}
	else if (hi == c.y)
	{
		hue = ((c.z - c.x) / chroma + 2.0f) / 6.0f;
	}
	else
	{
		hue = ((c.x - c.y) / chroma + 4.0f) / 6.0f;
	}

	// The saturation denominator depends on which half of the lightness range we are in.
	const float saturation = (lightness <= 0.5f)
		? chroma / (2.0f * lightness)
		: chroma / (2.0f - 2.0f * lightness);

	return make_float3(hue, saturation, lightness);
}

// XYZ (0..1) to CIELAB (L in 0..100) assuming D65 whitepoint.
// Returns make_float3(L, a, b). Device-only.
inline __device__ float3 xyz2lab(const float3 c)
{
	// threshold between the cube-root and the linear segment, and the linear slope
	const float epsilon = 216.0f / 24389.0f;
	const float kappa = 24389.0f / 27.0f;

	// normalise by the D65 whitepoint, XYZ=(0.95047, 1.00000, 1.08883)
	const float xr = c.x / 0.95047f;
	const float yr = c.y;
	const float zr = c.z / 1.08883f;

	float fx, fy, fz;

	if (xr > epsilon)
		fx = cbrtf(xr);
	else
		fx = (kappa * xr + 16.0f) / 116.0f;

	if (yr > epsilon)
		fy = cbrtf(yr);
	else
		fy = (kappa * yr + 16.0f) / 116.0f;

	if (zr > epsilon)
		fz = cbrtf(zr);
	else
		fz = (kappa * zr + 16.0f) / 116.0f;

	// L is derived from fy. Historical note (CR 2010-03-14): this is where the
	// xyz2lab bug was located -- 116.0f * fx - 16.0f is wrong.
	const float L = 116.0f * fy - 16.0f;
	const float a = 500.0f * (fx - fy);
	const float b = 200.0f * (fy - fz);

	return make_float3(L, a, b);
}

#endif // UTIL_CUH