// $Id: DcbGrid.cuh 904 2010-03-10 16:59:52Z cr333 $

#ifndef DCBGRID_CUH_INCLUDED
#define DCBGRID_CUH_INCLUDED

#define DCBGRID_STANDALONE

#include "cudatemplates/devicememory.hpp"
#include "cudatemplates/devicememoryreference.hpp"
#include "CudaMath.h"
#include "Utils.cuh"

template <typename T> class GpuTiledImages3D;

#ifdef DCBGRID_STANDALONE
#include "UnmanagedAggregators.h"
#else
#include "settings.h"
#include "stereo.h"
#endif


//---- Coefficients for Gaussian smoothing --------------------------------------------------------

//// sigma = 1.0
//#define NORMAL_0 1.0f
//#define NORMAL_1 0.60653066f
//#define NORMAL_2 0.135335283f

// sigma = 1 / sqrt(2) = 0.707106781
// With this sigma, 2 * sigma^2 = 1, so a Gaussian weight exp(-d^2 / (2 * sigma^2))
// at integer offset d reduces to exp(-d^2); the three macros are the values for
// d = 0, 1 and 2 respectively.
// NOTE: __expf is a CUDA device intrinsic, so NORMAL_1 and NORMAL_2 can only be
// expanded inside device code.
#define NORMAL_0 1.0f
#define NORMAL_1 __expf(-1.0f/1.0f)
#define NORMAL_2 __expf(-4.0f/1.0f)


//---- Colour access functions --------------------------------------------------------------------

//// just gets the green component (which is the biggest contributor to grey)
//inline __device__ __host__ float getColour1(const unsigned int pixel)
//{
//	return (pixel >> 8) & 0xff;
//}

//// luminance (assuming linear sRGB primaries)
//inline __device__ __host__ float getColour1(const unsigned int pixel)
//{
//	return 0.2126729f * (pixel & 0xff) + 0.7151522f * ((pixel >>  8) & 0xff) + 0.0721750f * ((pixel >> 16) & 0xff);
//}

//// luminance (assuming sRGB primaries)
//inline __device__ float getColour1(const unsigned int pixel)
//{
//	return
//		0.2126729f * 255.0f * __powf(( pixel        & 0xff) / 255.0, 1/2.2f) +
//		0.7151522f * 255.0f * __powf(((pixel >>  8) & 0xff) / 255.0, 1/2.2f) +
//		0.0721750f * 255.0f * __powf(((pixel >> 16) & 0xff) / 255.0, 1/2.2f);
//}

// lightness (assuming sRGB primaries)
// Returns the CIE lightness L* of a packed RGB32 pixel, treating its colour
// channels as sRGB-encoded. For channel values in 0..255 the result lies in
// the nominal range 0..100.
inline __device__ float getColour1(const unsigned int pixel)
{
	// unpack the pixel and scale its three colour channels to 0..1
	float3 srgb = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;

	// undo the sRGB transfer curve per channel: linear segment near black,
	// power-law segment everywhere else
	const float linR = srgb.x <= 0.04045f ? srgb.x / 12.92f : __powf((srgb.x + 0.055f) / 1.055f, 2.4f);
	const float linG = srgb.y <= 0.04045f ? srgb.y / 12.92f : __powf((srgb.y + 0.055f) / 1.055f, 2.4f);
	const float linB = srgb.z <= 0.04045f ? srgb.z / 12.92f : __powf((srgb.z + 0.055f) / 1.055f, 2.4f);

	// relative luminance Y as a weighted sum of the linear channels
	const float luminance = 0.2126729f * linR + 0.7151522f * linG + 0.0721750f * linB;

	// CIE lightness transfer function: cube root above the 216/24389 threshold,
	// linear ramp below it
	float f;
	if (luminance > 216.0f / 24389.0f)
	{
		f = cbrtf(luminance);
	}
	else
	{
		f = (24389.0f / 27.0f * luminance + 16.0f) / 116.0f;
	}

	// L* = 116 * f(Y) - 16
	return 116.0f * f - 16.0f;
}

//---- Various second colour dimensions (for comparison) ------------------------------------------

//// sRGB /////////////////////////////////////////////////////////////////////////////////////////

//// colour differentiator: sRGB's red
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	return (pixel & 0xff) * (100.f / 255.0f);
//}

//// colour differentiator: sRGB's green
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	return ((pixel >> 8) & 0xff) * (100.f / 255.0f);
//}

//// colour differentiator: sRGB's blue
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	return ((pixel >> 16) & 0xff) * (100.f / 255.0f);
//}

//// XYZ //////////////////////////////////////////////////////////////////////////////////////////

//// colour differentiator: XYZ's X
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.x);
//}

//// colour differentiator: XYZ's Y
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.y);
//}

//// colour differentiator: XYZ's Z
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.z);
//}

//// xyz //////////////////////////////////////////////////////////////////////////////////////////

//// colour differentiator: chromaticity x = X / (X + Y + Z)
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.x / (c.x + c.y + c.z));
//}

//// colour differentiator: chromaticity y = Y / (X + Y + Z)
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.y / (c.x + c.y + c.z));
//}

//// colour differentiator: chromaticity z = Z / (X + Y + Z)
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIEXYZ
//	c = rgb2xyz(srgb2rgb(c));
//
//	return 100.0f * __saturatef(c.z / (c.x + c.y + c.z));
//}

//// CIELAB ///////////////////////////////////////////////////////////////////////////////////////

//// colour differentiator: a*
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIELAB
//	c = xyz2lab(rgb2xyz(srgb2rgb(c)));
//
//	return 0.5f * (100.0f + c.y);
//}

//// colour differentiator: b*
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIELAB
//	c = xyz2lab(rgb2xyz(srgb2rgb(c)));
//
//	return 0.5f * (100.0f + c.z);
//}

//// colour differentiator: C*_ab (chroma)
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIELAB
//	c = xyz2lab(rgb2xyz(srgb2rgb(c)));
//
//	return sqrtf(c.y * c.y + c.z * c.z);
//}

// colour differentiator: h_ab (hue) => best one, as per paper
// Returns the CIELAB hue angle h_ab of a packed RGB32 pixel, rescaled from
// [-pi, +pi] to [0, 100].
inline __device__ float getColour2(const unsigned int pixel)
{
	// unpack the pixel and scale its three colour channels to 0..1
	float3 srgb = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;

	// sRGB -> linear RGB -> CIEXYZ -> CIELAB (x = L*, y = a*, z = b*)
	float3 lab = xyz2lab(rgb2xyz(srgb2rgb(srgb)));

	// hue is the angle of the (a*, b*) vector
	const float hue = atan2f(lab.z, lab.y);

	// shift and scale so that -pi maps to 0 and +pi maps to 100
	return 50.0f * (1.0f + hue / 3.14159265f);
}

//// colour differentiator: s*_ab (saturation) = C*_ab / L*
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//
//	// convert to CIELAB
//	c = xyz2lab(rgb2xyz(srgb2rgb(c)));
//
//	return 100.0f * __saturatef(sqrtf(c.y * c.y + c.z * c.z) / c.x);
//}

//// HSL //////////////////////////////////////////////////////////////////////////////////////////

//// colour differentiator: HSL's hue
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//	return 100.0f * __saturatef(rgb2hsl(srgb2rgb(c)).x);
//}

//// colour differentiator: HSL's saturation
//inline __device__ float getColour2(const unsigned int pixel)
//{
//	// convert RGB32 to float3 with 0..1 components
//	float3 c = select_xyz<float4, float3>(unpack_xyzw<float4>(pixel)) / 255.0f;
//	return 100.0f * __saturatef(rgb2hsl(srgb2rgb(c)).y);
//}


//---- Memory indexing functions ------------------------------------------------------------------

//// unsafe macros for 2D/3D indexing
//#define index2D(x1, w1, x2) ((x2) * (w1) + (x1))
//#define index3D(x1, w1, x2, w2, x3) (((x3) * (w2) + (x2)) * (w1) + (x1))

// type-safe inline functions for 2D/3D indexing
// Flattens the 2D coordinate (x1, x2) into a linear offset.
//   x1 : coordinate along the fastest-varying axis
//   w1 : extent of that axis
//   x2 : coordinate along the second axis
// The result is converted to T, the type of x1.
template <typename T, typename U>
inline __device__ T index2D(const T x1, const unsigned int w1, const U x2)
{
	return x1 + w1 * x2;
}

// Flattens the 3D coordinate (x1, x2, x3) into a linear offset.
//   x1, w1 : coordinate and extent of the fastest-varying axis
//   x2, w2 : coordinate and extent of the second axis
//   x3     : coordinate along the slowest-varying axis
// The result is converted to T, the type of x1.
template <typename T, typename U, typename V>
inline __device__ T index3D(
	const T x1, const unsigned int w1,
	const U x2, const unsigned int w2,
	const V x3)
{
	return x1 + w1 * (x2 + w2 * x3);
}

// Flattens the 4D coordinate (x1, x2, x3, x4) into a linear offset.
//   x1, w1 : coordinate and extent of the fastest-varying axis
//   x2, w2 : coordinate and extent of the second axis
//   x3, w3 : coordinate and extent of the third axis
//   x4     : coordinate along the slowest-varying axis
// The result is converted to T, the type of x1.
template <typename T, typename U, typename V, typename W>
inline __device__ T index4D(
	const T x1, const unsigned int w1,
	const U x2, const unsigned int w2,
	const V x3, const unsigned int w3,
	const W x4)
{
	return x1 + w1 * (x2 + w2 * (x3 + w3 * x4));
}


//---- 1. Colour-blind per-frame DCB grid ---------------------------------------------------------

// Parameter bundle passed to the single-grid DCB functions declared in this header.
struct Config
{
	// Device buffers. The cost volume and both images are reached through
	// pointers-to-const; the grid is not, so it can be modified through its pointer.
	const Cuda::DeviceMemory<float, 3>* gpuCost;
	const Cuda::DeviceMemory<const unsigned int, 2> *gpuImg1, *gpuImg2;
	Cuda::DeviceMemory<float2, 2>* gpuGrid;

	// NOTE(review): presumably image width/height and number of disparity levels -- confirm.
	unsigned int w, h, numDisps;

	// NOTE(review): presumably the spatial and colour sigmas of the grid -- confirm.
	float sigmaS, sigmaC;

	// NOTE(review): presumably derived grid dimensions and tiling, filled in by
	// CalculateGridTextureSize(Config&) -- confirm against its definition.
	unsigned int dw, dh, dc;
	unsigned int tile_x, tile_y;
};

// Compute all dimensions (for internal use).
// Takes the configuration by non-const reference, so results can be written back
// into `c`. NOTE(review): presumably derives dw, dh, dc, tile_x and tile_y from
// w, h, numDisps, sigmaS and sigmaC -- the definition is not in this header; confirm.
void CalculateGridTextureSize(Config& c);

// Just computes the texture size, without needing a Config.
//   w, h, numDisps : NOTE(review): presumably image size and disparity count -- confirm
//   sigmaS, sigmaC : NOTE(review): presumably spatial and colour sigmas -- confirm
//   texWidth, texHeight : output parameters (non-const references) receiving the size
void CalculateGridTextureSize(
	const unsigned int w, const unsigned int h, const unsigned int numDisps,
	const float sigmaS, const float sigmaC,
	unsigned int& texWidth, unsigned int& texHeight);

// Host-side entry points operating on a Config (no __device__/__global__ qualifier).
// NOTE(review): going by the names, these launch the create, process and slice
// stages of the grid in that order -- the definitions are not in this header; confirm.
void RunCreateGridKernel(Config& c);
void RunProcessGridKernel(Config& c);
void RunSliceGridKernel(Config& c);


//---- 2. Colour-blind double-frame DCB grid (doesn't work well) ----------------------------------

// create single grid from current & previous stereo images & cost
//   c          : configuration; NOTE(review): presumably supplies the current
//                frame's cost and images -- confirm
//   prevCost   : 3D float device memory reference for the previous frame's cost
//   prevImageL : 2D device memory reference for the previous left image
//   prevImageR : 2D device memory reference for the previous right image
void RunCreateGridKernel(Config& c,
	const Cuda::DeviceMemoryReference3D<float>& prevCost,
	const Cuda::DeviceMemoryReference2D<const unsigned int>& prevImageL,
	const Cuda::DeviceMemoryReference2D<const unsigned int>& prevImageR);


//---- 3. Colour-blind multi-frame DCB grid -------------------------------------------------------

// Host-side entry point for the multi-frame variant.
//   c        : configuration, passed by non-const reference
//   grids    : array of pointers to float2 2D device memory references
//   numGrids : NOTE(review): presumably the number of entries in `grids` -- confirm
//   weighting, wa, wb : NOTE(review): presumably select and parameterise how the
//                       grids are weighted -- semantics are defined elsewhere; confirm
void RunMultiSliceGridKernel(Config& c, Cuda::DeviceMemoryReference2D<float2>** grids, unsigned int numGrids, const int weighting, const float wa, const float wb);


//---- 4. Partially colour-blind per-frame DCB grid -----------------------------------------------

// Parameter bundle passed to the "2"-suffixed DCB functions declared in this
// header; unlike Config it carries two colour sigmas and two colour dimensions.
struct Config2
{
	// Device buffers. The cost volume and both images are reached through
	// pointers-to-const; the tiled grids are not, so they can be modified
	// through their pointer.
	const Cuda::DeviceMemory<float, 3>* gpuCost;
	const Cuda::DeviceMemory<const unsigned int, 2> *gpuImg1, *gpuImg2;
	GpuTiledImages3D<float2>* gpuGrids;

	// NOTE(review): presumably image width/height and number of disparity levels -- confirm.
	unsigned int w, h, numDisps;

	// NOTE(review): presumably the spatial sigma and one sigma per colour dimension -- confirm.
	float sigmaS, sigmaC1, sigmaC2;

	// NOTE(review): presumably derived grid dimensions, filled in by
	// CalculateGridTextureSize(Config2&) -- confirm against its definition.
	unsigned int dw, dh, dc1, dc2;
};

// Compute all dimensions (for internal use).
// Takes the configuration by non-const reference, so results can be written back
// into `config`. NOTE(review): presumably derives dw, dh, dc1 and dc2 from w, h,
// numDisps, sigmaS, sigmaC1 and sigmaC2 -- the definition is not in this header; confirm.
void CalculateGridTextureSize(Config2& config);

// Just computes the texture size for the two-colour-dimension variant, without
// needing a Config2.
//   w, h, numDisps   : NOTE(review): presumably image size and disparity count -- confirm
//   sigmaS           : NOTE(review): presumably the spatial sigma -- confirm
//   sigmaC1, sigmaC2 : NOTE(review): presumably one sigma per colour dimension -- confirm
//   texWidth, texHeight : output parameters (non-const references) receiving the size
void CalculateGridTextureSize(
	const unsigned int w, const unsigned int h, const unsigned int numDisps,
	const float sigmaS, const float sigmaC1, const float sigmaC2,
	unsigned int& texWidth, unsigned int& texHeight);

// Host-side entry points operating on a Config2 (no __device__/__global__ qualifier).
// NOTE(review): going by the names, these launch the create, process and slice
// stages of the grid in that order -- the definitions are not in this header; confirm.
void RunCreateGridKernel2(Config2& c);
void RunProcessGridKernel2(Config2& c);
void RunSliceGridKernel2(Config2& c);

#endif // DCBGRID_CUH_INCLUDED