// $Id: CostSpaceMadKernel.cu 792 2009-10-01 18:24:11Z daho2 $
#include <cstdio>

#include "UnmanagedCostComputers.h"
#include "CudaHelperCommon.cuh"
#include "CostKernelBase.h"

// Multiplicative absolute-difference cost between two packed pixel values.
//
// The three low bytes of each pixel (bits 0-7, 8-15 and 16-23) are scaled to [0, 1];
// the top byte (bits 24-31) is ignored. For each byte the "similarity" is
// 1 - |difference|, and the returned cost is 1 minus the product of the three
// similarities: 0 when the three bytes match exactly, 1 as soon as any byte
// differs by the full 0..255 range.
__device__ float MultAbsDifferenceCost(unsigned int p1, unsigned int p2)
{
	// Normalised bytes of the first pixel, lowest byte first.
	const float a0 = (float)(p1 & 0xFF) / 255.0f;
	const float a1 = (float)((p1 >> 8) & 0xFF) / 255.0f;
	const float a2 = (float)((p1 >> 16) & 0xFF) / 255.0f;

	// Normalised bytes of the second pixel, same order.
	const float b0 = (float)(p2 & 0xFF) / 255.0f;
	const float b1 = (float)((p2 >> 8) & 0xFF) / 255.0f;
	const float b2 = (float)((p2 >> 16) & 0xFF) / 255.0f;

	// Cost is the complement of the combined per-byte similarity.
	return 1.0f -
		(1.0f - fabs(a0 - b0)) *
		(1.0f - fabs(a1 - b1)) *
		(1.0f - fabs(a2 - b2));
}

// Instantiate the kernel for this cost function. IMPLEMENT_COST_KERNEL is defined in
// "CostKernelBase.h"; it is given the kernel name (CostSpaceMadKernel, which
// RunCostSpaceMadKernel launches) and the cost function to use (MultAbsDifferenceCost).
// NOTE(review): the macro expansion is not visible in this file - confirm the generated
// kernel signature against CostKernelBase.h.
IMPLEMENT_COST_KERNEL(CostSpaceMadKernel, MultAbsDifferenceCost)

// Host-side launcher for CostSpaceMadKernel.
//
// Launch configuration:
//   block  : min(width, 384) threads in x, 1 in y
//   grid   : x = (width - disparityMax - 1) / (block width - disparityMax) + 1, y = height
//   shared : one unsigned int of dynamic shared memory per thread of the block
//
// All arguments are forwarded to the kernel unchanged, together with the block width
// (passed as the shared-array length).
//
// Precondition: disparityMax must be strictly smaller than the block width
// (min(width, 384)). Otherwise the grid-size divisor would be zero or negative; in that
// case a message is written to stderr and the kernel is NOT launched, so `result` is
// left untouched.
void RunCostSpaceMadKernel(unsigned int* leftImage, unsigned int* rightImage, int stride, int width, int height, int disparityMax, 
					  float rescaleGradient, float rescaleLimit, const cudaPitchedPtr & result)
{
	// Upper bound on the block width (and therefore on the shared-array length).
	const int maxBlockWidth = 384;
	const int sharedArrayLen = min(width, maxBlockWidth);

	// Divisor of the grid-size formula, kept in signed arithmetic on purpose: mixing it
	// with the unsigned dim3 members would turn a zero/negative value into a division by
	// zero or a wrapped-around huge divisor instead of a detectable error.
	const int columnsPerBlock = sharedArrayLen - disparityMax;
	if (columnsPerBlock <= 0)
	{
		fprintf(stderr,
			"RunCostSpaceMadKernel: disparityMax (%d) must be smaller than the block width (%d); kernel not launched\n",
			disparityMax, sharedArrayLen);
		return;
	}

	const int blocksPerRow = (width - disparityMax - 1) / columnsPerBlock + 1;

	dim3 blockDimension(sharedArrayLen, 1);
	dim3 gridDimension(blocksPerRow, height);

	RECORD_KERNEL_LAUNCH("Cost space kernel", gridDimension, blockDimension);

	CostSpaceMadKernel<<<gridDimension, blockDimension, sharedArrayLen * sizeof(unsigned int)>>>(
		leftImage, rightImage, stride, width, height, disparityMax, sharedArrayLen, rescaleGradient, rescaleLimit, result);

	CHECK_KERNEL_ERROR("Cost space kernel");
}