// $Id: ImageDiffKernel.cu 792 2009-10-01 18:24:11Z daho2 $
#include "UnmanagedPreProcessors.h"
#include "CudaHelperCommon.cuh"

// Computes one 8-bit output channel: the residual between an image channel and the matching
// sub-image channel, mixed with the original image channel according to mixFactor.
//
//   imageChannel    - channel value taken from the image (callers pass a value masked to 0-255).
//   subImageChannel - channel value taken from the sub-image (callers pass a value masked to 0-255).
//   mixFactor       - blend weight; for values in [0, 1] the formula itself stays within 0-255.
//
// Returns the mixed value truncated towards zero, clamped to 0-255 so that an out-of-range
// mixFactor can neither go negative nor carry into the neighbouring channel when packed.
static __device__ __forceinline__ unsigned int MixResidualChannel(const unsigned int imageChannel, const unsigned int subImageChannel, const float mixFactor)
{
	const float mixed = (1 - mixFactor) * 127.5f + 0.5f * ((1 + mixFactor) * (int)imageChannel - (1 - mixFactor) * (int)subImageChannel);

	// Clamp before the float -> unsigned conversion; the conversion itself truncates the fraction.
	return (unsigned int)fminf(fmaxf(mixed, 0.0f), 255.0f);
}

// Replaces every pixel of 'image' with a mix of the original pixel and its residual against 'subImage'.
//
// Each thread handles the single pixel (x, y) derived from its 2D block/thread index; threads that
// fall outside width x height do nothing, so the grid may over-cover the image.
//
//   image          - packed pixels, read and then overwritten in place.
//   imageStride    - distance between consecutive rows of 'image', in unsigned int elements.
//   subImage       - packed pixels to compare against, read only.
//   subImageStride - distance between consecutive rows of 'subImage', in unsigned int elements.
//   width, height  - size in pixels of the region to process.
//   mixFactor      - blend weight passed to the per-channel formula; values outside [0, 1] are
//                    tolerated, with each channel clamped to 0-255.
//
// Only the three low bytes of each pixel (named b, g, r here) are processed; the top byte of every
// written pixel is zero.
__global__ void ImageDiffKernel(unsigned int* const image, const int imageStride, const unsigned int* const subImage, const int subImageStride,
								const int width, const int height, const float mixFactor)
{
	const int x = blockDim.x * blockIdx.x + threadIdx.x;
	const int y = blockDim.y * blockIdx.y + threadIdx.y;

	// The grid is rounded up to whole blocks, so edge threads may have no pixel to process.
	if(x >= width || y >= height)
	{
		return;
	}

	const int imageIndex = imageStride * y + x;
	const unsigned int imageVal = image[imageIndex];
	const unsigned int subImageVal = subImage[subImageStride * y + x];

	// Compute the residual from the two images, and mix with the original, one byte-wide channel at a time.
	const unsigned int b = MixResidualChannel( imageVal        & 0xFF,  subImageVal        & 0xFF, mixFactor);
	const unsigned int g = MixResidualChannel((imageVal >>  8) & 0xFF, (subImageVal >>  8) & 0xFF, mixFactor);
	const unsigned int r = MixResidualChannel((imageVal >> 16) & 0xFF, (subImageVal >> 16) & 0xFF, mixFactor);

	// Repack the three channels; each is at most 255, so the fields cannot overlap.
	image[imageIndex] = b + (g << 8) + (r << 16);
}

// Host-side launcher for ImageDiffKernel: covers a width x height image with 32x8-thread blocks
// and overwrites 'image' in place with the mixed image/residual result.
//
//   image          - pixels to update; dereferenced by the kernel, so it must be device-accessible.
//   imageStride    - row stride of 'image' in unsigned int elements.
//   subImage       - pixels to compare against; dereferenced by the kernel, so it must be device-accessible.
//   subImageStride - row stride of 'subImage' in unsigned int elements.
//   width, height  - size in pixels of the region to process; nothing is launched if either is <= 0.
//   mixFactor      - blend weight forwarded unchanged to the kernel.
//
// The launch uses the default stream; errors are reported through CHECK_KERNEL_ERROR.
void RunImageDiffKernel(unsigned int* image, int imageStride, unsigned int* subImage, int subImageStride, int width, int height, float mixFactor)
{
	// An empty image has no pixels to process. Bail out before sizing the grid: (width - 1) is
	// promoted to unsigned by the division below, so 0 would wrap around to a gigantic (or
	// invalid) grid dimension.
	if(width <= 0 || height <= 0)
	{
		return;
	}

	dim3 blockDimension(32, 8);

	// Round up so partially filled blocks still cover the right and bottom edges.
	dim3 gridDimension((width - 1) / blockDimension.x + 1, (height - 1) / blockDimension.y + 1);

	RECORD_KERNEL_LAUNCH("Image difference kernel", gridDimension, blockDimension);
	ImageDiffKernel<<<gridDimension, blockDimension>>>(image, imageStride, subImage, subImageStride, width, height, mixFactor);
	CHECK_KERNEL_ERROR("Image difference kernel");
}
