// $id$
#include "UnmanagedUpsamplers.h"
#include "CudaHelperCommon.cuh"

// Nearest-neighbour up-sampling kernel.
//
// Launched on a 2D grid; each thread handles at most one output pixel (x, y) and
// copies the input pixel at (x / sampleFactor, y / sampleFactor) into it.
//
//   dMap          - input map, read-only
//   dMapStride    - distance between consecutive input rows, in float elements
//   inWidth       - input width in pixels
//   inHeight      - input height in pixels
//   outWidth      - output width in pixels
//   outHeight     - output height in pixels
//   sampleFactor  - integer up-sampling factor (used as a divisor, so it must be non-zero)
//   dMapOut       - output map
//   dMapOutStride - distance between consecutive output rows, in float elements
//
// Output pixels whose source coordinate lies outside the input are left unwritten.
__global__ void NnKernel(const float* const dMap, int dMapStride, const int inWidth, const int inHeight, const int outWidth, const int outHeight, 
						 const int sampleFactor, float* const dMapOut, const int dMapOutStride)
{
	// Output pixel owned by this thread.
	const int outCol = blockDim.x * blockIdx.x + threadIdx.x;
	const int outRow = blockDim.y * blockIdx.y + threadIdx.y;

	// The grid is rounded up to whole blocks, so some threads fall outside the output image.
	if(outCol >= outWidth || outRow >= outHeight)
		return;

	// Integer division maps each sampleFactor-wide run of output pixels onto one input pixel.
	const int srcCol = outCol / sampleFactor;
	const int srcRow = outRow / sampleFactor;

	// Skip output pixels that have no corresponding input pixel.
	if(srcCol >= inWidth || srcRow >= inHeight)
		return;

	dMapOut[dMapOutStride * outRow + outCol] = dMap[dMapStride * srcRow + srcCol];
}

// Host-side launcher for NnKernel: up-samples inputDepth into outputDepth using
// nearest-neighbour lookup, one thread per output pixel in 32x8 thread blocks.
//
//   inputDepth        - input map passed to the kernel (read-only)
//   inputDepthStride  - distance between consecutive input rows, in float elements
//   inWidth, inHeight - input size in pixels
//   outWidth, outHeight - output size in pixels
//   sampleFactor      - integer up-sampling factor; must be positive
//   outputDepth       - output map passed to the kernel
//   outputDepthStride - distance between consecutive output rows, in float elements
//
// If the output is empty (outWidth or outHeight <= 0) or sampleFactor is not positive,
// no kernel is launched and outputDepth is left untouched.
void RunNnKernel(const float* inputDepth, int inputDepthStride, int inWidth, int inHeight, int outWidth, int outHeight, int sampleFactor, float* outputDepth, int outputDepthStride)
{
	// Guard before computing the grid:
	//  * dim3 members are unsigned, so (outWidth - 1) with outWidth <= 0 would wrap to a
	//    huge value and request an enormous (or invalid) grid instead of an empty one;
	//  * the kernel divides by sampleFactor, so zero is a division by zero and a negative
	//    value produces negative source indices that read before the input buffer.
	if(outWidth <= 0 || outHeight <= 0 || sampleFactor <= 0)
		return;

	dim3 blockDimension(32, 8);
	// Ceiling division so that partially filled blocks at the right/bottom edges are covered.
	dim3 gridDimension((outWidth - 1) / blockDimension.x + 1, (outHeight - 1) / blockDimension.y + 1);

	RECORD_KERNEL_LAUNCH("NN Up-sampling kernel", gridDimension, blockDimension);
	NnKernel<<<gridDimension, blockDimension>>>(inputDepth, inputDepthStride, inWidth, inHeight, outWidth, outHeight, sampleFactor, outputDepth, outputDepthStride);
	CHECK_KERNEL_ERROR("NN Up-sampling kernel");
}