% An example script for using the CCRF framework with gradient descent for
% learning and predicting dimensional emotions. One emotion dimension is
% trained and evaluated per run; it is selected with the dim variable below.

% first we add the CCRF functionality here
addpath('../lib/');

% also add data preparation helper functions
addpath('../data preparation/');

% folder handed to prepareMultiData below; presumably holds the SVR
% predictions used as input to the CCRF -- TODO confirm
svrPredFolder = '../data/svrPreds/'; 

% print a banner announcing the training run
fprintf('\n-----------------------------\n');
fprintf('CA-CCRF gradient descent training \n------------------------------\n');

% Prepare the training data. prepareMultiData returns four outputs, stored
% here as xNormed, xOffsets, yNormed and yUnnormed. Judging by the names,
% yNormed is the normalised and yUnnormed the unnormalised version of the
% labels, and xOffsets are the offsets calculated locally from the
% unnormalised x values (needed because CA-CCRF might be used here)
% -- TODO confirm against prepareMultiData.

% Index into dimLabels of the emotion dimension to train; this sample
% trains valence only. Change dim to train another dimension.
dimLabels = {'valence','arousal','power','expectancy'};
dim = 1;

% lambda_a / lambda_b values for each dimension, in dimLabels order
% (presumably the regularisation weights for the alphas and betas -- they
% are passed straight to the training routine).
lambdaAByDim = [1e4,  1e4,  1e6, 1e6];
lambdaBByDim = [1e-2, 1e-2, 1e2, 1e-2];
lambda_a = lambdaAByDim(dim);
lambda_b = lambdaBByDim(dim);

% we are using the second training set (as SVRs were trained on train1)
[xNormed, xOffsets, yNormed, yUnnormed] = prepareMultiData(dim, dimLabels, svrPredFolder, {'/train2_'});

% one cell of yNormed per training example; feature count is taken from
% the columns of the first example
nExamples = numel(yNormed);
nFeatures = size(xNormed{1},2);

% Settings handed to the gradient descent training routine
nIterations = 600;       % iteration count passed to the trainer
learningRate = 7.5e-6;   % step size
threshold = 1e-4;        % presumably the convergence threshold -- TODO confirm


% number of similarity functions of each kind
nGaussians = 10;
nNeighbor = 10;

% all features are used by the similarity functions
range = 1:nFeatures;

% Preallocate the column cell array instead of growing it inside the loops.
% The first nGaussians entries hold the similarityGauss handles, the
% remaining nNeighbor entries hold the similarityNeighbor handles.
similarities = cell(nGaussians + nNeighbor, 1);

% a family of similarityGauss functions with different sigmas
% (sigma = 2^-6, 2^-7, ..., 2^-15); each handle captures the value of
% sigma at the time it is created
for i=1:nGaussians
    sigma = 2.0^(-i-5);
    similarities{i} = @(x, m) similarityGauss(x, sigma, range, m);
end

% a family of similarityNeighbor functions, one for each value 1..nNeighbor
% of the second argument
for i=1:nNeighbor
    neigh = i;
    similarities{nGaussians + i} = @(x, m) similarityNeighbor(x, neigh, range, m);
end
    
% Indicators are switched off in this version, as they don't seem to help
useIndicators = 0;
% one (empty) mask cell per training example
masks = cell(numel(yNormed), 1);

% Starting values for the parameter estimates: one alpha per feature and
% one beta per similarity function, all set to 1. The exact values don't
% matter much because the problem is convex (as long as they are positive).
alphas = ones(nFeatures, 1);
beta = ones(numel(similarities), 1);
% Train on the normalised data. tic/toc time the training call together
% with the parameter report printed below.
tic
[alphasCCRF, betaCCRF, scaling] = CCRF_training_gradient_descent( ...
    nIterations, nExamples, learningRate, threshold, ...
    xNormed, yNormed, yUnnormed, masks, alphas, beta, ...
    lambda_a, lambda_b, similarities, useIndicators, true);

% report the learned parameters
fprintf('betas: min: %.4f, med: %.4f, max: %.4f\n', min(betaCCRF), median(betaCCRF), max(betaCCRF));
fprintf('alphas: '); fprintf('%.4f\t', alphasCCRF');
% each alpha as a fraction of the sum of all alphas
alpha_prop = alphasCCRF / sum(alphasCCRF);
fprintf('\nalpha proportions: ');fprintf('%.4f\t', alpha_prop'); fprintf('\n');
toc;

%--- Evaluation on training partition
% (xNormed, xOffsets, yUnnormed and masks still hold the training data here)
[corrs, rmss, meanCorr, meanRMS] = evaluateCCRFmodel(alphasCCRF, betaCCRF, xNormed, xOffsets, yUnnormed, masks, useIndicators, similarities, scaling, false);
fprintf('CA-CCRF corr on training: %.3f, rmse: %.3f\n', meanCorr, meanRMS);

%%
% Load the test partition; this overwrites the training data held in
% xNormed, xOffsets, yNormed and yUnnormed.
[xNormed, xOffsets, yNormed, yUnnormed] = prepareMultiData(dim, dimLabels, svrPredFolder, {'/test_'});

% masks was sized for the training partition; rebuild it so that it has one
% (empty) cell per test example and matches the data passed below
masks = cell(numel(yNormed),1);

%--- Evaluation on test partition
% (the *Devel result names are kept as they are; the data is the test set)
[corrsDevel, rmssDevel, meanCorrDevel, meanRMSDevel] = evaluateCCRFmodel(alphasCCRF, betaCCRF, xNormed, xOffsets, yUnnormed, masks, useIndicators, similarities, scaling, true);
fprintf('CA-CCRF corr on test: %.3f, rmse: %.3f \n', meanCorrDevel, meanRMSDevel);
    