function [Mkxy, dX, dY] = register_ncc(source_img, target_img, opt)
%REGISTER_NCC  Non-rigid image registration driven by normalized cross-correlation.
%
%   [Mkxy, dX, dY] = register_ncc(source_img, target_img)
%   [Mkxy, dX, dY] = register_ncc(source_img, target_img, opt)
%
%   Iteratively moves a sampling grid so that TARGET_IMG, resampled on that
%   grid, correlates better with a lightly Gaussian-smoothed SOURCE_IMG.
%   Each iteration computes a per-pixel update from the cross-correlation
%   terms, adds a penalty term obtained by dividing the update by a
%   frequency-domain stencil, smooths the result with a Gaussian kernel and
%   adds it to the sampling grid.
%
%   Inputs
%     source_img, target_img  2-D images (converted to double).
%     opt                     Optional struct. Missing fields fall back to
%                             the defaults listed below.
%         max_it    maximum iteration count                     (1000)
%         lemda     step size of the update                     (1)
%         sigma     std. dev. of the 21x21 smoothing Gaussian   (10)
%         beta      weight of the penalty term                  (-0.2)
%         alpha     stencil weight of the penalty               (0.84)
%                   (gamma is set to 1 - alpha)
%         multiRes  if non-zero, both images are downsampled by
%                   2^multiRes before registration              (0)
%
%   Outputs
%     Mkxy   target image resampled on the grid of the LAST iteration
%            (samples falling outside the target are set to 0).
%     dX,dY  sampling-grid coordinates of the iteration BEFORE the last
%            one, with column 2 / column end-1 of dX and row 2 / row end-1
%            of dY clamped to the image extent. If no second-to-last
%            iteration exists, the identity grid is returned.
%
%   Side effects: prints the correlation value each iteration and shows the
%   current resampled image in figure 999.
%
%   NOTE(review): Mkxy and (dX, dY) refer to different iterations; this is
%   inherited from the original implementation - confirm it is intended.

% ---- Parameters (defaults, then per-field overrides) --------------------
max_it   = 1000;
lemda    = 1;      % Step size of the velocity update
sigma    = 10;     % Size of Gaussian smoothing
beta     = -0.2;   % Weight of the penalty term
alpha    = 0.84;   % Stencil of the penalty
gamma    = 0.16;
multiRes = 0;

if nargin >= 3 && ~isempty(opt)
    if ~isstruct(opt)
        error('register_ncc:invalidOptions', 'opt must be a struct.');
    end
    if isfield(opt, 'max_it'),   max_it   = opt.max_it;   end
    if isfield(opt, 'lemda'),    lemda    = opt.lemda;    end
    if isfield(opt, 'sigma'),    sigma    = opt.sigma;    end
    if isfield(opt, 'beta'),     beta     = opt.beta;     end
    if isfield(opt, 'alpha')
        alpha = opt.alpha;
        gamma = 1 - alpha;
    end
    if isfield(opt, 'multiRes'), multiRes = opt.multiRes; end
end

if ndims(source_img) ~= 2 || ndims(target_img) ~= 2
    error('register_ncc:invalidInput', ...
          'source_img and target_img must be 2-D images.');
end

source_img = double(source_img);
target_img = double(target_img);

% ---- Optional coarse-resolution pass ------------------------------------
if multiRes ~= 0
    img_org = source_img;
    [M, N] = size(source_img);
    coarse_size = [M/(2^(multiRes)), N/(2^(multiRes))];
    source_img = imresize(source_img, coarse_size, 'bilinear');
    target_img = imresize(target_img, coarse_size, 'bilinear');
end

% ---- Fixed quantities ---------------------------------------------------
GauImg  = fspecial('gaussian', [5,5], 0.6);
img_gau = imfilter(source_img, GauImg, 'same');   % lightly smoothed source
[sizeY, sizeX] = size(source_img);
[Xxy, Yxy] = meshgrid(1:sizeX, 1:sizeY);          % sampling grid (identity)
Gau = fspecial('gaussian', [21,21], sigma);       % Gaussian kernel for smoothing

% Wavenumbers in FFT order. ceil/floor keeps them integer multiples of
% 2*pi/N for odd N as well (for even N this equals [0:N/2-1, -N/2:-1]).
kkx = (2*pi/sizeX) * [0:(ceil(sizeX/2)-1), (-floor(sizeX/2)):(-1)];
kky = (2*pi/sizeY) * [0:(ceil(sizeY/2)-1), (-floor(sizeY/2)):(-1)];
[KX, KY] = meshgrid(kkx, kky);
delsq = 2*alpha*((1-cos(KX)) + (1-cos(KY))) + gamma;
delsq = -delsq.^2;
delsq(1,1) = 1;   % DC term is overwritten before the spectral division

% Source statistics do not change during the iteration.
src_mean       = mean(img_gau(:));
src_std        = std(img_gau(:));
src_centered   = img_gau - src_mean;
src_recentered = src_centered - mean(src_centered(:));
v1 = src_std.^2;

% Correlation measure used for reporting and for the stopping rule.
ncc_of = @(m) sum(sum(src_centered .* (m - mean(m(:))))) / (src_std * std(m(:)));

Mkxy = interp2(target_img, Xxy, Yxy);
Mkxy(isnan(Mkxy)) = 0;
ncc_prev = NaN;
ncc_cur  = ncc_of(Mkxy);
disp(['Normalized Cross-Correlation: ', num2str(ncc_cur)]);

% Only the grid of the previous iteration is needed for the outputs.
prev_X = Xxy;
prev_Y = Yxy;

% ---- Iteration ----------------------------------------------------------
it = 1;
stop = 1;
while (it < max_it) && (stop > 0)
    it = it + 1;
    figure(999),imshow(Mkxy, []);
    [Sx, Sy] = gradient(Mkxy);

    tgt_std      = std(Mkxy(:));
    tgt_centered = Mkxy - mean(Mkxy(:));

    Dv12_Dh = src_recentered.*Sx;
    Dv12_Dg = src_recentered.*Sy;

    Dv2_Dh = (1/tgt_std).*tgt_centered.*Sx;
    Dv2_Dg = (1/tgt_std).*tgt_centered.*Sy;

    v2sq4 = var(Mkxy(:)).^2;
    v2    = tgt_std;
    v12h  = mean(tgt_centered(:).*src_centered(:));

    DNCC_Dh = (Dv12_Dh.*sqrt(v2sq4).*v12h - v2.*Dv2_Dh.*(v12h.^2))./(v1*v2sq4);
    DNCC_Dg = (Dv12_Dg.*sqrt(v2sq4).*v12h - v2.*Dv2_Dg.*(v12h.^2))./(v1*v2sq4);

    % Penalty term: divide by the stencil symbol in the frequency domain,
    % remove the arbitrary constant (corner forced to 0), zero the border.
    Vx1 = real(ifft2(fft2(DNCC_Dh)./delsq));
    Vx1 = Vx1 - Vx1(1,1);
    Vy1 = real(ifft2(fft2(DNCC_Dg)./delsq));
    Vy1 = Vy1 - Vy1(1,1);
    Vx1(:,1) = 0; Vx1(:,sizeX) = 0; Vx1(1,:) = 0; Vx1(sizeY,:) = 0;
    Vy1(:,1) = 0; Vy1(:,sizeX) = 0; Vy1(1,:) = 0; Vy1(sizeY,:) = 0;

    upd_h = DNCC_Dh + beta*Vx1;
    upd_g = DNCC_Dg + beta*Vy1;

    % Smooth the update and pin the image border.
    ui_gau = lemda*imfilter(upd_h, Gau, 'same');
    vi_gau = lemda*imfilter(upd_g, Gau, 'same');
    ui_gau(1,:) = 0; ui_gau(end,:) = 0; ui_gau(:,1) = 0; ui_gau(:,end) = 0;
    vi_gau(1,:) = 0; vi_gau(end,:) = 0; vi_gau(:,1) = 0; vi_gau(:,end) = 0;

    prev_X = Xxy;
    prev_Y = Yxy;
    Xxy = Xxy + ui_gau;
    Yxy = Yxy + vi_gau;

    Mkxy = interp2(target_img, Xxy, Yxy);
    Mkxy(isnan(Mkxy)) = 0;

    ncc_prev2 = ncc_prev;
    ncc_prev  = ncc_cur;
    ncc_cur   = ncc_of(Mkxy);
    disp(['Normalized Cross-Correlation: ', num2str(ncc_cur)]);

    % Continue only while two consecutive changes have the same sign.
    if it >= 3
        stop = (ncc_cur - ncc_prev)*(ncc_prev - ncc_prev2);
    end
end

% ---- Outputs ------------------------------------------------------------
dX = prev_X;
dX(dX(:,2)<1,2) = 1;
dX(dX(:,sizeX-1)>sizeX,sizeX-1) = sizeX;

dY = prev_Y;
dY(2,dY(2,:)<1) = 1;
dY(sizeY-1,dY(sizeY-1,:)>sizeY) = sizeY;

if multiRes ~= 0
    % Upsample the coarse result, binarise it and refine at full resolution.
    % NOTE(review): optimizeNCC_v3 is not defined in this file - confirm it
    % is on the path. dX and dY remain at the coarse resolution.
    Mkxy = imresize(Mkxy,[M,N],'bilinear');
    level = graythresh(Mkxy);
    target_img = im2bw(Mkxy,level);
    [Mkxy] = optimizeNCC_v3(img_org, target_img, 0);
end
