% This function belongs to Piotr Dollar's Toolbox
% http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html
% Please refer to the above web page for definitions and clarifications
%
% Calculates the distance between sets of vectors.
%
% Let X be an m-by-p matrix representing m points in p-dimensional space
% and Y be an n-by-p matrix representing another set of points in the same
% space. This function computes the m-by-n distance matrix D where D(i,j)
% is the distance between X(i,:) and Y(j,:). This function has been
% optimized where possible, with most of the distance computations
% requiring few or no loops.
%
% The metric can be one of the following:
%
% 'euclidean' / 'sqeuclidean':
% Euclidean / SQUARED Euclidean distance. Note that 'sqeuclidean'
% is significantly faster.
%
% 'chisq'
% The chi-squared distance between two vectors is defined as:
% d(x,y) = sum( (xi-yi)^2 / (xi+yi) ) / 2;
% The chi-squared distance is useful when comparing histograms.
%
% 'cosine'
% Distance is defined as the cosine of the angle between two vectors.
%
% 'emd'
% Earth Mover's Distance (EMD) between positive vectors (histograms).
% Note for 1D, with all histograms having equal weight, there is a simple
% closed form for the calculation of the EMD. The EMD between histograms
% x and y is given by the sum(abs(cdf(x)-cdf(y))), where cdf is the
% cumulative distribution function (computed simply by cumsum).
%
% 'L1'
% The L1 distance between two vectors is defined as: sum(abs(x-y));
%
%
% USAGE
% D = pdist2( X, Y, [metric] )
%
% INPUTS
% X - [m x p] matrix of m p-dimensional vectors
% Y - [n x p] matrix of n p-dimensional vectors
% metric - ['sqeuclidean'], 'chisq', 'cosine', 'emd', 'euclidean', 'L1'
%
% OUTPUTS
% D - [m x n] distance matrix
%
% EXAMPLE
% [X,IDX] = demoGenData(100,0,5,4,10,2,0);
% D = pdist2( X, X, 'sqeuclidean' );
% distMatrixShow( D, IDX );
%
% See also PDIST, DISTMATRIXSHOW
% Piotr's Image&Video Toolbox Version 2.0
% Copyright (C) 2007 Piotr Dollar. [pdollar-at-caltech.edu]
% Please email me if you find bugs, or have suggestions or questions!
% Licensed under the Lesser GPL [see external/lgpl.txt]
function D = pdist2( X, Y, metric )
if( nargin<3 || isempty(metric) ); metric=0; end;
switch metric
case {0,'sqeuclidean'}
D = distEucSq( X, Y );
case 'euclidean'
D = sqrt(distEucSq( X, Y ));
case 'L1'
D = distL1( X, Y );
case 'cosine'
D = distCosine( X, Y );
case 'emd'
D = distEmd( X, Y );
case 'chisq'
D = distChiSq( X, Y );
otherwise
error(['pdist2 - unknown metric: ' metric]);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = distL1( X, Y )
m = size(X,1); n = size(Y,1);
mOnes = ones(1,m); D = zeros(m,n);
for i=1:n
yi = Y(i,:); yi = yi( mOnes, : );
D(:,i) = sum( abs( X-yi),2 );
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = distCosine( X, Y )
if( ~isa(X,'double') || ~isa(Y,'double'))
error( 'Inputs must be of type double'); end;
p=size(X,2);
XX = sqrt(sum(X.*X,2)); X = X ./ XX(:,ones(1,p));
YY = sqrt(sum(Y.*Y,2)); Y = Y ./ YY(:,ones(1,p));
D = 1 - X*Y';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = distEmd( X, Y )
Xcdf = cumsum(X,2);
Ycdf = cumsum(Y,2);
m = size(X,1); n = size(Y,1);
mOnes = ones(1,m); D = zeros(m,n);
for i=1:n
ycdf = Ycdf(i,:);
ycdfRep = ycdf( mOnes, : );
D(:,i) = sum(abs(Xcdf - ycdfRep),2);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = distChiSq( X, Y )
%%% supposedly it's possible to implement this without a loop!
m = size(X,1); n = size(Y,1);
mOnes = ones(1,m); D = zeros(m,n);
for i=1:n
yi = Y(i,:); yiRep = yi( mOnes, : );
s = yiRep + X; d = yiRep - X;
D(:,i) = sum( d.^2 ./ (s+eps), 2 );
end
D = D/2;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = distEucSq( X, Y )
%if( ~isa(X,'double') || ~isa(Y,'double'))
% error( 'Inputs must be of type double'); end;
m = size(X,1); n = size(Y,1);
%Yt = Y';
XX = sum(X.*X,2);
YY = sum(Y'.*Y',1);
D = XX(:,ones(1,n)) + YY(ones(1,m),:) - 2*X*Y';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% function D = distEucSq( X, Y )
%%%% code from Charles Elkan with variables renamed
% m = size(X,1); n = size(Y,1);
% D = sum(X.^2, 2) * ones(1,n) + ones(m,1) * sum(Y.^2, 2)' - 2.*X*Y';
%%% LOOP METHOD - SLOW
% [m p] = size(X);
% [n p] = size(Y);
%
% D = zeros(m,n);
% onesM = ones(m,1);
% for i=1:n
% y = Y(i,:);
% d = X - y(onesM,:);
% D(:,i) = sum( d.*d, 2 );
% end
%%% PARALLEL METHOD THAT IS SUPER SLOW (slower then loop)!
% % From "MATLAB array manipulation tips and tricks" by Peter J. Acklam
% Xb = permute(X, [1 3 2]);
% Yb = permute(Y, [3 1 2]);
% D = sum( (Xb(:,ones(1,n),:) - Yb(ones(1,m),:,:)).^2, 3);
%%% USELESS FOR EVEN VERY LARGE ARRAYS X=16000x1000!! and Y=100x1000
% call recursively to save memory
% if( (m+n)*p > 10^5 && (m>1 || n>1))
% if( m>n )
% X1 = X(1:floor(end/2),:);
% X2 = X((floor(end/2)+1):end,:);
% D1 = distEucSq( X1, Y );
% D2 = distEucSq( X2, Y );
% D = cat( 1, D1, D2 );
% else
% Y1 = Y(1:floor(end/2),:);
% Y2 = Y((floor(end/2)+1):end,:);
% D1 = distEucSq( X, Y1 );
% D2 = distEucSq( X, Y2 );
% D = cat( 2, D1, D2 );
% end
% return;
% end