In [241]:
from __future__ import division
import pickle
import math
import scipy as sp
import scipy.stats as stats
import scipy.io as sio
import numpy as np
import matplotlib 
import collections
import math
import random 

Instruction

Your friend from overseas is visiting you and asks you the geographical locations of US cities on a map. Not having access to a US map, you realize that you cannot provide your friend accurate information. You recall that you have access to the relative distances between nine poplular US cities, given by a distance matrix D.

Being a machine learning student, you believe that it may be possible to infer to locations of these cities from the distance data. To find an embedding of these nine cities on a two dimensional map, you decide to solve it as an optimization problem.

In [243]:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [265]:
D = np.array([[0, 206, 429, 1504, 963, 2976, 3095, 2979, 1949],
     [206, 0, 233, 1308, 802, 2815, 2934, 2786, 1771],
     [429, 233, 0, 1075, 671, 2684, 2799, 2631, 1616],
     [1504, 1308, 1075, 0, 1329, 3273, 3053, 2687, 2037],
     [963, 802, 671, 1329, 0, 2013, 2142, 2054, 996],
     [2976, 2815, 2686, 3273, 2013, 0, 808, 1131, 1307],
     [3095, 2934, 2799, 3053, 2142, 808, 0, 379, 1235],
     [2979, 2786, 2631, 2687, 2054, 1131, 379, 0, 1059],
     [1949, 1771, 1616, 2037, 996, 1307, 1235, 1059, 0]])

#Initialize x
x = np.array([[random.randint(-3000,3000),random.randint(-3000,3000)]])
for i in range(8):
    x = np.append(x,[[random.randint(-3000,3000),random.randint(-3000,3000)]],0)
    
In [266]:
def dC(x,y):#distanceCalculator
    return math.sqrt((x-y).dot(x-y))

def sim1(x,y):#similarity b/t 
    s = 0
    for a,b in zip(x,y):
        s += dC(a,b)
    return s
    
def gD(x,eta,D):#gradientDescent
    cnt = 0
    while True:
        cnt += 1
        x_old = x.copy()
        print cnt
        for i in range(9):
            for j in range(9):
                if i != j:
                    x[i] = x[i] -  eta * (2*(x[i] - x[j])*(dC(x[i],x[j]) - D[i][j]))/(dC(x[i],x[j])+ 0.000000001)       
        print sim1(x,x_old)
        if sim1(x,x_old) < 0.1 :
            break
        if cnt == 5000:
            break
    return x            
               
In [268]:
D_new = np.array([[0]*9]*9)
for i in range(9):
    for j in range(9):
        D_new[i][j] = dC(x[i],x[j])
In [ ]:
x_new = gD(x,0.01,D)
In [269]:
abs(D_new-D)#difference compared to reality
Out[269]:
array([[  0,  10,  12,  69,   6, 104,  29,  50,  39],
       [ 10,   0,   1,  14,  21,  96,  49,  46,  50],
       [ 12,   1,   0,  50,  24,  44,  53,  52,  50],
       [ 69,  14,  50,   0,  52,  91,  59,  23, 150],
       [  6,  21,  24,  52,   0,   8,  36,  94,  55],
       [104,  96,  46,  91,   8,   0,   1,  35,   0],
       [ 29,  49,  53,  59,  36,   1,   0,  10,  47],
       [ 50,  46,  52,  23,  94,  35,  10,   0,  40],
       [ 39,  50,  50, 150,  55,   0,  47,  40,   0]])
In [270]:
pylab.plot(x[:,0],x[:,1],'ro')
labels = ['BOS','NYC','DC','MIA','CHI','SEA','SF','LA','DEN']
for label, x, y in zip(labels, x[:, 0], x[:,1]):
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

pylab.show
Out[270]:
<function matplotlib.pyplot.show>

Observations

1. Only relative location is simulated.

2. The initial value of x influde the optimal a lot. (this is based on my experiments of trying different range for initial values)