In [1]:
from __future__ import  division
import scipy as sp
import scipy.stats as stats
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import collections
In [7]:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [4]:
#Fake data Process
def FakeData(w,m,n,d,threshold=2,labeled_fraction=0.2):#m,n is the size for each group
    """Generate two synthetic point groups split by ``x.dot(w)`` and label a fraction of them.

    Candidate points are drawn two coordinates at a time from a normal
    distribution with mean ``np.mean(w)`` and standard deviation ``d``; a
    constant 1 is appended as the third component. A candidate goes to the
    positive group when ``x.dot(w) > threshold`` and to the negative group when
    ``x.dot(w) < threshold`` (a candidate exactly on the threshold is dropped).
    Sampling continues until both groups are full.

    Parameters
    ----------
    w : array-like of length 3
        Weight vector used to score the candidates.
    m, n : int
        Number of points wanted in the positive / negative group.
    d : float
        Standard deviation of the sampling distribution.
    threshold : float, optional
        Score that separates the two groups (default 2, the original constant).
    labeled_fraction : float, optional
        Fraction of all points handed to ``SetUpLabeledPortion`` as labeled
        (default 0.2, the original constant).

    Returns
    -------
    tuple
        ``(x1, x2, x_labeled, y_labeled, x_unlabeled, y_unlabeled)`` where
        ``x1`` / ``x2`` are the (m, 3) / (n, 3) positive / negative groups and
        the remaining four values come from ``SetUpLabeledPortion``.
    """
    X1 = np.zeros((m,3))
    X2 = np.zeros((n,3))
    n_pos = 0
    n_neg = 0
    centre = np.mean(w)
    while n_pos < m or n_neg < n:
        # One candidate per iteration: two sampled coordinates plus the constant 1.
        candidate = np.append(np.random.normal(centre, d, 2), 1)
        score = candidate.dot(w)
        if n_pos < m and score > threshold:
            X1[n_pos] = candidate
            n_pos += 1
        elif n_neg < n and score < threshold:
            X2[n_neg] = candidate
            n_neg += 1
    # Stack positives first, then negatives, with matching +1 / -1 labels.
    x = np.append(X1,X2,0)
    y = np.array([1]*len(X1) + [-1]*len(X2))
    x_labeled,y_labeled,x_unlabeled,y_unlabeled = SetUpLabeledPortion(x,y,labeled_fraction)
    return X1,X2,x_labeled,y_labeled,x_unlabeled,y_unlabeled


def DemotionX(x1,x2):
    """Return copies of ``x1`` and ``x2`` with the column at index 2 removed.

    Both inputs must be 2-D arrays with at least three columns; the inputs
    themselves are not modified.
    """
    dropped_column = 2
    reduced_first = np.delete(x1, dropped_column, axis=1)
    reduced_second = np.delete(x2, dropped_column, axis=1)
    return reduced_first, reduced_second


def LinearFuncW(w,x):
    """Evaluate the line ``w[0]*a + w[1]*b + w[2] = 0`` at each point's first coordinate.

    For every point ``p`` in ``x`` the value ``b = (-w[2] - p[0]*w[0]) / w[1]``
    is computed; only ``p[0]`` is read from each point.

    Returns a list with one value per point, in input order.
    """
    return [(-w[2] - point[0]*w[0])/w[1] for point in x]

def PlotTheHeperplane(W,x1,x2,x_unlabeled):#W is the list of all classifiers
    """Scatter the three point sets and overlay one line per classifier in ``W``.

    ``x_unlabeled`` is drawn with white squares, ``x1`` with red dots and
    ``x2`` with blue stars. Each classifier's line is evaluated by
    ``LinearFuncW`` at the first coordinate of every point in ``x1`` and ``x2``
    and drawn in a colour that cycles through a fixed eight-colour list.
    """
    palette = ['r','b','g','c','m','k','y','w']
    # Point clouds first, in the same order as before so later layers stay on top.
    for points, marker in ((x_unlabeled,'ws'),(x1,'r.'),(x2,'b*')):
        plt.plot(points[:,0],points[:,1],marker)
    all_points = np.append(x1,x2,0)
    abscissas = all_points[:,0]
    for idx,classifier in enumerate(W):
        line_colour = palette[idx%len(palette)]
        plt.plot(abscissas,LinearFuncW(classifier,all_points),line_colour)


# Weight vector (3 components, standard normal) shared by the cells below.
w = np.random.normal(loc=0, scale=1, size=3)

def SetUpLabeledPortion(x,y,f):#f is the fraction you want to label, then 1-f is the fraction remaining unlabeled
    """Randomly split ``(x, y)`` into a labeled part and an unlabeled part.

    Parameters
    ----------
    x : ndarray
        Samples, one per row.
    y : ndarray
        Labels, aligned with the rows of ``x``.
    f : float
        Fraction of the samples to place in the labeled part; the number of
        labeled samples is ``n * f`` rounded to the nearest integer.

    Returns
    -------
    tuple
        ``(x_labeled, y_labeled, x_unlabeled, y_unlabeled)``. The labeled rows
        are in the random order they were drawn; the unlabeled rows keep their
        original relative order.
    """
    n = len(x)#total number of data
    # np.random.choice needs an integer sample count; round() alone can return a float.
    n_labeled = int(round(n*f))#number of labeled data
    labeled_index_list = np.random.choice(n,n_labeled,replace = False)
    x_unlabeled = np.delete(x,labeled_index_list,0)
    y_unlabeled = np.delete(y,labeled_index_list,0)
    x_labeled = x[labeled_index_list]
    y_labeled = y[labeled_index_list]
    return x_labeled,y_labeled,x_unlabeled,y_unlabeled

def PlotUnlabeledData(x_unlabeled,y_unlabeled,W):#W is the list of all classifiers
    """Plot the unlabeled points coloured by their label, plus every classifier line.

    All points are first drawn as white squares; each point is then drawn again
    as a red dot when its entry in ``y_unlabeled`` equals 1 and as a blue star
    otherwise. One line per classifier in ``W`` is overlaid, evaluated by
    ``LinearFuncW`` at the points of ``x_unlabeled``.
    """
    palette = ['r','b','g','c','m','k','y','w']
    first_coords = x_unlabeled[:,0]
    plt.plot(first_coords,x_unlabeled[:,1],'ws')
    for idx in range(len(x_unlabeled)):
        point = x_unlabeled[idx]
        marker = 'r.' if y_unlabeled[idx] == 1 else 'b*'
        plt.plot(point[0],point[1],marker)
    for idx,classifier in enumerate(W):
        line_colour = palette[idx%len(palette)]
        plt.plot(x_unlabeled[:,0],LinearFuncW(classifier,x_unlabeled),line_colour)

def PlotlabeledData(x_labeled,y_labeled,W):#W is the list of all classifiers
    """Plot the labeled points coloured by their label, plus every classifier line.

    Each point is drawn as a red dot when its entry in ``y_labeled`` equals 1
    and as a blue star otherwise. One line per classifier in ``W`` is overlaid,
    evaluated by ``LinearFuncW`` at the points of ``x_labeled``.

    The lines are drawn from the ``x_labeled`` argument, so the function does
    not rely on any notebook-level variable being defined.
    """
    color = ['r','b','g','c','m','k','y','w']
    for i,x in enumerate(x_labeled):
        marker = 'r.' if y_labeled[i] == 1 else 'b*'
        plt.plot(x[0],x[1],marker)
    for i,w in enumerate(W):
        # Use this function's own data for the line's extent, not a global array.
        plt.plot(x_labeled[:,0],LinearFuncW(w,x_labeled),color[i%len(color)])



def PerceptronV0(train_data,train_label,n,max_epochs=None):#n is number of classifier wanted
    """Train ``n`` perceptrons, each on its own random ordering of the training set.

    Every perceptron starts from a zero weight vector and repeatedly sweeps the
    samples in a fixed random order. A sample with
    ``label * w.dot(sample) <= 0`` triggers the update ``w += label * sample``.
    Training of one perceptron stops after a sweep whose accumulated
    ``|w.dot(sample)|`` over the updated samples is zero, which is the case
    when the sweep made no update.

    Parameters
    ----------
    train_data : ndarray
        Training samples, one per row.
    train_label : ndarray
        Labels (+1 / -1) aligned with ``train_data``.
    n : int
        Number of classifiers to train.
    max_epochs : int or None, optional
        Upper bound on the number of sweeps per classifier. ``None`` (default)
        keeps the original unbounded behaviour, which does not terminate when
        the data cannot be separated.

    Returns
    -------
    list of ndarray
        One weight vector per trained classifier.
    """
    n_samples = len(train_data)
    # Weight length follows the data; fall back to the original 3 when there is no data.
    n_features = np.shape(train_data)[1] if n_samples else 3
    W = []
    for _ in range(n):
        w = np.zeros(n_features)
        seq = np.random.choice(range(n_samples),n_samples,replace=False)
        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            epoch += 1
            Cp = 0
            for t in seq:
                sample_input = train_data[t]
                if train_label[t]*(w.dot(sample_input)) <= 0:
                    w = w + train_label[t]*sample_input
                    Cp += abs(w.dot(sample_input))
            if Cp == 0:
                break
        W.append(w)
    return W

def ActiveLearner(W,x_unlabeled,y_unlabeled,x_labeled,y_labeled,verbose=False):
    """Query the points the classifiers disagree on and drop classifiers that fail on them.

    Steps:
      1. Every classifier in ``W`` votes +1 (``x.dot(w) >= 0``) or -1 on every
         unlabeled point.
      2. Points whose votes are not unanimous are queried: they are removed
         from the unlabeled set and appended, with their labels from
         ``y_unlabeled``, to the labeled set.
      3. A classifier fails on a queried point when ``label * w.dot(x) < 0``.
         Classifiers failing on more than one tenth of the queried points are
         removed.

    Parameters
    ----------
    W : sequence of ndarray
        Weight vectors of the current classifiers.
    x_unlabeled, y_unlabeled : ndarray
        Unlabeled samples and the labels revealed when a sample is queried.
    x_labeled, y_labeled : ndarray
        Currently labeled samples and their labels.
    verbose : bool, optional
        When True, print the vote-matrix shape and each signed score on the
        queried points. Off by default.

    Returns
    -------
    tuple
        ``(W, x_labelednew, y_labelednew, x_unlabelednew, y_unlabelednew,
        x_increment, y_increment)`` where ``W`` is an ndarray of the surviving
        weight vectors and ``x_increment`` / ``y_increment`` are the queried
        points and their labels.
    """
    # votes[i][j] is classifier j's prediction (+1 / -1) for unlabeled point i.
    votes = np.zeros((len(x_unlabeled),len(W)))
    for j,w in enumerate(W):
        for i,x in enumerate(x_unlabeled):
            votes[i][j] = 1 if x.dot(w) >= 0 else -1
    if verbose:
        print(votes.shape)

    # A point is queried when its row of votes holds more than one distinct value.
    query_index = [i for i in range(len(x_unlabeled)) if len(set(votes[i])) != 1]

    x_unlabelednew = np.delete(x_unlabeled,query_index,0)
    y_unlabelednew = np.delete(y_unlabeled,query_index,0)
    x_increment = x_unlabeled[query_index]
    y_increment = y_unlabeled[query_index]
    x_labelednew = np.append(x_labeled,x_increment,0)
    y_labelednew = np.append(y_labeled,y_increment,0)

    # failed_w[j][i] is 1 when classifier j is wrong on queried point i.
    failed_w = np.zeros((len(W),len(x_increment)))
    for j,w in enumerate(W):
        for i,x in enumerate(x_increment):
            signed_score = y_increment[i]*w.dot(x)
            if verbose:
                print(signed_score)
            if signed_score < 0:
                failed_w[j][i] = 1

    # 10.0 keeps this a true division regardless of the interpreter's "/" rule.
    failed_list = [j for j,fw in enumerate(failed_w) if fw.sum() > len(fw)/10.0]
    W = np.delete(W,failed_list,0)
    return W,x_labelednew,y_labelednew,x_unlabelednew,y_unlabelednew,x_increment,y_increment

def PlotIncrement(x_increment,y_increment):
    """Plot each queried point: a red dot when its label equals 1, a blue star otherwise."""
    for idx in range(len(x_increment)):
        point = x_increment[idx]
        marker = 'r.' if y_increment[idx] == 1 else 'b*'
        plt.plot(point[0],point[1],marker)
In [5]:
# Build 200 + 200 synthetic points, train 30 perceptrons on the labeled part, plot everything.
x1,x2,x_labeled,y_labeled,x_unlabeled,y_unlabeled = FakeData(w,200,200,1)
W = PerceptronV0(x_labeled,y_labeled,30)
PlotTheHeperplane(W,x1,x2,x_unlabeled)

CAL algorithm Implementation

20% labeled data, one iteration; the version space is reduced by discarding every classifier that misclassifies more than 10% of the queried points.

In [12]:
# Draw the current classifiers over all points, then the labeled and unlabeled sets.
PlotTheHeperplane(W,x1,x2,x_unlabeled)
PlotlabeledData(x_labeled,y_labeled,W)
PlotUnlabeledData(x_unlabeled,y_unlabeled,W)

# One active-learning round: query the disputed points and prune the classifiers.
(Wt,
 x_labelednew,y_labelednew,
 x_unlabelednew,y_unlabelednew,
 x_increment,y_increment) = ActiveLearner(W,x_unlabeled,y_unlabeled,x_labeled,y_labeled)
Out[12]:
<matplotlib.figure.Figure at 0x10cd05910>
<matplotlib.figure.Figure at 0x10cd05910>
In [14]:
# Show the queried points together with the classifiers that survived the round.
PlotIncrement(x_increment,y_increment)
PlotTheHeperplane(Wt,x1,x2,x_unlabeled)
In [ ]: