In [27]:
# Preamble  here, add paths and import relevant modules
import os
import sys
base_path = '/'.join(os.getcwd().split('/')[:-1])
sys.path.append('/'.join(os.getcwd().split('/')[:-1]))
import util
reload(util)
import numpy as np 
import sklearn
from sklearn import datasets
from scipy.sparse.linalg import eigsh
from numpy.random import permutation
from scipy import ndimage
from scipy import misc
% matplotlib inline
import matplotlib.pyplot as plt

Tutorial 3 -- Nystrom Extension

Nystrom Extension is a way of computing approximately the eigenvectors of a graph Laplacian. It is a sampling strategy to bypass the $O(N^2)$ complexity in constructing the full Laplalacian Matrix. Since it's a sampling strategy, we need the graph to be well-connected enough so that the samples correctly reflect the geometry of the original graph

We load the test image Two Cows and use Nystrom Extension to compute its kernels. Since we are clustering, it's no surprise that the names of our datasets start with a number.

In [28]:
# load the two cows image
img = np.float32(misc.imresize(misc.imread(base_path+'/data/Images/twocows.png'),(256,256)))/255.
#initialization
import graph_cluster as gc
reload(gc)
clf = gc.LaplacianClustering()
clf.load_data(raw_data = util.imageblocks(img, width = 2)) #patch size = 2*width + 1
plt.imshow(img)
plt.rcParams['figure.figsize'] = (5, 5)
plt.axis('off')
plt.show()

To use Nystrom, simply switch the Eig_solver property to 'nystrom'. Currently the affinity only supports 'rbf' kernel, with either cosine or euclidean distance metric.

In [29]:
clf.build_graph(Neig = 100,Eig_solver = 'nystrom',gamma = 1, num_nystrom = 100)

Below is a visualization of the eigenvectors we have just computed.

In [31]:
v = clf.graph.laplacian_matrix_['V']
e = clf.graph.laplacian_matrix_['E']
plt.figure(2)
for i in range(1,4):
    plt.subplot(1,3,i)
    plt.imshow(v[:,i].reshape(img.shape[0],img.shape[1]))
    plt.axis('off')
plt.rcParams['figure.figsize'] = (10, 5)
plt.tight_layout()  
plt.show()    

This image roughly has four regions, and the eigenvectors clearly reflect those regions. But what about images that have less clear delineations? We'll next try the lena image.

In [32]:
# load the lena image
img = np.float32(misc.imresize(misc.imread(base_path+'/data/Images/lena.png'),(256,256)))/255.
clf.load_data(util.imageblocks(img, width = 2)) #patch size = 2*width + 1
clf.build_graph(Neig = 100,Eig_solver = 'nystrom',gamma = 1, num_nystrom = 100)
In [34]:
v = clf.graph.laplacian_matrix_['V']
e1 = clf.graph.laplacian_matrix_['E']
plt.figure(2)
for i in range(3):
    for j in range(3):
        plt.subplot(3,3,i*3+j+1)
        plt.imshow(v[:,i*3+j+1].reshape(img.shape[0],img.shape[1]))
        plt.axis('off')
plt.rcParams['figure.figsize'] = (10, 10)
plt.tight_layout()  
plt.show()