\documentclass[11pt]{article}
\usepackage{graphicx}
\usepackage{url}
\usepackage{amsmath,amsthm,amssymb}
\usepackage{enumerate,enumitem}
\usepackage{pdfpages}
\usepackage{listings}
\usepackage{graphicx}
\usepackage{mathtools}
\usepackage{latexsym}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{epsfig}
\usepackage{bbm}
\usepackage[right=0.8in, top=1in, bottom=1.2in, left=0.8in]{geometry}
\usepackage{setspace}
\setlength{\parindent}{0em}
\spacing{1.06}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{\vspace{0.25cm}
\hbox to 5.78in { {COMS 4995-2:\hspace{0.12cm}Advanced
Algorithms (Spring'20)} \hfill #2 }
\vspace{0.48cm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{0.42cm}
\hbox to 5.78in { {#3 \hfill #4} }\vspace{0.25cm}
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribes:\hspace{0.08cm}#4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{example}[theorem]{Example}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{remark}[theorem]{Remark}
\newcommand{\E}{{\mathbb E}}
\DeclareMathOperator{\var}{Var}
\DeclareMathOperator*{\argmin}{arg\,min}
\begin{document}
\lecture{11: Cheeger's Inequality \& Spectral Graph Partitioning}{Feb 25,
2020}{Instructor:\hspace{0.08cm}\emph{Alex Andoni }}{\emph{Patrick Chi, William Wang}}
\section{Definitions and Motivations}
Recall from last lecture we define the Laplacian and its normalized form for a graph $G = (V, E)$ as:
$$L = D - A$$
$$\hat{L} = D^{-1/2}L D^{-1/2}$$
with
$$0 = \mu_1 \le \mu_2 \le \cdots \le \mu_n \le 2$$
where $\mu_1, \cdots, \mu_n$ are the sorted eigenvalues of $\hat{L}$.
\begin{definition}
A cut is defined as a separation of a graph $G$ into two subsections $S$ and $\bar{S}$.
\end{definition}
\begin{definition}
A graph is disconnected if there are no edges across a cut. We define a boundary of $S$ as:
$$\partial S = \{ (i, j) \in E, i \in S, j \in \bar{S} \}$$
\end{definition}
\begin{theorem}
$\mu_2 = 0 \Longleftrightarrow G$ is disconnected.
\end{theorem}
\begin{proof}
This can easily be shown by the fact that we break down the adjacency matrix into two separate parts. Each part is its own distinct graph, thus there will be at least one other eigenvalue equal to $0$.
\end{proof}
\subsection{Conductance}
We want to develop a quantitative statement to show how close to disconnected a graph is. This motivates the idea of conductance.
\begin{definition}
We define the conductance of a cut $S$ to be:
$$\phi(S) \triangleq \frac{|\partial S|}{vol{S}} ~~~~ \text{\normalfont where} ~~~~ vol(S) \triangleq \sum_{i \in S} d_i$$
We define the conductance of a graph $G$ to be
$$\phi(G) = \min_{S \subset V} \phi(S) ~ s.t. ~ 0 < vol(S) \le \frac{vol(V)}{2}$$
or in other words
$$\phi(G) = \min_{S: vol(S) \ne 0} ~ \frac{|\partial S|}{\min \{ vol(S), vol(\bar{S}) \}}$$
\end{definition}
Note that $\phi(S) \ne \phi(\bar{S})$ as we divide by the total number of edges in $S$. We generally look at sets with less than $\frac{1}{2}$ the edge volume for this reason.
\\ \\
Note we can also consider an alternate variation of conductance of $S$ as the following:
$$\phi(S) = \frac{\text{\normalfont \# edges crossing $S \rightarrow \bar{S}$}}{2(\text{\normalfont \# internal edges}) + (\text{\normalfont \# edges $S \rightarrow \bar{S}$})} \le 1$$
\begin{remark}
Computing $\phi(G)$ is NP-hard. Moreover, even computing to a constant factor approximation is NP-hard.
\end{remark}
\section{Cheeger's Inequality}
This motivates \textbf{Cheeger's Inequality}, which we define as follows:
\begin{theorem}
$$\frac{\mu_2}{2} \leq \phi(G) \leq \sqrt{2 \mu_2}$$
\end{theorem}
Cheeger's inequality allows us to bound the connectivity of a graph, and get an idea of how ``connected" a graph is just from its Laplacian. The left-hand inequality is somewhat easier and more ``intuitive" to prove, while the right-hand inequality involves a much more involved proof.
\subsection{Proof of $\frac{\mu_2}{2} \leq \phi(G) $}
Note that the motivation for this proof comes from the idea that if $\mu_2$ is small we should be able to find a cut in $G$ that has small conductance.
\begin{proof}
This is the ``easier" of the two inequalities to prove.
\\ \\
Recall that
\begin{align*}
\mu_2 = \min_{x \neq 0, \ x \bot v_1} \frac{x^T \hat{L} x}{\|x\|^2} && \phi(G) = \min_{S: \ 0 < vol(S) \leq \frac{vol(G)}{2}} \Phi(S)
\end{align*}
Thus, it is sufficient to exhibit an $x$ such that $R(x) \leq 2 \phi (S^*)$, where $S^* = \argmin_S \Phi(S)$
\begin{fact}
$$
y^T L y = \sum_{(i,j) \in E}(y_i - y_j)^2
$$
\end{fact}
Using this fact, we try to construct a vector $x$ to map $R(x)$ to $\phi (S^*)$
\begin{align*}
\mu_2 &= \min_{x \neq 0, \ x \bot v_1} R(x) = \min_{x \neq 0, \ x \bot v_1} \frac{x^T D^{-\frac{1}{2}} L D^{-\frac{1}{2}} x}{\|x\|^2}
\end{align*}
Perform the substitution $y = D^{-\frac{1}{2}} x$, then we can minimise with respect to $y$
\begin{align*}
\mu_2 &= \min_y \frac{y^T L y}{y^T D y}
\end{align*}
We now set $\mathbf{y} = \mathbbm{1}_{S^*}$, where we define $\mathbf{y}$ as:
$$
y_i =
\begin{cases}
1 & \text{if $i \in S^*$} \\
0 & \text{otherwise}
\end{cases}
$$
Note now that the numerator in our expression for $\mu_2$ becomes:
$$
y^T L y = \sum_{(i,j) \in E} (y_i - y_j)^2 = |\partial S^*|
$$
The second equality follows from the definition of $y_i$, where only an edge crossing from $S$ to $\overline{S}$ will contribute to the summation. Now that we have constructed a $y$ that relates the $R(x)$ to $|\partial S^*|$, we need to prove the corresponding vector $x$ is orthogonal to $v_1$ (since clearly $x \neq 0$).
\\ \\
We will modify $y$ such that $x$ becomes perpendicular to $v_1$. Consider $y' \triangleq y - \sigma \mathbbm{1}_n$, where $\sigma$ is some constant. Note that we still have
$$
(y')^T L y' = | \partial S^* |
$$
Also, recall that $v_1 = (\sqrt{d_1} \cdots \sqrt{d_n})^T$. Our objective is now to set $\sigma$ such that
\begin{align*}
v_1 \cdot D^{-\frac{1}{2}} \cdot y' = 0 &\iff \mathbbm{1} \cdot D \cdot y' = 0
\\
&\iff \sum_{i \in [n]} d_i (y_i - \sigma) = 0
\\
&\iff \sum_{i \in [n]} d_i y_i = \sigma \sum_{i \in [n]} d_i
\\
&\iff vol(S) = \sigma \cdot vol(V)
\end{align*}
Thus, if we set $\sigma = \frac{vol(S)}{vol(V)}$, we have $x \bot v_1$ as desired. Also note that we always choose the smaller half of of the graph for $S$, so we have $\sigma \leq \frac{1}{2}$.
\\ \\
Finally, we need to calculate $(y')^TDy'$ for the denominator:
\begin{align*}
(y')^TDy' &= \sum_{i \in [n]} y'_i d_i y'_i
\\
&= \sum_{i \in S^*}(1-\sigma)^2 d_i + \sum_{i \notin S^*}(-\sigma)^2d_i
\\
&= (1-\sigma)^2 vol(S^*) + \sigma^2(vol(V) - vol(S^*))
\\
&= (1-\sigma) vol(S^*)
\end{align*}
Putting everything together, we get:
\begin{align*}
R(y') &= \frac{(y')^T L y'}{(y')^TDy'}
\\
&= \frac{| \partial S^* |}{(1-\sigma) vol(S^*)}
\\
&= \frac{\phi(S^*)}{1 - \sigma}
\\
&\leq 2\phi(S^*)
\end{align*}
\end{proof}
\subsection{Proof of $\phi(G) \leq \sqrt{2 \mu_2}$}
The proof is very involved, but we will outline a general strategy to approach the proof. Our motivation is to show that if $\mu_2$ is small, the conductance should be small as well and thus we should be able to construct a cut that is small.
\begin{proof}
Suppose that $v_2$ is the second eigenvector, or in other words $R(v_2) = \mu_2$. We want to construct a set $S$ such that:
$$\frac{|\partial S|}{\min\{vol(S), vol(\bar{S})\}} \le \sqrt{2R(v_w)}$$
The question is now how do we construct a set given this eigenvector. As this is a proof sketch, we claim that we can do this with the following algorithm.
\begin{itemize}
\item [-] Sort vertices by their value in $y \triangleq v_2$, so we have $y_1 \leq y_2 \cdots \leq y_n$
\item [-] Consider every cut $S_t = \{i; ~y_i \leq t\}$
\item [-]Output $\argmin_{t} \phi(S_t)$
\end{itemize}
The objective of this algorithm is to essentially find the ``cut threshold" that will give us the graph with the least conductance, which we can then show to be less than the RHS of our inequality. As a side remark, it's easy to verify that the algorithm described above runs in $\mathcal{O}(m)$ time after computing the second eigenvector $v_2$.
\end{proof}
More regarding Cheeger's Inequality will be described in later lectures.
\end{document}