\documentclass[11pt]{article}
\usepackage{latexsym}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{bbm}
\usepackage{graphicx}
\usepackage{float}
% \usepackage{epsfig} % obsolete; graphicx (loaded above) suffices
\usepackage[right=0.8in, top=1in, bottom=1.2in, left=0.8in]{geometry}
\usepackage{setspace}
\spacing{1.06}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{\vspace{0.25cm}
\hbox to 5.78in { {COMS 4995-2:\hspace{0.12cm}Advanced
Algorithms (Spring'20)} \hfill #2 }
\vspace{0.48cm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{0.42cm}
\hbox to 5.78in { {#3 \hfill #4} }\vspace{0.25cm}
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribes:\hspace{0.08cm}#4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{example}[theorem]{Example}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{conjecture}[theorem]{Conjecture}
\newcommand{\E}{{\mathbb E}}
\DeclareMathOperator{\var}{Var}
\begin{document}
\lecture{14: Simplex Algorithm, Duality}{Mar 5,
2020}{Instructor:\hspace{0.08cm}\emph{Alex Andoni }}{\emph{Suman Mulumudi, Serena Yuan}}
In these notes, we introduce duality and the Simplex algorithm (Dantzig 1949).
\section{Simplex Algorithm}
\begin{definition}
$x$ is a vertex of the feasible region $F \iff x$ is a basic feasible solution (bfs) $\iff$ $x$ tightly satisfies precisely $n$ linearly independent constraints.
\end{definition}
\begin{definition}
The edge $(v,u)$ exists $\iff$ $v$ and $u$ tightly satisfy $n-1$ shared linearly independent constraints.
\end{definition}
\begin{definition}
The neighborhood of $x$ is $N(x) := \{ \text{ all } y \text{ such that } (x,y) \text{ is an edge (neighbors)} \}.$
\end{definition}
\textbf{Simplex Algorithm:}
\begin{enumerate}
\item Choose $x^0 \in F$, the starting vertex.
\item Consider $N(x^t)$, where $x^t$ is $x$ at iteration $t$. Choose some neighbor $y \in N(x^t)$ such that $c^{\top} y < c^{\top}x^t$.
\item Set $x^{t+1} = y$, and repeat from step 2 (if such $y$ exists). If $y$ does not exist, $x^t$ is optimal.
\end{enumerate}
For the standard form of the LP given by
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{minimize } & c^{\top} x \\
\text{subject to } & Ax \leq b
\end{array}
\end{equation*}
it is possible to find a feasible starting point for Step 1 of the Simplex Algorithm by using an auxiliary LP to find $x^0 \in F$. This is equivalent to finding the minimum $t$ such that $Ax \leq b + t \cdot \mathbbm{1} $: set all $x$'s to be 0, and $t$ is the most negative $b$ value. Solve this auxiliary LP starting from $x =0$ and $t = - \min_i b_i$ (which is necessarily feasible for the auxiliary LP).
\begin{itemize}
\item If $t >0,$ the original LP is not feasible
\item Otherwise, $x$ is a feasible point
\end{itemize}
Note that we obtain a point $x$, but if $x$ is not a vertex of $F$ (i.e. it does not satisfy precisely $n$ constraints), push $x$ to tightly satisfy precisely $n$ constraints.
\begin{remark}
The Simplex Algorithm will converge eventually.
\end{remark}
\begin{remark}
Step 2 of the Simplex Algorithm is the ``pivoting'' rule.
\end{remark}
\begin{remark}
Choosing $y^* = \arg \min_{y \in N(x)} c^{\top} y$ is not necessarily the optimal pivoting rule (where optimal is the rule which leads to fastest convergence).
\end{remark}
We note that $N(x)$ can be computed by the following algorithm.
\textbf{Pivoting Algorithm:}
\begin{itemize}
\item Let $E_1, \cdots, E_n$ be equalities defining $x$
\item For each $i \in [n]$: \\
\hspace*{10mm} For each $j \in [m]$: \\
\hspace*{10mm} \hspace*{10mm} Define $y$ as solution to $E_1, \cdots, E_{i-1}, E_{i+1} , \cdots, E_n$ and $A_j y = b_j$ \\
\hspace*{10mm} \hspace*{10mm} If $y$ is unique and $y\in F$, add it to $N(x)$.
\end{itemize}
The total time of this algorithm is $\#\text{ iterations} \cdot n \cdot m \cdot (\mathrm{GE} + m \cdot n)$ where $\mathrm{GE}$ is the time to perform Gaussian elimination and $m \cdot n$ is the time to check $Ay \leq b$.
\begin{remark}
For many pivoting rules, there exist LPs requiring $\exp(n)$ iterations.
\end{remark}
\begin{remark}
``Smoothed LPs'' admit polytime simplex algorithms, where smoothed LPs
are created by perturbing the parameters of the program. [Spielman, Teng]
\end{remark}
\begin{conjecture}
Hirsch Conjecture: Consider the graph $G$ composed of the vertices and edges of the polytope. The graph has $\mathrm{poly}(n,m)$ diameter, even though it may have $\exp(n)$ vertices.
\end{conjecture}
\section{Duality}
\subsection{Reminder: Standard form of Primal LP}
Previously, we defined the standard form of the primal:
\begin{definition}
The standard form of the primal LP is:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{minimize } & v^* = c^\top x \\
\text{subject to } & Ax = b \\
& x \geq 0
\end{array}
\end{equation*}
\end{definition}
And we have shown previously that all LPs can be converted to this form.
\subsection{The Dual}
Consider the following definition of the dual:
\begin{definition}
Given a primal in standard form, the dual LP is:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{maximize } & w^* = b^\top y \\
\text{subject to } & A^\top y \leq c
\end{array}
\end{equation*}
\label{def:dual}
\end{definition}
We will motivate this construction by demonstrating weak duality:
\begin{theorem}
Weak Duality: Given a primal LP and its optimal solution $v^*$, and its corresponding dual and dual optimal solution $w^*$, then $v^* \geq w^*$.
\end{theorem}
\begin{proof}
The proof mirrors the motivation for the construction in Definition~\ref{def:dual} above.
Consider a vector $y \in \mathbb{R}^m$ such that $y^\top A \leq c^\top$ (where the inequality holds coordinate-wise). Multiplying by some $x \in F$ where $F$ is the feasible region of the primal yields:
\begin{align*}
y^\top A x \leq c^\top x
\end{align*}
Here we note we have used the fact that, in the primal, we have restricted $x \geq 0$. Further, noting that since $x \in F$ it follows that $Ax = b$, thus can be re-written as:
\begin{align*}
& y^\top b = y^\top A x \leq c^\top x \\
\implies & \forall x \in F, \; y^\top b \leq c^\top x \\
\implies & y^\top b \leq \min_{x \in F} c^\top x = v^*
\end{align*}
\end{proof}
We see we have constructed the dual such that it yields an optimization problem which permits a lower bound on the optimal solution of the primal.
\subsection{Dual of the Dual}
\begin{theorem}
For linear programs, the dual of the dual is the primal.
\end{theorem}
\begin{proof}
Given the dual in Definition~\ref{def:dual} above, it can be re-written in standard form as:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{optimize} & w^* = - \min_{y^+, y^-, \delta} -b^\top (y^+ - y^-) \\
\text{subject to } & A_i^\top y^+ - A_i^\top y^- + \delta_i = c_i \\
& y^+, y^-, \delta \geq 0
\end{array}
\end{equation*}
Where the original $y$ is now $y = y^+ - y^-$. (More details on this construction can be found in earlier notes.)
For which we can construct the dual:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{optimize} & v^* = - \max_{z \in \mathbb{R}^n} c^\top z = \min_{z \in \mathbb{R}^n} c^\top (-z) \\
\text{subject to } & Az \leq -b \\
& -Az \geq b \\
& Iz \leq 0
\end{array}
\end{equation*}
We note $Az \leq -b$ and $-Az \geq b$ gives $A(-z) = b$. Substituting $x = -z$ recovers the primal form:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{optimize} & v^* = \min_{z \in \mathbb{R}^n} c^\top (-z) = \min_{x \in \mathbb{R}^n} c^\top x \\
\text{subject to } & Ax = b \\
& x \geq 0
\end{array}
\end{equation*}
\end{proof}
\subsection{Strong Duality}
\begin{claim}
Strong Duality: For LPs, if $v^*$ is the solution to the primal and $w^*$ is the solution to the dual, then $v^* = w^*$. That is, the primal and dual have identical solutions.
\end{claim}
\subsection{Intuition for Strong Duality}
Proving strong duality in general is tedious, but we will provide intuition here for why strong duality should hold by making an analogy with physics. In the dual, we are optimizing $w^* = -\min b^\top (-y)$ subject to $A^\top y \leq c$. For convenience, we re-define $-y \to y$ and $-c \to c$, thus the dual LP is now:
\begin{equation*}
\begin{array}{lr@{}c@{}r@{}l}
\text{optimize} & w^* = \min b^\top y \\
\text{subject to } & A^\top y \geq c
\end{array}
\end{equation*}
Consider the space $\mathbb{R}^m$ in which $y$ lives, and consider $y$ to be the location of a ball. Further, orient the space such that $b$ points upwards, and there is a gravitational force $-b$ pulling the ball at $y$ downwards. The constraints $A^\top y \geq c$ create $n$ hyperplanes at $A_i^\top y = c_i$ which form a kind of ``convex bowl'' in the space. Thus, when the ball $y$ is dropped into the space, it will fall until it reaches the lowest point in the bowl. Since $b$ points upwards, this corresponds to an optimal solution to the dual problem (since the LP essentially minimizes the projection of $y$ along $b$).
\begin{figure}[H]
\centering
\includegraphics[width=8cm]{ex_gravity.jpeg}
\end{figure}
If there is no plane perpendicular to $b$, then $y$ will have reached a vertex of the feasible region. Returning to the physics analogy, this requires that $y$ is supported by normal forces coming from the planes with which it is in contact. Since $y$ is in $\mathbb{R}^m$, a vertex will be found at the intersection of precisely $m$ planes. Suppose that each hyperplane $i$ provides a normal force of magnitude $x_i$, directed along $A_i$. Since the ball is at rest, the total normal force must equal the gravitational force $b$, thus over all hyperplanes (of which $m < n$ actually contribute):
\begin{align*}
\sum_i A_i x_i = b
\end{align*}
Consider that $y$ will be resting on $m$ of $n$ possible hyperplanes, thus $m$ constraints will be tight. For those that are not tight, it follows that the $x_i$ for that constraint should be $0$ (since it will not be contributing to the normal force), thus $x_i A_i^\top y = 0 = c_i x_i$. If the constraint is tight, then $A_i^\top y = c_i \implies x_i A_i^\top y = x_i c_i$. Noting that in the primal form, we have $Ax = b$ (equivalently $\sum_i A_i x_i = b$), summing over all constraints yields:
\begin{align*}
\sum_{i} x_i A_i^\top y = \left ( \sum_{i} x_i A_i \right )^{\!\top} y = (A x)^\top y = b^\top y \\
\sum_{i} x_i A_i^\top y = \sum_{i} c_i x_i = c^\top x \\
\implies c^\top x = b^\top y
\end{align*}
Noting $-v^* = c^\top x$ and $-w^* = b^\top y$, and noting that we have demonstrated weak duality $v^* \geq w^*$, showing an instance of $c^\top x = b^\top y$ gives strong duality. There are cases which have not been shown (such as when a constraint creates a hyperplane perpendicular to $b$), but this physical analogy should give intuition for why strong duality holds for LPs.
\end{document}