\documentclass[]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript (no-op on LaTeX kernels from 2015 onward; kept for older distributions)
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\else % if luatex or xelatex
\ifxetex
\usepackage{mathspec}
\else
\usepackage{fontspec}
\fi
\defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref
\usepackage[unicode=true]{hyperref}
\hypersetup{
pdftitle={COMS 6998-4 F17 Homework 1 (due Wednesday October 18)},
pdfauthor={Daniel Hsu (djh2164)},
pdfborder={0 0 0},
breaklinks=true}
\urlstyle{same} % don't use monospace font for urls
\usepackage[margin=1in]{geometry}
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{0}
% Redefines (sub)paragraphs to behave more like sections
\ifx\paragraph\undefined\else
\let\oldparagraph\paragraph
\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
\let\oldsubparagraph\subparagraph
\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
% set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
\title{COMS 6998-4 F17 Homework 1 (due Wednesday October 18)}
\author{Daniel Hsu (djh2164)}
\date{}
\begin{document}
\maketitle
\newcommand\E{\mathbb{E}}
\renewcommand\P{\mathbb{P}}
\newcommand\R{\mathbb{R}}
\newcommand\ip[1]{\langle #1 \rangle}
\section{Instructions}\label{instructions}
Solve \textbf{two} of the assigned problems.
Submit your write-up on Gradescope as a PDF document by 11:59 PM of the
due date. Upon submitting to Gradescope, make sure each problem has some
page of the PDF assigned to it, or we will assume the problem was not
solved.
Make sure the following appears on the first page of your write-up:
\begin{itemize}
\tightlist
\item
your name,
\item
your UNI, and
\item
the names and UNIs of any students with whom you discussed the
assignment.
\end{itemize}
You are welcome to use the Markdown or LaTeX source for the assignment
as a template for your write-up. I use Pandoc (\url{http://pandoc.org}) to
translate the Markdown to \LaTeX~and ultimately to PDF.
\newpage
\section{Problem 1}\label{problem-1}
Consider the setting for online decision-making with bandit feedback and
expert advice, with the following change (parameterized by a positive
integer \(n\) that is at most \(N\)). In each round, the learner chooses
at most \(n\) of the experts, and receives the actions of only the
chosen experts; the actions of the experts not chosen are unobserved.
The rest of the round is the same as before: the learner chooses an
action and then receives the cost for (only) the chosen action. Assume
the costs for all \(K\) actions are always in the range \([0,1]\).
Prove that there is an algorithm for the learner in this setting with
expected regret at most \[
O\left( \sqrt{\frac{\min\{K,n\}N\log N}{n} \cdot T} \right)
\] after \(T\) rounds.
\emph{Hint}: Modify the Exp4 algorithm.
\bigskip
\emph{Your solution}:
\newpage
\section{Problem 2}\label{problem-2}
Let \((X_t)_{t=1,2,\dotsc}\) be a sequence of random variables, and
\((Y_t)_{t=1,2,\dotsc}\) a sequence of random functionals such that, for
each \(t\), \(Y_t\) depends only on \(X_1,\dotsc,X_t\). Define the
notation
\(\E_t[ \,\cdot\, ] := \E[ \,\cdot\, \mid X_1,\dotsc,X_{t-1} ]\).
\begin{itemize}
\item
Part 1. Prove that for any \(T\geq1\), \(\lambda>0\), and
\(\delta \in (0,1)\), \[
\P\left( \sum_{t=1}^T Y_t \geq \frac1\lambda \sum_{t=1}^T \ln \E_t[ \exp(\lambda Y_t) ] + \frac1\lambda \ln(1/\delta) \right) \leq \delta .
\] \emph{Hint}: Determine the expectation of the random variable \[
\lambda \sum_{t=1}^T Y_t - \sum_{t=1}^T \ln \E_t[ \exp(\lambda Y_t) ] ,
\] and then apply Markov's inequality.
\item
Part 2 (optional). Assume that for all \(t\), we have \(Y_t \leq 1\)
and \(\E_t[Y_t] = 0\). Use the inequality from Part 1 to prove that
for any \(\delta \in (0,1)\) and \(v>0\), \[
\P\left( \sum_{t=1}^T Y_t \geq \sqrt{2v \ln(1/\delta)} + \frac{\ln(1/\delta)}{3} \ \wedge \ \sum_{t=1}^T \E_t[Y_t^2] \leq v \right) \leq \delta .
\] \emph{Hint}: First prove that for any \(\lambda>0\), \[
\frac1\lambda \sum_{t=1}^T \ln \E_t[ \exp(\lambda Y_t) ] \leq \frac{e^\lambda - \lambda - 1}{\lambda} \sum_{t=1}^T \E_t[Y_t^2] .
\] Now use the fact that for \(\lambda \in (0,3)\), \[
\frac{e^\lambda - \lambda - 1}{\lambda} \leq \frac{\lambda}{2(1-\lambda/3)} .
\] (You may use this last inequality without proof.) Conclude the
proof by assuming \(\sum_{t=1}^T \E_t[Y_t^2] \leq v\) and choosing
\(\lambda \in (0,3)\) appropriately.
\item
Part 3. Consider a variant of the Exp4 algorithm for online
decision-making with bandit feedback and expert advice in which all of
the probability vectors \(p_t\) are chosen so that
\(p_t(a) \geq p_{\min}\) for some given \(p_{\min}>0\). (Don't worry
about how this is done.) For each \(a \in [K]\) and \(t=1,\dotsc,T\),
assume that \(c_t(a) \in [0,1]\) and also that \(\hat c_t(a)\) is the
Horvitz-Thompson (inverse probability weighting) estimator of
\(c_t(a)\). Use either the probability inequality from Part 2 or
Theorem 1.6 from David Freedman's 1975 paper ``On Tail Probabilities
for Martingales'' to prove a bound on \[
\max_{i \in [N]}
\left\{
\left( \sum_{t=1}^T c_t(a_t) - c_t(b_{t,i}) \right)
- \left( \sum_{t=1}^T \ip{\hat c_t,p_t} - \hat c_t(b_{t,i}) \right)
\right\}
\] that holds with probability at least \(1-\delta\).
\end{itemize}
\bigskip
\emph{Your solution}:
\newpage
\section{Problem 3}\label{problem-3}
Design an online classification algorithm such that, in the realizable
setting where the target concept is a homogeneous linear threshold
function, \(c = h_{w^*}\) for some \(w^* \in \R^d\), and the conditions
below hold, a learner using the algorithm makes at most
\(\log_2 \lceil (1 + \|w^*\|_2)^d \rceil\) mistakes.
The conditions are: \[
\|x_n\|_2 = 1 , \quad |\ip{w^*,x_n}| \geq 1 , \quad n = 1,2,\dotsc .
\]
\bigskip
\emph{Your solution}:
\end{document}