\documentclass[11pt]{article}
\usepackage{amssymb,amsmath,amsthm}
\usepackage[all]{xy}

\newcommand{\R}{{\mathbb R}}
\newcommand{\Q}{{\mathbb Q}}
\newcommand{\Z}{{\mathbb Z}}
\newcommand{\Aut}{\operatorname{Aut}}
\newcommand{\im}{\operatorname{im}}
\newcommand{\degree}{\operatorname{degree}}
\newcommand{\divides}{\mbox{ \Large{$\vert$} }}
\newcommand{\Var}{\operatorname{Var}}
\newcommand{\Cov}{\operatorname{Cov}}
\renewcommand{\1}{\mathbf{1}}

%\newcommand{\Pr}{\operatorname{Pr}}

\parindent=0cm

\oddsidemargin=0in
\textwidth=6.5in

\newenvironment{question}[1]{\noindent{\large{\bf{Problem #1}}}\hspace{1em} \begin{em}}{\end{em}\bigskip}
\newenvironment{solution}{\noindent\ignorespaces\par\noindent \large{\bf{Solution}} }{\bigskip}

\title{The Probabilistic Method\\Homework 2}
\author{Brett Hemenway}

\begin{document}
	\maketitle

	\begin{question}{1.}
		Let $X$ be a random variable taking integral nonnegative values, let $E(X^2)$ denote the expectation of its square, and let $\Var(X)$ denote its variance.
		Prove that
		\[	
			\Pr(X=0) \le \frac{\Var(X)}{E(X^2)}.
		\]
	\end{question}

	\begin{solution}
		Notice that 
		\[
			\Pr(X=0) = 1 - \Pr(X\ge 1),
		\]
		and
		\[
			\frac{\Var(X)}{E(X^2)} = \frac{E(X^2)-E(X)^2}{E(X^2)} = 1 - \frac{E(X)^2}{E(X^2)},
		\]
		so to prove the result, it suffices to show 
		\[
			\Pr(X \ge 1) \ge \frac{E(X)^2}{E(X^2)}.
		\]
		To see this, consider the conditional expectation
		\[
			E(X|X \ge 1) = \sum_{n=0}^\infty n \frac{ P(X=n \cap X \ge 1)}{P(X \ge 1)} = \frac{1}{P(X \ge 1)} \sum_{n=1}^\infty n P(X=n) = \frac{E(X)}{P(X \ge 1)}.
		\]
		Similarly,
		\[
			E(X^2| X \ge 1) = \sum_{n=0}^\infty n^2\frac{P(X=n \cap X \ge 1)}{P(X \ge 1)} = \frac{E(X^2)}{P(X \ge 1)}.
		\]
		Now, 
		\begin{align*}
			E(X|X \ge 1)^2 \le E(X^2|X \ge 1)	&\Rightarrow \frac{E(X)^2}{P(X \ge 1)^2} \le \frac{ E(X^2) }{P(X \ge 1)} \\
												&\Rightarrow \frac{E(X)^2}{E(X^2)} \le P(X \ge 1).
		\end{align*}

	\end{solution}

	\begin{question}{2.}
		Show that there is a positive constant $c$ such that the following holds.   For any $n$ reals $a_1,\ldots,a_n$ satisfying $\sum_{i=1}^n a_i^2 = 1$, if $(\epsilon_1,\ldots,\epsilon_n)$ is a $\{-1,1\}$-random 
		vector obtained by choosing each $\epsilon_i$ randomly and independently with uniform distribution to be either $-1$ or $1$, then 
		\[
			\Pr\left( \left| \sum_{i=1}^n \epsilon_i a_i \right| \le 1 \right) \ge c.
		\]
	\end{question}

	\begin{solution}
		Define the random variable $X = \sum_{i=1}^n a_i \epsilon_i$.  Then
		\[
			E(X) = E \left( \sum_{i=1}^n a_i \epsilon_i \right) = \sum_{i=1}^n a_i E(\epsilon_i) = 0,
		\]
		and because the $\epsilon_i$ are independent we have
		\[
			E(X^2) = E\left( \sum_{i=1}^n a_i \epsilon_i \right) = \sum_{i=1}^n a_i^2 E(\epsilon_i^2) = \sum_{i=1}^n a_i^2 = 1,
		\]
	
	\end{solution}

	\begin{question}{3.}
		Let $v_1,v_2,\ldots,v_n$ be $n$ vectors in $\R^n$, each of Euclidean norm at most $1$, and let $u = \sum_{i=1}^n p_i v_i$, where $0 \le p_i \le 1$ for all $i$.
		\begin{itemize}
			\item [(i)]
				Prove that there are $\epsilon_i \in \{0,1\}$ such that
				\[
					\left\| \sum_{i=1}^n \epsilon_i v_i -u \right\| \le \sqrt{n}/2.
				\]
			\item [(ii)]
				Prove that the above estimate is tight for all $n$.
			\item [(iii)]
				Prove that even for $m > N$ and for $v_1,\ldots,v_m \in \R^n$, each of norm at most $1$, and for $u = \sum_{i=1}^m p_iv_i$ with $0 \le p_i \le 1$, 
				there are $\epsilon_i \in \{0,1\}$ such that
				\[
					\left\| \sum_{i=1}^m \epsilon_i v_i - u \right\| \le \sqrt{n}/2.
				\]
		\end{itemize}
	\end{question}
		
	\begin{solution}
		\begin{itemize}
			\item [(i)]
				If we choose the $\epsilon_i$ independently such that $P(\epsilon_i = 1) = p_i$, then we have
				\begin{align*}
					E\left( \left\| \sum_{i=1}^n (\epsilon_i - p_i)v_i \right\|^2 \right)	&= E \left( \left\langle \sum_{i=1}^n (\epsilon_i - p_i) v_i, \sum_{i=1}^n (\epsilon_i-p_i)v_i \right\rangle \right) \\
																							&= E\left( \sum_{i,j=1}^n (\epsilon_i - p_i)(\epsilon_j - p_j) \langle v_i,v_j \rangle \right) \\
																							&= \sum_{i=1}^n E\left((\epsilon_i-p_i)^2\right)\|v_i\|^2 + \sum_{i\ne j} E\left( (\epsilon_i)(\epsilon_j) \right) \langle v_i,v_j \rangle \\
																							&= \sum_{i=1}^n \left( E(\epsilon_i^2) - 2p_iE(\epsilon_i) + p_i^2\right) + \sum_{i\ne j} E(\epsilon_i - p_i)E(\epsilon_j-p_j) \langle v_i,v_j \rangle \\
																							&= \sum_{i=1}^n (p_i - p_i^2) + \sum_{i\ne j} 0 \\
																							&= \sum_{i=1}^n p_i(1-p_i) \\
																							&\le \frac{n}{4}.
				\end{align*}
				In particular, there is a choice of the $\epsilon_i$ such that 
				\[
					\left\| \sum_{i=1}^n (\epsilon_i-p_i)v_i \right\|^2 \le \frac{n}{4}.
				\]
				Taking square roots then gives the result.
			\item [(ii)]
				To show that this bound is tight, let $v_i = e_i$, where $\{e_1,\ldots,e_n\}$ is the standard basis for $\R^n$.  Then if we take each $p_i = \frac{1}{2}$, we have 
				\[
					\left\| \sum_{i=1}^n (\epsilon_i-p_i)v_i \right\| = \frac{\sqrt{n}}{2}
				\]
				for all choices of $\epsilon_i$.  This is just the geometric fact that the center of the $n$-cube is a distance $\sqrt{n}/2$ from any of its corners.
			\item [(iii)]
		\end{itemize}

	\end{solution}	

	\begin{question}{4.}
		Prove that for every set $X$ of at least $4k^2$ distinct residue classes modulo a prime $p$, there is an integer $a$ such that the set $\{ax \mod p : x \in X\}$ intersects every 
		interval of length at least $p/k$ in $\{0,1,\ldots,p-1\}$.\\
		\textbf{Hint.}  Pick random residues $a$ and $b$ and consider $\{ax + b \mod p: x \in X\}$.
	\end{question}

	\begin{solution}
		For simplicity, we assume that intervals are allowed to wrap around, so there are exactly $p$ intervals (instead of $p-p/k+1$).

		First, notice that if there exists an $a,b \in \Z/p\Z$ such that $aX+b$ intersects every interval of length $p/k$, then $aX$ intersects every interval of length $p/k$ as well, since adding $b$ just permutes the intervals.
		So it is enough to show that there exists and $a$ and a $b$ such that $aX + b$ intersects all intervals of length $p/k$.  Now, divide $\{0,\ldots,p-1\}$ into $2k$ intervals of $B_1,\ldots,B_{2k}$ each of length $p/2k$, specifically 
		\[
			B_i = \{(i-1)p/2k+1,\ldots,ip/2k\}.
		\]
		Now, if we can show that $aX+b$ intersects every interval $B_i$, then $aX+b$ must intersect every interval of length $p/k$ since each interval of length $p/k$ fully contains at least one $B_i$.  To this end, we choose
		$a,b$ uniformly from $\Z/p\Z$ and we define the random variables 
		\[
			Y_i = | \{aX+b\} \cap B_i |
		\]
		Then by linearity of expectation, we have
		\[
			E[Y_i]	= \sum_{x \in X}  P(ax+b \in B_i) = \sum_{x \in X} \frac{1}{2k} = 2k.
		\]
		Now, notice that for $x_i,x_j \in X$ the events $ax_i + b \in B_\ell$ and $ax_j + b \in B_\ell$ are pairwise independent since $ax_j + b$ is uniformly distributed in $\Z/p\Z$ even if we condition on fixed value of $a x_i +b$.
		Thus 
		\begin{align*}
			\Var(Y_i)	&= \sum_{x \in X} \Var( |\{ax+b\}\cap B_i|) + 2 \sum_{x \ne y \in X} \Cov( |\{ax+b\}\cap B_i|,|\{ay+b\}\cap B_i|) \\
						&= \sum_{x \in X} \Var( |\{ax+b\}\cap B_i|) \quad \mbox{ (pairwise independence) } \\
						&= \sum_{x \in X} P(ax+b \in B_i)P(ax+b \not \in B_i) \quad \mbox{ (bernoulli r.v.) } \\
						&= \sum_{x \in X} \frac{1}{2k}\left( 1-\frac{1}{2k}\right) \\
						&= 4k^2\left( \frac{1}{2k} - \frac{1}{4k^2} \right) \\
						&= 2k -1.
		\end{align*}
		Thus by problem (1),
		\[
			P(Y_i = 0 ) \le \frac{\Var(Y_i)}{E(Y_i^2)} = \frac{\Var(Y_i)}{\Var(Y_i)+E(Y_i)^2} = \frac{2k-1}{2k-1+4k^2} < \frac{1}{2k}.
		\]
		The union bound then gives 
		\[
			P(\mbox{ at least one $Y_i = 0$}) \le \sum_{i=1}^{2k} P(Y_i = 0) < 1.
		\]
		In particular, there is some choice of $a,b$ such that $aX+b$ hits every interval $B_i$.

	\end{solution}

	\begin{question}{5.}
		Prove that every $3$-uniform hypergraph with $n$ vertices and $m \ge n/3$ edges contains an independent set of size at least $\frac{2n^{3/2}}{2\sqrt{3}\sqrt{m}}$.
	\end{question}



\end{document}
