
\documentclass{article}
\usepackage{amsmath,amssymb}
\setlength{\oddsidemargin}{0.25 in}
\setlength{\evensidemargin}{-0.25 in}
\setlength{\topmargin}{-0.6 in}
\setlength{\textwidth}{6.5 in}
\setlength{\textheight}{8.5 in}
\setlength{\headsep}{0.75 in}
\setlength{\parindent}{0 in}
\setlength{\parskip}{0.1 in}
\newcounter{lecnum}
%
% The following macro is used to generate the header.
%
\newcommand{\lecture}[4]{
  \pagestyle{myheadings}
  \thispagestyle{plain}
  \newpage
  \setcounter{lecnum}{#1}
  \setcounter{page}{1}
  \noindent
  \begin{center}
    \framebox{
      \vbox{\vspace{2mm}
        \hbox to 6.28in { {{\bf 18.317~Combinatorics, Probability, and
  Computations on Groups} \hfill #2} }
        \vspace{4mm}
        \hbox to 6.28in { {\Large \hfill Lecture #1  \hfill} }
        \vspace{2mm}
        \hbox to 6.28in { {\it Lecturer: #3 \hfill Scribe: #4} }
        \vspace{2mm}}}
  \end{center}
  \markboth{Lecture #1: #2}{Lecture #1: #2}
  \vspace*{4mm}
}

\input{epsf}
%usage: \fig{LABEL}{FIGURE-HEIGHT}{CAPTION}{FILENAME}
\newcommand{\fig}[4]{
        \begin{figure}
        \setlength{\epsfysize}{#2}
        \centerline{\epsfbox{#4}}
        \caption{#3} \label{#1}
        \end{figure}
        }

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newenvironment{proof}{{\bf Proof:}}{\hfill\rule{2mm}{2mm}}
\newenvironment{mproof}[1]{{\bf #1}}{\hfill\rule{2mm}{2mm}}

\def\E#1{{\rm E}[#1]}
\def\i{\hspace*{5mm}}
\newcounter{lineno}
\setcounter{lineno}{0}
\def\n{\addtocounter{lineno}{1} \hbox to 7mm{\hfill \arabic{lineno}:}\hspace{2mm}}
\def\resetn{\setcounter{lineno}{0}}
\def\tqbfk{\tqbf_k^\exists}
\def\ep{\epsilon}
\def\erd{Erd\H{o}s}

\begin{document}
\lecture{1}{7 September 2001}{Igor Pak}{R. Radoi\v ci\'c}

\section*{Probability of Generating a Group}
Let $G$ be a finite group and let $|G|$ denote the order of $G$.
Let $d(G)$ denote the minimum number of generators of $G$ and $l(G)$ the length of the longest
subgroup chain $1 = G_0 \subsetneq G_1 \subsetneq G_2 \subsetneq \ldots \subsetneq G_l = G$.  Also, let $m(G)$ denote the maximal size of a \emph{non-redundant} generating set, where a generating set $\langle g_1, g_2, \ldots, g_k \rangle$ is called \emph{redundant} if there exists an $i$ such that $\langle g_1, \ldots, g_{i-1}, g_{i+1}, \ldots, g_k \rangle = G$.  Furthermore, let
\[\varphi_k (G) = Pr(\langle g_1, g_2, \ldots, g_k \rangle = G),\]
where $g_i$ are elements of $G$, chosen independently and uniformly at random from $G$.  The main topic of the lecture today is to give a good estimate on $\varphi_k (G)$.  More precisely, for every group $G$, we would like to find the smallest $k$ for which $\varphi_k (G) \geq \frac {1}{3}$ or some other positive constant.  Trivially, $\varphi_k (G) = 0$ for $k < d(G)$.

Let's look at several examples to understand the meaning of the notions above.

For example, let's take $G = \mathbb{Z}_2 \times \mathbb{Z}_3 \times \mathbb{Z}_5 \times \mathbb{Z}_7 \times \ldots \times \mathbb{Z}_p$.  Then, clearly,
\[\varphi_1 (G) = \left(1 - \frac{1}{2}\right)\left(1 - \frac{1}{3}\right)\left(1 - \frac{1}{5}\right) \ldots \left(1 - \frac{1}{p}\right),\]which tends to $0$ as $p \rightarrow \infty$.  Indeed,
\begin{displaymath}
\varphi_1 (G) = \exp\left({\sum_{i < p, i\,\text{prime}} \log\left(1-\frac{1}{i}\right)}\right) \approx \exp\left({-\sum_{i < p, i\,\text{prime}} \frac{1}{i}}\right),
\end{displaymath}
which converges to $0$ since $\sum \frac{1}{i}$ diverges.
On the other hand,
\[\varphi_2 (G) = \left(1 - \frac{1}{2^2}\right)\left(1 - \frac{1}{3^2}\right)\left(1 - \frac{1}{5^2}\right) \ldots \left(1 - \frac{1}{p^2}\right)\]
is strictly positive since $\sum \frac{1}{i^2}$ converges.

If $G$ is ${\mathbb{Z}_2}^r$, i.e an $r$-dimensional vector space on $\{0, 1\}$-vectors, then $d(G)=m(G)=l(G)=r$.  But this is not always the case.

If $G$ is $\mathbb{Z}_{2^r}$, then $d(G)=m(G)=1$, but $l(G)=r$, since $1=\mathbb{Z}_1 \subsetneq \mathbb{Z}_2 \subsetneq \mathbb{Z}_4 \subsetneq \ldots \mathbb{Z}_{2^r}$.

Now, let $G$ = $S_n$, the permutation group on $n$ letters.  Since $G = \langle (1, 2), (1, 2, \ldots, n) \rangle$ and since $S_n$ is not a cyclic group, then $d(G) = 2$.  Because there are $n-1$ adjacent transpositions $(1, 2), (2, 3), \ldots, (n-1, n)$, we have $m(G) \geq n-1$.
Actually, Whiston \cite{W00} showed that $m(G)=n-1$.

What about $l(S_n)$?  Well, for any group $G$ we have the following trivial bounds:

\begin{proposition}
\begin{displaymath}
d(G) \leq m(G) \leq l(G) \leq \log_2 |G|
\end{displaymath}
\end{proposition}

\begin{proof}
The first and the last inequality are obvious, while the middle inequality follows from the implication:
$m(G)=k$ and $\langle g_1, g_2, \ldots, g_k \rangle$ is the maximal non-redundant generating set $\Rightarrow \langle g_1 \rangle \subsetneq \langle g_1, g_2 \rangle \subsetneq \ldots \subsetneq \langle g_1, g_2, \ldots, g_k \rangle$.
\end{proof}

One of the serious theorems in this subject shows that $l(S_n) \approx \frac{3}{2}n$, but its proof is quite involving and uses the classification theorems \cite{B86}, \cite{CST89}.  We are going to show a weaker but elegant statement:

\begin{theorem}
\begin{displaymath}
l(S_n) = O(n\log\log{n})
\end{displaymath}
\end{theorem}

\begin{proof}
We will use $\bigcirc_p$ to denote the highest power of $p$ dividing $n!$.  Then we have
\begin{displaymath}
\bigcirc_p = \lfloor \frac {n}{p} \rfloor + \lfloor \frac {n}{p^2} \rfloor + \ldots = \sum_{i} \lfloor \frac {n}{p^i} \rfloor \leq \frac {n}{p\left(1-\frac{1}{p}\right)} = \frac{n}{p-1}.
\end{displaymath}
Then clearly:
\begin{displaymath}
l(S_n) \leq \sum_{p \leq n, p\,\text{prime}} \bigcirc_p \leq n\sum_{p \leq n} \frac{1}{p-1} \leq n\log\log{n} + O(n),
\end{displaymath}
where the last inequality follows from the Prime Number Theorem:
\begin{displaymath}
\sum_{p \leq n} \frac{1}{p-1} \sim \int_{1}^{n} \frac{dx}{x\log{x}} = \int \frac{d\,\log{x}}{\log{x}} = \log\log{x}\big{|}_1^n.
\end{displaymath}
\end{proof}

\begin{definition}
We define the random group process $\{B_t\}$:

$B_0 = 1$ and for $t > 0$, $B_{t+1} = \langle B_t, g_{t+1} \rangle$, where $g_{t+1} \in G$ is a random element chosen at moment $t+1$.
We obtain the chain of subgroups $B_0 \subset B_1 \subset B_2 \subset \ldots \subset B_t \subset \ldots \subset G$.  Let $\tau (G)$ denote the stopping time of $\{B_t\}$, i.e.
\[ \tau := \min \{t: B_t = G\}\]
\end{definition}

\begin{proposition}
\begin{displaymath}
\mathbf{E}[\tau] \leq 2\log_2{|G|}
\end{displaymath}
\end{proposition}

\begin{proof}
Given that $t < \tau$ (i.e. $B_t \neq G$), we have
\[Pr(B_{t+1} \neq B_{t}) = 1-\frac{|B_{t}|}{|G|} \geq \frac{1}{2}.\]
Thus, the expected time for the random group process to increase the order of the current subgroup is $\leq 2$.  Hence, $\mathbf{E}[\tau] \leq 2\log_2{|G|}$.
\end{proof}

If $G={\mathbb{Z}_2}^r$, then the inequality in the previous proof is an equality, so, in a sense, ${\mathbb{Z}_2}^r$ is the worst to generate.  Notice that we actually proved the following stronger statement:

\begin{proposition}
\begin{displaymath}
\mathbf{E}[\tau] \leq 2l(G)
\end{displaymath}
\end{proposition}

However, $l(G)$ in the proposition above cannot be replaced by $m(G)$, which is clear, e.g. when $G={\mathbb{Z}_2}^r$.  Still, the result is not the best possible.  Clearly, $\mathbf{E}[\tau] \geq l(G)$, but we will show today that the multiplicative constant factor in front of $l(G)$ can be shed.

\begin{theorem}
Let $|G| \leq 2^r$.  Then for all $k$, $\varphi_k (G) \geq \varphi_k ({{\mathbb{Z}_2}^r})$.
\end{theorem}

\begin{proof}
Fix $k$ and a subgroup $A \subsetneq G$.
Let $B_t$ and $B_t'$ be the random group processes for $G$ and ${\mathbb{Z}_2}^r$, respectively.
Let $\tau_1, \tau_2, \ldots, \tau_L = \tau$ denote times $t$ for which $B_t \neq B_{t-1}$.  Similarly, define
$\tau_1', \tau_2', \ldots, \tau_{R}' = \tau'$.
We will use the induction on $|G|$.  When $|G| = 1$, the theorem is trivial.  Let $s := \tau_{L-1}$.  We need to show that
\begin{displaymath}
Pr(\tau_L - \tau_{L-1} \leq k \big{|} B_s = A) \geq Pr(\tau_R' - \tau_{R-1}' \leq k)
\end{displaymath}
Indeed, the lefthand side is equal to $1-(\frac{|A|}{|G|})^k$, and the righthand side is equal to $1-Pr(\tau_R'-\tau_{R-1}' > k) = 1-\frac{1}{2^k}$.

Now $\frac{|A|}{|G|} \leq \frac{1}{2}$ and the claim above follows.

This claim, combined with the induction assumption $Pr(\tau_{L-1} \leq k) \geq Pr(\tau_{R-1}' \leq k)$, gives
\begin{displaymath}
Pr(\tau_L \leq k \big{|} B_s = A) \geq Pr(\tau_R' \leq k) = \varphi_k ({\mathbb{Z}_2}^r).
\end{displaymath}
This holds for any fixed $k$ and $A$, so the theorem follows.
\end{proof}

\begin{thebibliography}{}

\bibitem[W00]{W00}
{J. Whiston:}
{Maximal independent generating sets of the symmetric group,}
{\it Journal of Algebra} 232 (2000), 255--268.

\bibitem[B86]{B86}
{L. Babai:}
{On the length of subgroup chains in the symmetric group,}
{\it Comm. Algebra} 14 (1986), 1729--1736.

\bibitem[CST89]{CST89}
{P. J. Cameron, R. Solomon, A. Turull:}
{Chains of subgroups in symmetric groups,}
{\it Journal of Algebra} 127 (1989), 340--352.

\end{thebibliography}

\end{document}
