\documentclass[a4paper,11pt]{article}
\usepackage{amsmath}
\usepackage{amstext}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{braket}
\usepackage{microtype}
\usepackage{pifont}
\usepackage{pdfpages,diagbox}
\usepackage[utf8]{inputenc}
\usepackage{color}
\usepackage{enumerate}
\usepackage{hyperref}
\definecolor{ulbblue}{cmyk}{1,0.75,0.2,0.18}
\hypersetup{colorlinks, citecolor=ulbblue, linkcolor=ulbblue, urlcolor=ulbblue}
\makeatletter
\renewcommand\@pnumwidth{1.8em}
\makeatother
\usepackage[T1]{fontenc}
\usepackage[charter,cal=cmcal,sfscaled=false]{mathdesign}
\usepackage[paper=a4paper,innermargin=4cm,outermargin=3cm,top=3.5cm,bottom=3.5cm]{geometry}
\setlength{\headheight}{13.6pt}
\usepackage{setspace}
\usepackage[PetersLenny]{fncychap}
\usepackage{fancyhdr}
\def\CC{{\cal C}}
\theoremstyle{definition}
\newtheorem{exercise}{Exercise}
\newtheorem*{definition}{Definition}
\setlength{\textheight}{26cm}
\setlength{\textwidth}{16.5cm}
\setlength{\topmargin}{-2cm}
\setlength{\oddsidemargin}{-0.4cm}

\begin{document}

\noindent
\fbox{\parbox{\textwidth}{
INFO-H-451\hfill 2015-2016\\
\centering{ \textsc{\Large Information and Coding Theory}\\
\phantom{}}
}}

\begin{center}
\section*{\textcolor{red}{Solutions to Exercise Sheet 3}}
\end{center}

\begin{exercise}~
\begin{enumerate}[(a)]
\item\label{1.a} The code $\{\CC(A)=0,\CC(B)=0\}$ is singular. It is neither
uniquely decodable nor instantaneous.
\item\label{1.b} The code $\{\CC(A)=0,\CC(B)=010,\CC(C)=01,\CC(D)=10\}$ is
trivially nonsingular. It is not instantaneous, because the codeword ``$0$''
is a prefix of both ``$010$'' and ``$01$''. The code is also not uniquely
decodable: for example, the string $010$ can be parsed in three distinct ways,
as $B$, $AD$, or $CA$.
\item\label{1.c} The code $\{\CC(A)=10,\CC(B)=00,\CC(C)=11,\CC(D)=110\}$ is
trivially nonsingular. It is not instantaneous: the third codeword ``$11$'' is
a prefix of the last one, ``$110$''. It is, however, uniquely decodable. To
decode, we look at the first two bits of the sequence: ``$00$'' decodes to a
$B$ and ``$10$'' to an $A$. For ``$11$'', however, we do not yet know whether
it is a $C$ or a $D$. To distinguish them, we count the zeros following the
initial ``$11$'':
\begin{itemize}
\item If the number of zeros is even ($2k$), then the first two bits are a $C$
(followed by $k$ $B$'s).
\item If the number of zeros is odd ($2k+1$), then the first two bits are a
$D$ (followed by $k$ $B$'s).
\end{itemize}
The rest of the message is decoded by repeating this procedure.
\item The code $\{\CC(A)=0,\CC(B)=10,\CC(C)=110,\CC(D)=111\}$ is trivially
nonsingular. It is instantaneous because no codeword is a prefix of another
one; hence it is also uniquely decodable.
\end{enumerate}
\end{exercise}

\begin{exercise}~
An instantaneous code satisfies the Kraft inequality:
\begin{align*}
\sum_{i=1}^m D^{-l_i}\le 1.
\end{align*}
For $m=6$ and lengths $\{l_i\} = \{1,1,1,2,2,3\}$ the Kraft inequality reads
$3D^{-1} + 2D^{-2} + D^{-3} \leq 1$. Since $D$ must be a positive integer, the
smallest alphabet size satisfying the inequality is $D=4$: for $D\le 3$ the
left-hand side exceeds $1$ (it equals $17/8$ for $D=2$ and $34/27$ for $D=3$),
while for $D=4$ it equals $3/4+1/8+1/64=57/64\le 1$. This code is not optimal,
because there is a $D=4$ code with shorter codewords, $\{l'_i\} =
\{1,1,1,2,2,2\}$: for instance
$\{\CC(x_1)=0,\CC(x_2)=1,\CC(x_3)=2,\CC(x_4)=30,\CC(x_5)=31,\CC(x_6)=32\}$.
Note that with $D=4$ and six codewords an optimal code does not saturate the
Kraft inequality (for $\{l'_i\}$ the sum is $3/4+3/16=15/16<1$), whereas an
optimal binary code always does. This can be verified by constructing the code
tree.
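\medskip
\noindent
As a quick numerical sanity check, the Kraft sums above can be evaluated
exactly with a few lines of Python (an illustrative sketch; the function name
\texttt{kraft\_sum} is ours):
\begin{verbatim}
from fractions import Fraction

def kraft_sum(lengths, D):
    # Kraft sum  sum_i D^{-l_i}, computed as an exact fraction
    return sum(Fraction(1, D**l) for l in lengths)

for lengths in ([1, 1, 1, 2, 2, 3], [1, 1, 1, 2, 2, 2]):
    for D in (2, 3, 4, 5):
        s = kraft_sum(lengths, D)
        print(lengths, "D =", D, "sum =", s,
              "satisfied" if s <= 1 else "violated")
\end{verbatim}
For $\{1,1,1,2,2,3\}$ the sum first drops below $1$ at $D=4$ (where it equals
$57/64$), confirming that a quaternary alphabet is the smallest one admitting
an instantaneous code with these lengths.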
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item For the random variable $X$ with alphabet $\{A,B,C,D\}$ and associated
probabilities of occurrence
$\{\frac{1}{10},\frac{2}{10},\frac{3}{10},\frac{4}{10}\}$, we construct a
binary Huffman code:
\begin{align*}
\left\lbrace A,\frac{1}{10}\ \mid \ B,\frac{2}{10}\ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10}\right\rbrace \Rightarrow \ A & = 0 \\
B & = 1 \\
\\
\left\lbrace AB,\frac{3}{10}\ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10}\right\rbrace \Rightarrow \ AB & = 0 \\
C & = 1 \\
\\
\left\lbrace ABC,\frac{6}{10}\ \mid \ D,\frac{4}{10} \right\rbrace \Rightarrow ABC & = 0 \\
D & = 1
\end{align*}
This gives us the code $\{\CC(A)=000,\CC(B)=001,\CC(C)=01,\CC(D)=1\}$.
\item Note that if the cardinality $d$ of the code alphabet satisfies
$d \ge 3$, there are not always enough symbols to combine in groups of $d$ at
every step. In this case it is necessary to add extra symbols occurring with
probability zero. Since each iteration reduces the number of symbols by $d-1$,
we should have $1+k(d-1)$ symbols in total, where $k$ is the number of merging
steps. Let us now construct a ternary Huffman code for $X$. For $d=3$ we need
an odd number of symbols, so we add a symbol $E$ with probability zero:
\begin{align*}
\left\lbrace A,\frac{1}{10}\ \mid \ B,\frac{2}{10}\ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10}\ \mid \ E,\frac{0}{10}\right\rbrace \Rightarrow A & = 0 \\
B & = 1 \\
E & = 2 \\
\\
\left\lbrace ABE,\frac{3}{10}\ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10} \right\rbrace \Rightarrow ABE & = 0 \\
C & = 1 \\
D & = 2
\end{align*}
This gives us the code $\{\CC(A)=00,\CC(B)=01,\CC(C)=1,\CC(D)=2\}$.
\item For the source $\{x_i,p_i\}$, a Shannon code assigns the length
$l_i= \left\lceil \log_2\frac{1}{p_i}\right\rceil$ to each symbol $x_i$.
Therefore, $\{l(A)=4,l(B)=3,l(C)=2,l(D)=2\}$. To construct such a code we can
start from the binary Huffman code constructed before. For example, a possible
Shannon code is $\{\CC(A)=0000,\CC(B)=001,\CC(C)=01,\CC(D)=11\}$. The expected
length of the Shannon code is $\overline{l}_S=2.4\ \mbox{bits}$; the expected
length of the Huffman code is $\overline{l}_H=1.9\ \mbox{bits}$. The lower
bound on the optimal code length is
$H(X) = -\sum_{i=1}^4 p_i\log_2 p_i \approx 1.85\ \mbox{bits}$, which verifies
the inequality $H(X)\leq\overline{l}_H\leq\overline{l}_S< H(X)+1$.
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item Let $X$ take the values $\{A,B,C,D\}$ with probabilities
$\{\frac{1}{3},\frac{1}{3},\frac{1}{4},\frac{1}{12}\}$. We construct a binary
Huffman code for $X$:
\begin{align*}
\left\lbrace A,\frac{1}{3}\ \mid \ B,\frac{1}{3}\ \mid \ C,\frac{1}{4}\ \mid \ D,\frac{1}{12}\right\rbrace \Rightarrow C & = 1 \\
D & = 0 \\
\\
\left\lbrace A,\frac{1}{3}\ \mid \ B,\frac{1}{3}\ \mid \ CD,\frac{1}{3} \right\rbrace \Rightarrow B & = 1 \\
CD & = 0 \\
\\
\left\lbrace BCD,\frac{2}{3}\ \mid \ A,\frac{1}{3} \right\rbrace \Rightarrow BCD & = 1 \\
A & = 0
\end{align*}
This gives the code $\{\CC(A)=0,\CC(B)=11,\CC(C)=101,\CC(D)=100\}$.
Alternatively, by regrouping $A$ and $B$ in the second step, we obtain the
Huffman code $\{\CC(A)=11,\CC(B)=10,\CC(C)=01,\CC(D)=00\}$ with the same
expected length.
\item For the Shannon code we obtain $\{l(A)=2,l(B)=2,l(C)=2,l(D)=4\}$. To
construct this code we can start from the second Huffman code above. For
example, one possible Shannon code reads
$\{\CC(A)=11,\CC(B)=10,\CC(C)=01,\CC(D)=0000\}$.
The expected length of the Shannon code is
$\overline{l}_S\approx 2.17\ \mbox{bits}$; the expected length of the Huffman
code is $\overline{l}_H=2\ \mbox{bits}$. The lower bound on the optimal code
length is $H(X) = -\sum_{i=1}^4 p_i\log_2 p_i \approx 1.86$ bits, verifying
the inequality $H(X)\leq\overline{l}_H\leq\overline{l}_S < H(X)+1$. However,
the property $\overline{l}_H\leq\overline{l}_S$ does not imply that every
Shannon codeword is at least as long as the corresponding Huffman codeword:
in the first Huffman code above, $C$ has length $3$, while its Shannon length
is only $2$.
\end{enumerate}
\end{exercise}
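\medskip
\noindent
The numbers in the last two exercises can be reproduced with a short program.
The Python sketch below is an illustration of the same bookkeeping (it tracks
Huffman codeword \emph{lengths} with a heap rather than building the explicit
tree; the function name \texttt{huffman\_lengths} is ours), applied to the
last distribution above:
\begin{verbatim}
import heapq, math

def huffman_lengths(probs):
    # Binary Huffman construction: repeatedly merge the two least
    # likely nodes; each merge adds one bit to every symbol inside.
    heap = [(p, i, [i]) for i, p in enumerate(probs)]  # index i breaks ties
    heapq.heapify(heap)
    lengths = [0] * len(probs)
    while len(heap) > 1:
        p1, _, syms1 = heapq.heappop(heap)
        p2, i2, syms2 = heapq.heappop(heap)
        for s in syms1 + syms2:
            lengths[s] += 1
        heapq.heappush(heap, (p1 + p2, i2, syms1 + syms2))
    return lengths

probs = [1/3, 1/3, 1/4, 1/12]
l_h = huffman_lengths(probs)
l_s = [math.ceil(math.log2(1/p)) for p in probs]          # Shannon lengths
print("Huffman:", sum(p*l for p, l in zip(probs, l_h)))   # 2.0
print("Shannon:", sum(p*l for p, l in zip(probs, l_s)))   # ~2.17
print("Entropy:", -sum(p*math.log2(p) for p in probs))    # ~1.86
\end{verbatim}
Depending on how ties are broken, the heap may return either of the two
Huffman codes found above; both have expected length $2$ bits.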
\begin{exercise}~
\begin{enumerate}[(a)]
\item Alice flips the coin until the first ``head'', which occurs at the
$k$-th try. The expected length of the naive code reads
\[
\overline{l}_n=\sum_{k=1}^\infty kp(1-p)^{k-1}=\frac{1}{p}.
\]
The entropy of the random variable $k$ is given by
\begin{align*}
H(k) &= -\sum_{k=1}^\infty (1-p)^{k-1} p\,\log_2 \big((1-p)^{k-1}p\big) \\
&= -p\log_2(1-p)\sum_{k=1}^\infty(k-1)(1-p)^{k-1} - p\log_2 p\sum_{k=1}^\infty(1-p)^{k-1} \\
&= -\bigl[p\log_2(1-p)\bigr]\frac{1-p}{p^2} - \bigl[p\log_2 p\bigr]\frac{1}{p} \\
&= -\frac{1}{p}\big(p\,\log_2 p + (1-p)\,\log_2 (1-p)\big) \\
&= \frac{1}{p}H(p,1-p).
\end{align*}
Since $H(p,1-p) \leq 1$, we have $H(k)\leq \frac{1}{p} = \overline{l}_n$. The
naive code is optimal [i.e.\ $\overline{l}_n=H(k)$] precisely when $p=1/2$.
\item In the limit $p\rightarrow 0$ the expected length of the naive code
diverges to infinity. The expected length of the Shannon code stays within
one bit of the entropy, $H(k) \leq \overline{l}_S < H(k)+1$, so
$\overline{l}_S \simeq H(k)$. In the limit $p\rightarrow 0$ we obtain
\begin{align*}
\lim_{p\rightarrow 0} \overline{l}_S \simeq \lim_{p\rightarrow 0} H(k)
= \lim_{p\rightarrow 0} \frac{H(p,1-p)}{p}
\simeq \lim_{p\rightarrow 0} \log_2 \frac{1}{p},
\end{align*}
which also diverges to infinity, but only logarithmically in $1/p$;
equivalently, $\overline{l}_n = \frac{1}{p} \simeq 2^{\overline{l}_S}$. The
Shannon code thus gives an exponential gain with respect to the naive code.
\end{enumerate}
\end{exercise}
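\medskip
\noindent
As a numerical illustration of this gain (a small Python sketch; the helper
\texttt{H2} is ours), we can tabulate the two expected lengths as $p$
decreases:
\begin{verbatim}
import math

def H2(p):
    # binary entropy H(p, 1-p) in bits
    return -p * math.log2(p) - (1 - p) * math.log2(1 - p)

for p in (0.5, 0.1, 0.01, 0.001):
    naive = 1 / p          # expected length of the naive code
    H_k = H2(p) / p        # entropy of k, approx. the Shannon length
    print(f"p = {p}: naive = {naive:7.1f},"
          f" H(k) = {H_k:5.2f}, log2(1/p) = {math.log2(1/p):5.2f}")
\end{verbatim}
At $p=1/2$ the naive length and the entropy coincide ($2$ bits), as found in
(a); for small $p$ the naive length grows like $1/p$ while the entropy grows
only logarithmically.

\end{document}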