\documentclass[12pt]{article}
\usepackage[english]{babel}
\usepackage{amsmath}
\voffset -3.2 cm \textwidth 17 cm \textheight 25.8 cm
\oddsidemargin -.54 cm \evensidemargin -1.54 cm
\usepackage[latin1]{inputenc}
\pagestyle{plain}
\def\CC{{\cal C}}
\begin{document}
\centerline {\Large \sc Information and coding theory}
\bigskip \bigskip \hrule \bigskip \bigskip
\centerline {\underline{\Large Solution for the exercise sheet n$^\circ$ 3}:}
\bigskip

\noindent {\bf 3-1.} \par \medskip
\noindent (a) The code $\{\CC(A)=0,\CC(B)=0\}$ is singular. It is neither uniquely
decodable nor instantaneous. \par \medskip
\noindent (b) The code $\{\CC(A)=0,\CC(B)=010,\CC(C)=01,\CC(D)=10\}$ is trivially
nonsingular. It is not instantaneous, because the codeword ``$0$'' of $A$ is a
prefix of the codeword ``$01$'' of $C$. The code is not uniquely decodable: for
example, the sequence $010$ can be decoded as three distinct source messages,
$B$, $AD$ or $CA$. \par \medskip
\noindent (c) The code $\{\CC(A)=10,\CC(B)=00,\CC(C)=11,\CC(D)=110\}$ is trivially
nonsingular. It is not instantaneous: the third codeword ``$11$'' is a prefix of
the last one, ``$110$''. We now prove that the code is uniquely decodable. To
decode, we look at the first two bits of the sequence. If they are ``$00$'' we
decode a $B$; if they are ``$10$'' we decode an $A$. For ``$11$'', however, we do
not yet know whether it is a $C$ or a $D$. To decide, we count the zeros that
follow this pair of ones:
\begin{itemize}
\item If the number of zeros is even ($2k$), then the pair ``$11$'' is a $C$
(followed by $k$ $B$'s).
\item If the number of zeros is odd ($2k+1$), then the pair ``$11$'' is a $D$
(followed by $k$ $B$'s).
\end{itemize}
One decodes the rest of the sequence by repeating this method. \par \medskip
\noindent (d) The code $\{\CC(A)=0,\CC(B)=10,\CC(C)=110,\CC(D)=111\}$ is trivially
nonsingular. It is instantaneous because no codeword is a prefix of another one.
Being instantaneous, it is also uniquely decodable.
\bigskip

\noindent {\bf 3-2.} \par \medskip
\noindent An instantaneous code satisfies the Kraft inequality
$\quad \sum_{i=1}^m D^{-l_i}\le 1$. \newline
\noindent For $m=6$ and lengths $\{l_i\} = \{1,1,1,2,2,3\}$ the Kraft inequality
reads $3D^{-1} + 2D^{-2} + D^{-3} \leq 1$. The smallest positive integer $D$
satisfying this inequality is $D=4$: for $D=2$ the left-hand side equals $2.125$,
for $D=3$ it equals $1+\frac{2}{9}+\frac{1}{27}>1$, while for $D=4$ it equals
$\frac{3}{4}+\frac{2}{16}+\frac{1}{64}=\frac{57}{64}\leq 1$. This code is not
optimal, because there is an instantaneous code with the shorter lengths
$\{l'_i\} = \{1,1,1,2,2,2\}$, for instance
$\{\CC(x_1)=0,\CC(x_2)=1,\CC(x_3)=2,\CC(x_4)=30,\CC(x_5)=31,\CC(x_6)=32\}$.
Remark: for $D=4$ and $6$ codewords, an optimal code does not saturate the Kraft
inequality (a complete quaternary tree has $1+3k$ leaves), whereas an optimal
binary code with $6$ codewords does. This can be verified by constructing the
corresponding code trees.
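\par \medskip
\noindent Numerical check (optional): the minimal alphabet size can also be found
by evaluating the Kraft sum directly. The following short Python sketch (the
helper name \verb|kraft_sum| is our own choice, not part of the exercise) tests
successive values of $D$:
\begin{verbatim}
from math import fsum

def kraft_sum(lengths, D):
    # Kraft sum: sum_i D**(-l_i) for code alphabet size D
    return fsum(D ** -l for l in lengths)

lengths = [1, 1, 1, 2, 2, 3]
for D in range(2, 5):
    print(D, kraft_sum(lengths, D), kraft_sum(lengths, D) <= 1)
# D=2: 2.125 (fails), D=3: 1.259... (fails), D=4: 0.890625 (holds)
\end{verbatim}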
\bigskip

\noindent {\bf 3-3.} \par \medskip
\noindent (a) For the random variable $X$ with letters $\{A,B,C,D\}$ which appear
with probabilities $\{\frac{1}{10},\frac{2}{10},\frac{3}{10},\frac{4}{10}\}$, we
construct a binary Huffman code:
\begin{eqnarray*}
\left\lbrace A,\frac{1}{10}\ \mid \ B,\frac{2}{10}\ \mid \ C,\frac{3}{10} \ \mid \ D,\frac{4}{10}\right\rbrace
\Rightarrow \ A & = & 0 \\
B & = & 1 \\
&&\\
\left\lbrace AB,\frac{3}{10} \ \mid \ C,\frac{3}{10} \ \mid \ D,\frac{4}{10}\right\rbrace
\Rightarrow \ AB & = & 0 \\
C & = & 1 \\
&&\\
\left\lbrace ABC,\frac{6}{10}\ \mid \ D,\frac{4}{10} \right\rbrace
\Rightarrow ABC & = & 0 \\
D & = & 1
\end{eqnarray*}
This leads to the code: $\{\CC(A)=000,\CC(B)=001,\CC(C)=01,\CC(D)=1\}$.
\par \medskip
\noindent (b) We remark that if the cardinality $d$ of the code alphabet satisfies
$d \ge 3$, there are not always enough symbols to combine in groups of $d$ at
every step. In this case, it is necessary to add extra symbols which appear with
zero probability. Since each merging step reduces the number of symbols by $d-1$,
we should have in total $1+k(d-1)$ symbols, where $k$ is the number of merging
steps. Let us now construct a ternary Huffman code for $X$. For $d=3$, we should
have an odd number of symbols, so we add a symbol $E$ with zero probability:
\begin{eqnarray*}
\left\lbrace A,\frac{1}{10} \ \mid \ B,\frac{2}{10}\ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10}\ \mid \ E,\frac{0}{10}\right\rbrace
\Rightarrow A & = & 0 \\
B & = & 1 \\
E & = & 2 \\
\\
\left\lbrace ABE ,\frac{3}{10} \ \mid \ C,\frac{3}{10}\ \mid \ D,\frac{4}{10} \right\rbrace
\Rightarrow ABE & = & 0 \\
C & = & 1 \\
D & = & 2
\end{eqnarray*}
This leads to the code: $\{\CC(A)=00,\CC(B)=01,\CC(C)=1,\CC(D)=2\}$.
\par \medskip
\noindent (c) A Shannon code assigns the length
$l_i= \left\lceil \log_2\frac{1}{p_i}\right\rceil$ to each symbol $x_i$ emitted by
the source $\{x_i,p_i\}$. Thus, $\{l(A)=4,l(B)=3,l(C)=2,l(D)=2\}$. To construct
such a code one can extend the codewords of the previous binary Huffman code to
these lengths. For example, a possible Shannon code is
$\{\CC(A)=0000,\CC(B)=001,\CC(C)=01,\CC(D)=11\}$. The expected length of the
Shannon code is $\overline{l}_S=2.4\ \mbox{bits}$; the expected length of the
Huffman code is $\overline{l}_H=1.9\ \mbox{bits}$. The bound on the optimal code
length reads $H(X) = -\sum_{i=1}^4 p_i\log_2p_i \approx 1.85 \ \mbox{bits}$.
\newline
\noindent Thus, we have verified the inequality
$H(X)\leq\overline{l}_H\leq\overline{l}_S< H(X)+1$.
\bigskip

\noindent {\bf 3-4.} \par \medskip
\noindent (a) Let $X$ take the values $\{A,B,C,D\}$ with the given probabilities
$\{ \frac{1}{3} , \frac{1}{3} , \frac{1}{4}, \frac{1}{12} \}$. We construct a
binary Huffman code for $X$:
\begin{eqnarray*}
\left\lbrace A,\frac{1}{3} \ \mid \ B,\frac{1}{3}\ \mid \ C,\frac{1}{4} \ \mid \ D,\frac{1}{12}\right\rbrace
\Rightarrow C & = & 1 \\
D & = & 0 \\
\\
\left\lbrace A,\frac{1}{3} \ \mid \ B,\frac{1}{3} \ \mid \ CD, \frac{1}{3} \right\rbrace
\Rightarrow B & = & 1 \\
CD & = & 0 \\
\\
\left\lbrace BCD,\frac{2}{3}\ \mid \ A,\frac{1}{3} \right\rbrace
\Rightarrow BCD & = & 1 \\
A & = & 0
\end{eqnarray*}
This gives the code $\{\CC(A)=0,\CC(B)=11,\CC(C)=101,\CC(D)=100\}$. Alternatively,
by regrouping $A$ and $B$ in the second step, one obtains the Huffman code
$\{\CC(A)=11,\CC(B)=10,\CC(C)=01,\CC(D)=00\}$ with the same expected length.
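\par \medskip
\noindent Remark (optional): the merging procedure used in exercises 3-3 and 3-4
can be automated. The following minimal Python sketch (the function name
\verb|huffman_code| and the tie-breaking counter are our own conventions, not part
of the exercise) repeatedly pops the two least probable groups and prefixes their
codewords with one bit:
\begin{verbatim}
import heapq
from itertools import count

def huffman_code(probs):
    # probs: dict mapping symbol -> probability; returns symbol -> codeword
    tie = count()  # tie-breaker so equal probabilities never compare dicts
    heap = [(p, next(tie), {s: ""}) for s, p in probs.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        p0, _, c0 = heapq.heappop(heap)  # least probable group
        p1, _, c1 = heapq.heappop(heap)  # second least probable group
        merged = {s: "0" + w for s, w in c0.items()}
        merged.update({s: "1" + w for s, w in c1.items()})
        heapq.heappush(heap, (p0 + p1, next(tie), merged))
    return heap[0][2]

print(huffman_code({"A": 1/3, "B": 1/3, "C": 1/4, "D": 1/12}))
# e.g. {'D': '00', 'C': '01', 'A': '10', 'B': '11'}
\end{verbatim}
\noindent Up to the labelling of the branches, this reproduces the second Huffman
code of (a), with expected length $2$ bits.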
\par \medskip
\noindent (b) For the Shannon code we obtain the lengths
$\{l(A)=2,l(B)=2,l(C)=2,l(D)=4\}$. To construct such a code we can extend the
previous Huffman code. For example, one possible Shannon code reads
$\{\CC(A)=11,\CC(B)=10,\CC(C)=01,\CC(D)=0000\}$. The expected length of the
Shannon code is $\overline{l}_S\approx 2.17\ \mbox{bits}$; the expected length of
the Huffman code is $\overline{l}_H=2\ \mbox{bits}$. The bound on the optimal
code length reads $H(X) = -\sum_{i=1}^4 p_i\log_2p_i \approx 1.86$ bits. Thus, we
have verified the inequality
$\ H(X)\leq\overline{l}_H\leq\overline{l}_S < H(X)+1$. Note, however, that
$\overline{l}_H\leq\overline{l}_S$ does not imply that every codeword of the
Shannon code is at least as long as the corresponding Huffman codeword: in the
first Huffman code of (a), $\CC(C)=101$ has length $3$, while the Shannon code
assigns $C$ only $l(C)=2$ bits.
\bigskip

\noindent {\bf 3-5.} \par \medskip
\noindent (a) Alice flips the coin and obtains ``heads'' for the first time at the
$k$-th flip. The expected length of the naive code reads
\[
\overline{l}_n=\sum_{k=1}^\infty k\,p(1-p)^{k-1}=\frac{1}{p}.
\]
The entropy of the random variable $k$ is given by
\begin{eqnarray}
H(k) &=& -\sum_{k=1}^\infty (1-p)^{k-1} p\ \log_2 \big((1-p)^{k-1}p\big) \nonumber \\
&=& -p\log_2(1-p)\sum_{k=1}^\infty(k-1)(1-p)^{k-1} - p\log_2p\sum_{k=1}^\infty(1-p)^{k-1} \nonumber \\
&=& -\bigl[p\log_2(1-p)\bigr]\frac{1-p}{p^2} - \bigl[p\log_2p\bigr]\frac1p \nonumber \\
&=& -\frac{1}{p}\big(p\ \log_2 p + (1-p)\ \log_2 (1-p)\big) \nonumber \\
&=& \frac{1}{p}H(p,1-p). \nonumber
\end{eqnarray}
Since $H(p,1-p) \leq 1$, we have $H(k)\leq \frac{1}{p} = \overline{l}_n$. \newline
The naive code is optimal [i.e.\ $\overline{l}_n=H(k)$] if and only if $p=1/2$.
\par \medskip
\noindent (b) In the limit $p\rightarrow 0$ the expected length of the naive code
diverges to infinity. The expected length of the Shannon code is approximately
the entropy, $\overline{l}_S \simeq H(k)$ (recall the inequality
$H(k) \leq \overline{l}_S < H(k)+1$). In the limit $p\rightarrow 0$ we obtain
\begin{eqnarray}
\lim_{p\rightarrow 0} \overline{l}_S \simeq \lim_{p\rightarrow 0} H(k)
= \lim_{p\rightarrow 0} \frac{H(p,1-p)}p \simeq \lim_{p\rightarrow 0} \log_2 \frac{1}{p},
\nonumber
\end{eqnarray}
which also diverges to infinity, but only as the logarithm of the naive length:
$\quad \overline{l}_n = \frac{1}{p} \simeq 2^{\overline{l}_S}$. Thus, the Shannon
code gives an exponential gain with respect to the naive code.
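\par \medskip
\noindent Numerical illustration (optional): the exponential gap can be checked
directly. In the following Python sketch (the helper name \verb|h2| for the
binary entropy is our own choice), $H(k)=H(p,1-p)/p$ grows like $\log_2(1/p)$,
while the naive length $1/p$ blows up much faster:
\begin{verbatim}
from math import log2

def h2(p):
    # binary entropy H(p, 1-p) in bits, for 0 < p < 1
    return -p * log2(p) - (1 - p) * log2(1 - p)

for p in (0.5, 0.1, 0.01, 0.001):
    naive = 1 / p        # expected naive length (k bits for outcome k)
    shannon = h2(p) / p  # entropy H(k), approx. the Shannon length
    print(f"p={p}: naive={naive:.0f}, H(k)={shannon:.2f} bits")
# p=0.01 gives naive=100 while H(k) is only about 8.08 bits
\end{verbatim}
\end{document}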