\documentclass[a4paper,11pt]{article}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{braket}
\usepackage{microtype}
\usepackage{pifont}
\usepackage{pdfpages,diagbox}
\usepackage[utf8]{inputenc}
\usepackage{color}
\usepackage{enumerate}
\makeatletter
\renewcommand\@pnumwidth{1.8em}
\makeatother
\usepackage[T1]{fontenc}
\usepackage[charter,cal=cmcal,sfscaled=false]{mathdesign}
\usepackage[paper=a4paper,innermargin=4cm,outermargin=3cm,top=3.5cm,bottom=3.5cm]{geometry}
\setlength{\headheight}{13.6pt}
\usepackage{setspace}
\usepackage{fancyhdr}

\theoremstyle{definition}
\newtheorem{exercise}{Exercise}
\newtheorem*{definition}{Definition}

\begin{document}

\noindent
\fbox{\parbox{\dimexpr\textwidth-2\fboxsep-2\fboxrule\relax}{%
INFO-H-422\hfill 2016--2017\\
\centerline{\textsc{\Large Information and Coding Theory}}%
}}

\begin{center}
\section*{\textcolor{red}{Solutions to Exercise Sheet 1}}
\end{center}

\begin{exercise}~
\begin{enumerate}[(a)]
\item Example: $\mathcal{X}=\{0,1,\dots,m-1\}$ with $p(x)=\frac{1}{m}$ for all $x\in\mathcal{X}$.
\item $H(X)=-\sum_{i=0}^{m-1}p_{i}\log_{2}p_{i}=\log_{2}m=6$ bits for $m=64$.
\item 6 bits are needed, since $2^{6}=64$.
\item 3 symbols are needed, since $4^{3}=64$.
\item Define the Lagrangian
\[
L(\{p_{i}\})=H(X)+\lambda\Big[\sum_{i=0}^{m-1}p_{i}-1\Big].
\]
The condition for an extremum is
\[
\frac{\partial L}{\partial p_{i}}=0\qquad\forall i.
\]
Since $H(X)$ is concave, the extremum is a maximum. The maximizing distribution satisfies
\[
-\log_{2}p_{i}-\frac{1}{\ln 2}+\lambda=0\qquad\forall i,
\]
so $p_{i}=2^{\lambda-\frac{1}{\ln 2}}$, \emph{i.e.} $p_{i}$ is the same constant for every $i$. Imposing the normalization constraint then gives $p_{i}=\frac{1}{m}$.
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item $H(X_{p})=1.75$ bits.
\item $H(X_{q})=2$ bits.
\item The expected codeword length is $L_{p}=1.75$ bits for the distribution $p$ and $L_{q}=2.25$ bits for the distribution $q$.
\item The entropy is the minimal expected codeword length that can be achieved. The binary code $C$ is optimal for the distribution $p$, since its expected length satisfies $L_{p}=H(X_{p})$. For the distribution $q$ we find $L_{q}>H(X_{q})$ and $L_{q}>L_{p}$, so $C$ is not optimal for $q$. An optimal code for $q$ simply enumerates the elements of $\mathcal{X}$ with codewords of equal length $H(X_{q})=2$ bits; a uniform source cannot be compressed further.
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item $H(X)=2$ bits.
\item Sequence of questions:\\
Did ``head'' come up on the first flip?\\
Did ``head'' come up on the second flip?\\
\vdots\\
Did ``head'' come up on the $n$th flip?\\
One bit can be associated with the answer to each question, so the answers to $n$ questions are encoded in $n$ bits. Since $p(n)=2^{-n}$, each answer sequence has length $n=-\log_{2}p(n)$, and the expected number of ``yes/no'' questions is $\sum_{n=1}^{\infty}n\,p(n)=H(X)=2$. The expected number of questions equals the entropy, which shows that this sequence of questions is optimal.
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item $H(Y)=H(X)=1.875$ bits, because the function is bijective (fixing $Y$ also fixes $X$).
\item The function is not bijective, so $H(Y)<H(X)$, with $H(X)=\frac{1}{2}+\log_{2}3\approx 2.085$ bits and $H(Y)=\frac{3}{2}+\frac{1}{2}\log_{2}3-\frac{5}{12}\log_{2}5\approx 1.325$ bits.
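For concreteness, here is one distribution consistent with these values (a sketch only: the distribution and the function $f$ on the exercise sheet may differ, but any choice with these weights reproduces the stated entropies). Take $p_{X}=(1/3,1/3,1/6,1/12,1/12)$ and let $f$ merge outcomes so that $p_{Y}=(1/2,5/12,1/12)$. Then
\begin{align*}
H(X)&=2\cdot\tfrac{1}{3}\log_{2}3+\tfrac{1}{6}\log_{2}6+2\cdot\tfrac{1}{12}\log_{2}12=\tfrac{1}{2}+\log_{2}3,\\
H(Y)&=\tfrac{1}{2}\log_{2}2+\tfrac{5}{12}\log_{2}\tfrac{12}{5}+\tfrac{1}{12}\log_{2}12=\tfrac{3}{2}+\tfrac{1}{2}\log_{2}3-\tfrac{5}{12}\log_{2}5.
\end{align*}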
\item $H(X,f(X))=H(X)+H(f(X)|X)$, where $H(f(X)|X)=0$ because knowing $X$ fixes $f(X)$.\\
$H(f(X),X)=H(f(X))+H(X|f(X))$, where $H(X|f(X))\geq 0$.\\
Since $H(f(X),X)=H(X,f(X))$, it follows that $H(f(X))\leq H(X)$.\\
The bound is saturated if $H(X|f(X))=0$, i.e. if $f$ is injective on the values taken by $X$, so that $Y=f(X)$ also fixes $X$.
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{enumerate}[(a)]
\item Definition of the conditional entropy: $H(Y|X)=\sum_{x\in\mathcal{X}}p(x)H(Y|X=x)$. Therefore
\[
H(Z|Y)=\sum_{y\in\mathcal{Y}}p(y)H(Z|Y=y)=\sum_{y\in\mathcal{Y}}p(y)H(X+Y|Y=y)=\sum_{y\in\mathcal{Y}}p(y)H(X|Y=y)=H(X|Y),
\]
where the third equality holds because, for fixed $Y=y$, adding the constant $y$ to $X$ is a bijection and does not change the entropy. If $X$ and $Y$ are independent, then $H(X|Y)=H(X)$. As conditioning can only reduce the entropy, $H(Z|Y)\leq H(Z)$. Combining these relations gives $H(X)\leq H(Z)$, and similarly $H(Y)\leq H(Z)$.
\item Example (note that $X$ and $Y$ are \emph{not} independent here, so the inequality from (a) need not hold):
\begin{center}
\begin{tabular}{|c|c|c|c|c||c|}
\hline
\backslashbox{$X$}{$Y$}&$-1$&$-2$&$-3$&$-4$&$P(X)$\\
\hline
1&$1/4$&$0$&$0$&$0$&$1/4$\\
\hline
2&$0$&$1/4$&$0$&$0$&$1/4$\\
\hline
3&$0$&$0$&$1/8$&$1/8$&$1/4$\\
\hline
4&$0$&$0$&$1/8$&$1/8$&$1/4$\\
\hline\hline
$P(Y)$&$1/4$&$1/4$&$1/4$&$1/4$&\\
\hline
\end{tabular}
\end{center}
The marginals are uniform, so $H(X)=H(Y)=H(1/4,1/4,1/4,1/4)=\log_{2}4=2$ bits.\\
The sum $Z=X+Y$ takes values in $\mathcal{Z}=\{-3,-2,-1,0,1,2,3\}$ with $P(Z=0)=3/4$, $P(Z=1)=1/8$ and $P(Z=-1)=1/8$; all other probabilities are zero. Hence
\[
H(Z)=-\tfrac{3}{4}\log_{2}\tfrac{3}{4}-2\cdot\tfrac{1}{8}\log_{2}\tfrac{1}{8}\approx 1.061\ \mathrm{bits},
\]
so $H(X)>H(Z)$ and $H(Y)>H(Z)$.
\item We require that $X$ and $Y$ are independent and that the sums $z_{i,j}=x_{i}+y_{j}$ are distinct for all pairs $(i,j)$. Then $p_{Z}(z_{i,j})=p_{X}(x_{i})\,p_{Y}(y_{j})$, i.e. $Z$ determines the pair $(X,Y)$, and substituting this into the definition of $H(Z)$ gives $H(Z)=H(X,Y)=H(X)+H(Y)$.\\
Example: $\mathcal{X}=\{1,2,3\}$ and $\mathcal{Y}=\{10,20,30,40\}$, for any marginal distributions, provided $X$ and $Y$ are independent.
\end{enumerate}
\end{exercise}

\begin{exercise}~\textbf{Optional}
\begin{center}
\begin{tabular}{|c|c|c|c||c|}
\hline
\backslashbox{$Y$}{$X$}&$-1$&$0$&$1$&$P(Y)$\\
\hline
$-2$&$0$&$1/3$&$0$&$1/3$\\
\hline
$1$&$1/3$&$0$&$1/3$&$2/3$\\
\hline\hline
$P(X)$&$1/3$&$1/3$&$1/3$&\\
\hline
\end{tabular}
\end{center}
In this example $\langle X\rangle=\langle Y\rangle=\langle XY\rangle=0$, so the covariance $\langle XY\rangle-\langle X\rangle\langle Y\rangle$ vanishes and the correlation coefficient is $r=0$, even though $X$ and $Y$ are not independent. Their mutual information is nevertheless nonzero: from the table, $H(X)=\log_{2}3$, $H(Y)=H(1/3,2/3)=\log_{2}3-\tfrac{2}{3}$ and $H(X,Y)=\log_{2}3$, so
\[
H(X:Y)=H(X)+H(Y)-H(X,Y)=\log_{2}3-\tfrac{2}{3}\approx 0.918\ \mathrm{bits}.
\]
\end{exercise}

\end{document}