\documentclass[a4paper,11pt]{article}
\usepackage{amsmath}
\usepackage{amstext}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{braket}
\usepackage{graphicx,subcaption}
\usepackage[percent]{overpic}
\usepackage{bbm}
\usepackage{bm}
\usepackage{threeparttable}
\usepackage{tabularx}
\usepackage{multirow}
\usepackage{booktabs}
\usepackage[nohyperlinks]{acronym}
\usepackage{microtype}
\usepackage{pifont}
\usepackage{pdfpages,diagbox}
\usepackage[utf8]{inputenc}
\usepackage[labelformat=empty,labelsep=none]{caption}
\usepackage{enumerate}
% let \ref print the alphabetic item labels used by the enumerate lists below
\renewcommand{\theenumi}{\alph{enumi}}
\usepackage{xcolor}
\usepackage{hyperref}
\definecolor{ulbblue}{cmyk}{1,0.75,0.2,0.18}
\hypersetup{colorlinks,
  citecolor=ulbblue,
  linkcolor=ulbblue,
  urlcolor=ulbblue
}
\makeatletter
\renewcommand\@pnumwidth{1.8em}
\makeatother
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\usepackage{fix-cm}
\usepackage[charter,cal=cmcal,sfscaled=false]{mathdesign}
\usepackage[paper=a4paper,innermargin=4cm,outermargin=3cm,top=3.5cm,bottom=3.5cm]{geometry}
\setlength{\headheight}{13.6pt}
\usepackage{setspace}
\usepackage[PetersLenny]{fncychap}
\usepackage{fancyhdr}

\theoremstyle{definition}
\newtheorem{exercise}{Exercise}
\newtheorem*{definition}{Definition}

\setlength{\textheight}{26cm}
\setlength{\textwidth}{16.5cm}
\setlength{\topmargin}{-2cm}
\setlength{\oddsidemargin}{-0.4cm}

\begin{document}

\noindent
\fbox{\parbox{\textwidth}{%
INFO-H-451\hfill 2014--2015\\
\centering
\textsc{\Large Information and Coding Theory}\\
Exercise Sheet 2
}}

\section*{}

\begin{exercise}~The random variables $X$ and $Y$ have the joint probability distribution given by the table below:\\
\begin{center}
\def\arraystretch{1.3}
\begin{tabular}{|c||c|c|c|}
\hline
\diagbox{$X$}{$Y$} & 0 & 1 & 2\\
\hline\hline
0 & $\frac{1}{9}$ & $\frac{1}{9}$ & $\frac{1}{9}$\\
\hline
1 & $\frac{1}{9}$ & $0$ & $\frac{2}{9}$\\
\hline
2 & $\frac{1}{9}$ & $\frac{2}{9}$ & $0$\\
\hline
\end{tabular}
\end{center}
Find the following quantities (the relevant definitions are recalled below):
\begin{enumerate}[(a)]
\item $H(X)$ and $H(Y)$.
\item $H(X|Y)$ and $H(Y|X)$.
\item $H(X,Y)$.
\item $I(X;Y)$.
\item Draw a Venn diagram relating the quantities obtained above.
\end{enumerate}
\end{exercise}
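\noindent\emph{Reminder} (Exercise 1): each marginal distribution is obtained by summing the joint table over the other variable, e.g.
\[
p_X(0)=\sum_{y} p(X=0,Y=y)=\tfrac{1}{9}+\tfrac{1}{9}+\tfrac{1}{9}=\tfrac{1}{3}\,,
\]
and the requested quantities then follow from $H(X)=-\sum_{x}p_X(x)\log_{2}p_X(x)$, $H(X|Y)=H(X,Y)-H(Y)$ and $I(X;Y)=H(X)+H(Y)-H(X,Y)$.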
\begin{exercise}~An urn contains $r$ red, $g$ green and $b$ black balls. Let the random variable $X$ be the color of a ball drawn at random from the urn.
\begin{enumerate}[(a)]
\item\label{ex2:a} Show that the entropy of drawing $k$ balls with replacement is given by $H(X_{1},X_{2},\ldots,X_{k})=k\cdot H(X)$, where $H(X)$ is the entropy of a single draw.
\item\label{ex2:b} If the balls are not replaced, and if we do not look at the color of the balls that were taken out, give an intuitive argument why $H(X_i)=H(X)$ for every draw $i$, where $H(X)$ is the entropy of a single draw. For a more rigorous argument, it may help to first solve (\ref{ex2:c}) to (\ref{ex2:f}).
\item\label{ex2:c} The balls are not replaced. Compare the probability of drawing color $c_1$ at the first draw and $c_2$ at the second draw with the probability of drawing $c_2$ at the first draw and $c_1$ at the second draw.
\item\label{ex2:d} The balls are not replaced. What is the probability that a red ball is drawn at the second draw?
\item\label{ex2:e} The balls are not replaced. What can we say about the marginal probabilities?
\item\label{ex2:f} The balls are not replaced. Compare the entropy of the second draw with that of the first draw.
\item\label{ex2:g} Show that the entropy of drawing $k$ balls without replacement is less than or equal to the entropy of drawing $k$ balls with replacement.
\end{enumerate}
\end{exercise}

\begin{exercise}~The random variables $X$, $Y$ and $Z$ form a Markov chain $X\rightarrow Y\rightarrow Z$ if the conditional distribution of $Z$ depends only on $Y$, i.e.\ $p(X,Y,Z)=p(X)\,p(Y|X)\,p(Z|Y)$.
\begin{enumerate}[(a)]
\item\label{ex3:a} Show that $X$ and $Z$ are conditionally independent given $Y$, i.e.\ $p(X,Z|Y)=p(X|Y)\,p(Z|Y)$.
\item Show that $I(X;Y)\geq I(X;Z)$ by using the result of (\ref{ex3:a}) and the chain rule for mutual information applied to $I(X;Y,Z)$.
\item The random variables $X$, $Y$ and $Z$ have probability distributions on sets ${\mathcal{X}}$ of cardinality $n$, ${\mathcal{Y}}$ of cardinality $k$ and ${\mathcal{Z}}$ of cardinality $m$. Show that if $n>k$ and $m>k$, then $I(X;Z)\leq \log_{2} k$.
\item Explain what happens if $k=1$.
\end{enumerate}
\end{exercise}

\begin{exercise}~Consider binary sequences of length $n$ emitted by a source of independent and identically distributed (i.i.d.) random variables: each bit is emitted independently of the previous ones, with a $1$ emitted with probability $p$ (Bernoulli distribution). A \textbf{typical sequence} contains $k$ ones and $(n-k)$ zeros, with $k\simeq np$ (the mean number of ones, rounded to the closest integer).
\begin{enumerate}[(a)]
\item What is the probability that the source emits a particular typical sequence? Give an approximation to that probability as a function of the entropy of the source.
\item What is the number of typical sequences, expressed as a function of the entropy of the source? Compare it with the total number of sequences that can be emitted by the source. What is the approximate probability that a typical sequence is emitted? (Use the Stirling approximation $n!\approx (n/e)^{n}$.)
\item What is the most probable sequence that can be emitted by the source? Is it typical?
\end{enumerate}
\end{exercise}

\begin{exercise}~
\begin{definition}
$\rho(x,y)$ is a \textbf{metric} if it satisfies the following properties $\forall x,y,z$:
\begin{itemize}
\item $\rho(x,y)\ge 0$
\item $\rho(x,y)=\rho(y,x)$
\item $\rho(x,y)=0$ if and only if $x=y$
\item $\rho(x,y)+\rho(y,z)\ge \rho(x,z)$
\end{itemize}
\end{definition}
\begin{enumerate}[(a)]
\item Let $\rho(X,Y)=H(X|Y)+H(Y|X)$. Verify that $\rho(X,Y)=H(X,Y)-I(X;Y)=2H(X,Y)-H(X)-H(Y)$, and interpret this relation with the help of a Venn diagram.
\item Prove that $\rho(X,Y)$ satisfies all properties of a metric if the notation $X=Y$ is understood to mean that there exists a bijection $f$ such that $Y=f(X)$.
\end{enumerate}
\emph{Hint}: To show the last property (the triangle inequality), use strong subadditivity, recalled below.
\end{exercise}
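\noindent\emph{Reminder} (Exercise 5): the strong subadditivity invoked in the hint states that, for any three random variables $X$, $Y$ and $Z$,
\[
H(X,Y,Z)+H(Y)\le H(X,Y)+H(Y,Z),
\]
which is equivalent to $H(X|Y,Z)\le H(X|Y)$: conditioning on an additional variable cannot increase the entropy.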
\begin{exercise}~Let $X$, $Y$ and $Z$ be uniformly distributed binary random variables, such that $H(X)=H(Y)=H(Z)=1$~bit. If $Z$ is ignored, the mutual information of $X$ and $Y$ can be defined as $I(X;Y)=H(X)-H(X|Y)$. If $Z$ is taken into account, the mutual information \textbf{conditioned} on $Z$ can be defined as $I(X;Y|Z)=H(X|Z)-H(X|Y,Z)$. Finally, the mutual information of all three random variables can be defined as $I(X;Y;Z)=I(X;Y)-I(X;Y|Z)$. The last quantity can be positive, negative or zero.
\begin{enumerate}[(a)]
\item Give an example of a joint distribution of $X$, $Y$ and $Z$ such that $I(X;Y;Z)>0$.
\item Give an example of a joint distribution of $X$, $Y$ and $Z$ such that $I(X;Y;Z)<0$.
\end{enumerate}
\end{exercise}

\section*{}
\begin{center}
\textbf{\color{ulbblue}The exercises and solutions are available at \url{http://quic.ulb.ac.be/teaching}}
\end{center}

\end{document}