\documentclass[12pt]{article}
\usepackage[french]{babel}
\voffset -2.8 cm
\textwidth 17 cm
\textheight 24.7 cm
\oddsidemargin -.54 cm
\evensidemargin -1.54 cm
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage[latin1]{inputenc}
\pagestyle{plain}
\begin{document}
\centerline {\Large \sc Information and coding theory}
\bigskip \bigskip
\hrule
\bigskip \bigskip
\centerline {\underline{\Large Solution of exercise sheet n$^\circ$ 5}:}
\bigskip
\noindent {\bf 5-1.}
\par \medskip
Channel capacity: $ C= \max_{p(x)} I(X:Y) $. There are three ways to calculate $I(X:Y)$:
\begin{enumerate}
\item $I(X:Y) =\sum_{x,y} p(x,y) \log_2 \frac{p(x,y)}{p(x)p(y)}$.
\item $I(X:Y)=H(X)-H(X|Y)$.
\item $I(X:Y)=H(Y)-H(Y|X)$.
\end{enumerate}
We remark that for the (memoryless) additive noise channel, where the input $X$ and the noise $Z$ are independent, we can use the relation $H(Y|X)=H(X+Z|X)=H(Z)$. Therefore, for the calculation of the capacity in this exercise we can use the equation
\begin{equation}\label{eq:capacity}
C = \max_{p(x)}\{H(Y)\} - H(Z),
\end{equation}
where the second term $H(Z)$ does not depend on $X$ (and thus not on $p(x)$). In the following we distinguish three cases: I) $a=0$, II) $a > 1$ and III) $a = 1$.
\begin{itemize}
\item I) {\bf $a = 0$:} No noise is added, thus $Y=X$ and $H(Z)=0$. The capacity is therefore $C= \max_{p} H(p,1-p)=1$ bit.
\item II) {\bf $a > 1$:} The output alphabet reads ${\mathcal Y}= \{ 0,1,a,1+a \}$. For the input variable $X$ we define the general probability distribution $P(X=0)=p$, $P(X=1)=1-p$. Then we can compute the probability distribution of the output $Y$, \emph{i.e.} $P(Y=0)=P(X=0) P(Z=0)=p/2$, $P(Y=1)=P(X=1) P(Z=0)=(1-p)/2$, $P(Y=a)=P(X=0) P(Z=a)=p/2$ and $P(Y=a+1)=P(X=1) P(Z=a)=(1-p)/2$. We conclude that each output can be associated with a unique combination of input $X$ and noise $Z$, and thus no error is made. We confirm that, as in I), $C=1$ bit by injecting the probability distribution $p(y)$ into equation \eqref{eq:capacity}:
\[
C = \max_p\{-p \log_2(p/2) -(1-p)\log_2(\frac{1-p}{2})\} - 1 \textrm{ bit}.
\]
This is maximized by $p=1/2$, for which $C=1$ bit.
\item III) {\bf $a=1$:} In this case, ${\mathcal Y} = \{ 0,1,2 \}$. As above, if one obtains $Y=0$ or $Y=2$ one makes no error when estimating $X$. However, $Y=1$ corresponds to either $X=0, Z=1$ or $X=1, Z=0$. We find the output probability distribution $p(y) = \{p/2, 1/2, (1-p)/2 \}$. Injected into the capacity formula \eqref{eq:capacity}, this yields $C=1/2$ bit, attained for $p=1/2$ (the explicit maximization is spelled out below).
\end{itemize}
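\medskip
\noindent
As a quick check of case III, the maximization in Eq.~\eqref{eq:capacity} can be carried out explicitly (this is just the computation sketched above, written out). With $p(y)=\{p/2,1/2,(1-p)/2\}$ and $H(Z)=H(1/2,1/2)=1$ bit,
\begin{align*}
H(Y) &= -\frac{p}{2}\log_2\frac{p}{2}-\frac{1}{2}\log_2\frac{1}{2}-\frac{1-p}{2}\log_2\frac{1-p}{2},\\
\frac{\partial H(Y)}{\partial p} &= \frac{1}{2}\log_2\frac{1-p}{p}=0 \quad\Longrightarrow\quad p=\frac{1}{2},
\end{align*}
so that $\max_p H(Y)=3/2$ bits and $C=3/2-1=1/2$ bit, as stated.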
\bigskip
\noindent {\bf 5-2.}
This exercise can be solved in the same way as ex.~{\bf 5-1}, \emph{i.e.} because the input $X$ and the noise $Z$ are independent we can again use Eq.~\eqref{eq:capacity} (careful: in general \eqref{eq:capacity} \textbf{is not valid}!).
\par \medskip
\noindent (a) We again parametrize the input probability distribution, but now, as the input takes 4 values, we set it to $p(x)=\{a,b,c,d\}$ where $a+b+c+d=1$ (alternatively one can include the constraint in the parametrization and write $p(x)=\{a,b,c,1-a-b-c\}$). We remark that $-1 \equiv 3 \pmod 4$, so the output alphabet reads ${\mathcal Y} = \{ 0,1,2,3 \}$. Now we have to find the parameters $a,b,c,d$ that maximize the output entropy $H(Y)$. We can express (similarly to ex.~{\bf 5-1}) $p(y)$ as a function of $a,b,c,d$, which reads $p(y)=\{ \frac{a}{4}+\frac{c}{4}+\frac{d}{2}, \frac{b}{4}+\frac{c}{2}+\frac{d}{4}, \frac{a}{4}+\frac{b}{2}+\frac{c}{4}, \frac{a}{2}+\frac{b}{4}+\frac{d}{4} \}$.
We now try to find $a,b,c,d$ such that the (optimal) uniform distribution $p(y)=\{1/4,1/4,1/4,1/4\}$ is reached (careful: in general it may \textbf{not be possible} to achieve this! In that case one needs to apply the method of Lagrange multipliers). We thus have 4 equations plus the constraint $a+b+c+d=1$ to solve (one of the equations is linearly dependent on the others). We find that $a=c$ and $b=d$. Hence there are infinitely many solutions, among them $a=b=c=d=1/4$, \emph{i.e.} the uniform distribution for $X$ is a solution.
\par \medskip
\noindent (b) When we inject the solution of (a) into Eq.~\eqref{eq:capacity} we find $C=\log_2 4 - H(Z) = 2 - 3/2 = 1/2$ bit.
\par \medskip
\noindent (c) We need to sum both noises, $Z_{total}=Z_{1}+Z_{2}$, and again follow a calculation as in (a). One obtains the new noise distribution (attention: $-2 \equiv 2$ since we apply ``mod $4$''): $Z_{total}$ takes the values $-1, 0, 1, 2$ with probabilities $1/4, 3/8, 1/4, 1/8$. We thus have $H(Z_{total})=H(1/4,3/8,1/4,1/8)$ and we find $C=\log_2 4-1.91=0.09$ bits.
\par \medskip
\noindent (d) $C_{total}=C_{1}+C_{2}=1/2+1/2=1$ bit. (The capacity is additive!)
\bigskip
\noindent {\bf 5-3.}
\par \medskip
(a) Let $p(X=1)=p$ and $p(X=0)=1-p$. One obtains:
\begin{equation}
I(X:Y)=H(1-p/2,p/2)-p \nonumber
\end{equation}
Taking into account that
\begin{equation}
\frac{\partial H(x,1-x)}{\partial x}=\log_2\frac{1-x}{x},
\end{equation}
setting $\frac{\partial I(X:Y)}{\partial p}=\frac{1}{2}\log_2\frac{1-p/2}{p/2}-1=0$ shows that the maximum is found for $p^*=2/5$, and $C=I_{p=2/5}(X:Y)=\log_2 5 -2$ bits.
\par \medskip
(b) The binary symmetric channel has the capacity $C=1-H(\alpha)$ bits, where $\alpha$ is the error rate of the channel, because $I(X:Y)=H(Y)-\sum_x p(x)H(Y|X=x)=H(Y)-H(\alpha)\leq 1-H(\alpha)$ bits.
\par \medskip
(c) We have $C= \max_{p(x)} I(X:Y)$, where $I(X:Y)=H(Y)-H(Y|X)$. The entropy at the output is given by $H(Y)=H(1-pq,pq)$ and the conditional entropy reads $H(Y|X)=pH(q,1-q)$. $I(X:Y)$ is maximal if $\frac{\partial I(X:Y)}{\partial p}=0$, which implies
\begin{equation}
q\log_2\frac{1-pq}{pq}=H(q,1-q).
\end{equation}
To simplify notations we write $H(q,1-q)=H$. The distribution $p$ that maximizes $I(X:Y)$ is
\begin{equation}
p=\frac{1}{q(1+2^{H/q})}.
\end{equation}
We finally obtain the capacity of the channel:
\begin{equation}\label{eq:qCap}
C=\log_2(1+2^{H/q})-\frac{H}{q}.
\end{equation}
To check consistency we can test Eq.~\eqref{eq:qCap} for $q=0.5$. Since $H(q=0.5)=1$ we confirm the result of (a), \emph{i.e.} $C(q=0.5) = \log_2 5 - 2$ bits.
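\medskip
\noindent
For completeness, the short algebra leading to Eq.~\eqref{eq:qCap} can be written out. Writing $s=2^{H/q}$, the condition $q\log_2\frac{1-pq}{pq}=H$ gives $\frac{1-pq}{pq}=s$, hence $pq=\frac{1}{1+s}$ and $p=\frac{1}{q(1+s)}$. Inserting this into $I(X:Y)=H(pq,1-pq)-pH$ and using $\log_2 s=H/q$ yields
\begin{equation}
C=\log_2(1+s)-\frac{s}{1+s}\log_2 s-\frac{1}{1+s}\frac{H}{q}
 =\log_2(1+s)-\frac{H}{q}, \nonumber
\end{equation}
which is Eq.~\eqref{eq:qCap}.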
\bigskip
\noindent {\bf 5-4.}
\par \medskip
(a) It does not attain the capacity, because the equiprobable distribution does not maximize the mutual information of the channel of exercise 5-3.
\par \medskip
(b) For a probability distribution $p(x)$, the maximal transmission rate $R$ is bounded from above by $I(X:Y)$. For the channel of exercise 5-3: $R_{p=1/2} \leq I_{p=1/2}(X:Y) = 0.3113$ bits.
\bigskip
\noindent {\bf 5-5.}
\par \medskip
(a) $C=\max_{p(x)} I(X:Y)$ and $\tilde{C}=\max_{p(x)}I(X:\tilde{Y})$. \newline
We have $I(X:Y,\tilde{Y})=I(X:Y) + I(X:\tilde{Y}|Y)$, \newline
and $I(X:Y,\tilde{Y})=I(X:\tilde{Y})+I(X:Y|\tilde{Y})$. \newline
Since $I(X:Y|\tilde{Y}) \geq 0$ and $I(X:\tilde{Y}|Y)=0$ (see exercise 2-3), we deduce that \newline
$I(X:\tilde{Y}) \leq I(X:Y)$. Thus, $\tilde{C} > C$ is impossible.
\par \medskip
(b) One requires $I(X:Y|\tilde{Y})=0$. The chain must thus satisfy $X\rightarrow \tilde{Y} \rightarrow Y$. This is only possible if $Y \leftrightarrow \tilde{Y}$, \emph{i.e.} if $\tilde{Y}=f(Y)$ with $f$ a bijective (invertible) function.
%\bigskip
%\noindent {\bf 5-6.}
%\par
%\medskip
%
%(a) $I(X:Y)= H(Y) - H(Y|X)= H(Y)-\sum_x p(x)H(Y|X=x) =H(Y)- \log_2 2$. Since the maximum of $H(Y)$ is $\log_2 5$, one obtains $C= \log_2(5/2)= 1.32$ bits.
%
%(b) With the 5 codewords $00, 13, 21, 34, 42$ the channel output consists of codewords that are distinguishable from one another:
%$$
%\begin{array}{c|cccc}
%00& 44& 11& 14 & 41 \\
%13 & 02 & 04 & 22 & 24 \\
%21 &10 & 12 & 30 & 32 \\
%34 & 20 & 23 & 40 & 43 \\
%42 & 01 & 03 & 31 & 33
%\end{array}
%$$
%The zero-error transmission rate is greater than or equal to $\frac{1}{2}\log_2 5 = 1.16$ bits. Since $R \leq C$, this gives $C \geq 1.16$ bits and thus $C > 1$ bit.
\end{document}