% No 'submit' option for the problems by themselves.
\documentclass{harvardml}
% Use the 'submit' option when you submit your solutions.
%\documentclass[submit]{harvardml}
\usepackage{url}
% Put in your full name and email address.
\name{Your Name}
\email{email@fas.harvard.edu}
% List any people you worked with.
\collaborators{%
John Doe,
Fred Doe,
}
% You don't need to change these.
\course{CS281-F13}
\assignment{Assignment \#5}
\duedate{11:59pm, November 22, 2013}
% Useful macros.
\newcommand{\bx}{\boldsymbol{x}}
\newcommand{\distNorm}{\mathcal{N}}
\newcommand{\given}{\,|\,}
\newcommand{\ident}{\mathbb{I}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\bzero}{\boldsymbol{0}}
\newcommand{\mcD}{\mathcal{D}}
\begin{document}
\begin{problem}[50pts]
In this problem, you will implement some simple computations for
Gaussian processes with a one-dimensional input space. You can assume
that the GP is zero mean and has the ``squared exponential''
covariance function (with a little bit of diagonal ``jitter''):
\begin{align*}
K(x, x') &= \alpha
\exp\left\{-\frac{1}{2\ell^2}(x-x')^2\right\} + 10^{-6}\delta(x,x').
\end{align*}
\begin{enumerate}
\item Create a grid with about 100 points or so in an input space
between zero and five. Make several plots, each with different
values for~$\alpha$ and~$\ell$. Sample ten functions from the
Gaussian process for each plot.
\item Recall from the midterm that if one has two Gaussian
variates~$\bx_1$ and~$\bx_2$, both with marginal
distribution~$\distNorm(\bzero, \bSigma)$, then
\begin{align*}
\bx_3 &= \bx_1 \cos\theta + \bx_2\sin \theta
\end{align*}
also has marginal distribution~$\distNorm(\bzero,\bSigma)$ for
any~$\theta$.
For each of the above plots, take two of the independent samples
and blend them as in the equation above. Use a range of points
for~${\theta\in(-\pi,\pi)}$. You can see that varying~$\theta$
results in a smooth blending of these two functions, while still
producing a function that is marginally from the same Gaussian
process. This is the trick that motivates the \emph{elliptical
slice sampling} algorithm.
\item Come up with three or four ``training data'' points, i.e.,
$\{x_n,y_n\}$ pairs. For each of your hyperparameter
variations, plot several functions from the posterior
distribution implied by these training data.
\item Using the same training data, plot the 95\% marginal
envelope for the function. This is like the grey area shown in
Figure 2.2b in the Rasmussen and Williams book.
\item Compute and report the log marginal likelihoods for each of
the hyperparameter settings you have looked at. Discuss how
these marginal likelihoods reflect how well the hyperparameters
fit the training data you invented.
\end{enumerate}
\end{problem}
% Put your solution here.
\begin{problem}[50pts]
In this problem, you'll build a basic implementation of a Dirichlet
process mixture model.
\begin{enumerate}
\item Warmup: Make 200 draws from several different instantiations
of a Chinese restaurant process, using different concentration
parameters~$\alpha$. For each of your concentration parameters,
produce a bar graph that shows how many ``customers'' have been
assigned to each ``table''.
\item Implement MCMC for a Dirichlet process mixture of Gaussians.
Two papers (among many) to check out for your implementation are:
\begin{itemize}
\item Carl Edward Rasmussen. \emph{The Infinite Gaussian Mixture
Model}. NIPS, 1999. [Don't bother implementing adaptive rejection
sampling for the hyperparameter; use slice sampling instead.]
\item Radford M. Neal. \emph{Markov Chain Sampling Methods for
Dirichlet Process Mixture Models}. Journal of Computational and
Graphical Statistics. 9:249--265, 2000. [Neal's related technical
report is also good to check out.]
\end{itemize}
This is a good opportunity to try out Geweke-style validation of your
hyperparameters. That is, there is a hyperparameter~$\alpha$,
parameters~$\theta$ and data~$\mcD$. Your model specifies a joint
distribution
\begin{align*}
p(\alpha, \theta, \mcD) &=
p(\alpha)\,p(\theta\given\alpha)\,p(\mcD\given\theta).
\end{align*}
Your inference will sample from the conditional
distribution~$p(\alpha,\theta\given\mcD)$, but you can also fantasize
data from~$p(\mcD\given\theta)$. Augment your MCMC with fantasy data
and then examine the samples that you get for~$\alpha$. Make sure
that their histogram looks like the prior.
\item Apply your implementation to data of your choice and report the
results. As a minimum, I suggest applying it to some
two-dimensional synthetic data and visualizing the clusters it
discovers. One tool you might check out for generating such
synthetic data can be found here:
\url{http://hips.seas.harvard.edu/content/synthetic-pinwheel-data-matlab}.
\end{enumerate}
\end{problem}
\end{document}