% No 'submit' option for the problems by themselves.
\documentclass{harvardml}
% Use the 'submit' option when you submit your solutions.
%\documentclass[submit]{harvardml}
\usepackage{url}
% Put in your full name and email address.
\name{Your Name}
\email{email@fas.harvard.edu}
% List any people you worked with.
\collaborators{%
John Doe,
Fred Doe
}
% You don't need to change these.
\course{CS281-F13}
\assignment{Assignment \#4}
\duedate{11:59pm, November 8, 2013}
% Useful macros
\newcommand{\bw}{\boldsymbol{w}}
\newcommand{\distNorm}{\mathcal{N}}
\newcommand{\given}{\,|\,}
\newcommand{\ident}{\mathbb{I}}
\begin{document}
\begin{problem}[10 pts]
For target distribution~$\pi(x)$ and proposal
distribution~$q(x'\gets x)$, the Metropolis--Hastings transition
operator is given by
\begin{align*}
T(x'\gets x) &= q(x'\gets x)\,\min\left\{1,
\frac{\pi(x')\,q(x\gets x')}{\pi(x)\,q(x'\gets x)}
\right\}.
\end{align*}
Show that this transition operator satisfies detailed balance.
\end{problem}
\begin{problem}[20 pts]
Here is a hierarchical model that looks like a ten-dimensional ``funnel'':
\begin{align*}
v &\sim \distNorm(v \given 0, 3^2)\\
x_k \given v &\sim \distNorm(x_k\given 0, e^v) \text{ for $k=1, 2, \ldots, 9$}.
\end{align*}
This model leads to a joint distribution
\begin{align}
\label{eqn:joint}
p(v, x_1, x_2, \ldots, x_9) &=
\distNorm(v\given 0, 3^2)
\prod^9_{k=1}\distNorm(x_k\given 0, e^v).
\end{align}
\begin{enumerate}
\item Draw a graphical model describing this joint distribution.
\item Out of 50,000 samples from this joint distribution, how many
  of them do you expect to have ${v < -5}$?
\item Exploiting the directed structure, generate 50,000 samples
from this joint distribution. Plot a histogram of the marginal
samples for~$v$. How many samples had~${v < -5}$?
\item Pretend you don't know about the directed structure and that
  you only had the joint distribution in Equation~\eqref{eqn:joint}.
Use Metropolis--Hastings to generate 50,000 samples. Use a
ten-dimensional Gaussian proposal distribution with identity
covariance, centered at the current state. That is,
if~${\bw=[v, x_1, \ldots, x_9]}$, then
\begin{align*}
q(\bw'\gets \bw) &= \distNorm(\bw'\given \bw, \ident_{10}).
\end{align*}
Start from~${v=0}$ and~${x_k=1}$. Plot a histogram of~$v$. How
many samples had~${v<-5}$?
\item Implement slice sampling on the same joint distribution.
Use any variant of slice sampling you wish (e.g.,
coordinate-wise, random directions, hyperrectangle, exponential
step-out, etc.). Specify which you use. Plot a histogram
of~$v$. How many samples had~${v< -5}$?
\end{enumerate}
\end{problem}
In the following two questions, you will implement latent Dirichlet
allocation using MCMC and variational inference. You can apply LDA to
any data you wish, including the text of the Jester data we have
previously studied. Other possible corpora are:
\begin{itemize}
\item NIPS papers: \url{http://cs.nyu.edu/~roweis/data.html}
\item Newsgroups: \url{http://people.csail.mit.edu/jrennie/20Newsgroups/}
\item Associated Press: \url{http://www.cs.princeton.edu/~blei/lda-c/index.html}
\end{itemize}
Remember, you'll need to do some preprocessing to turn things into
word counts. You'll probably want to remove punctuation, tokenize and
lowercase. You may also want to remove stop words and very rare words.
In each case, provide some analysis and discussion of your
implementation and results. For example: What were the most common
topics? What were the most common words across each topic? If you
had metadata, how did this data relate to the topics? How many topics
did you use and how did you arrive at this choice? What preprocessing
did you carry out? How many iterations did you run and how long did
this take? Did the implementations find interestingly different
solutions? How did you set the hyperparameters? Did you try
alternative values? Did you do any quantitative analysis of your
model?
\begin{problem}[35 pts]
One approach to performing inference in latent Dirichlet allocation
is to use Markov chain Monte Carlo, as described in Griffiths and
Steyvers (2004). This is an example of Gibbs sampling. Implement
this approach and describe what you find.
\end{problem}
\begin{problem}[35 pts]
Another approach to LDA inference is to use a variational
approximation, as described in Blei, Ng and Jordan (2003). This is
an example of mean field variational inference. Feeling ambitious?
Check out recent results on \emph{stochastic} and \emph{online}
variational inference in LDA.
\end{problem}
\end{document}