ららにく@utool.cc

機械学習に関する怪しいメモやらあれこれその他メモやら

DRBM Revision.(Hidden∈{0, 1})

Discriminative Restricted Boltzmann Machinesの隠れ変数が{0, 1}なときのメモ。


f:id:utool:20170610042706p:plain


細かい定義やら±1な場合のはこっち参照.
Discriminative Restricted Boltzmann Machines(RBM選択モデル)のメモ - ららにく@utool.cc

今回の記事は違う部分だけのっける。

Discriminative Restricted Boltzmann Machines

記号の定義

 \boldsymbol{x}は実数値
\displaystyle{\left( \boldsymbol{x} = \{ x_i \in \left(-\infty, +\infty \right) \mid i \in X \} \right) }
 \boldsymbol{h}は二値
\displaystyle{\left( \boldsymbol{h} = \{ h_j \in \{0, 1\} \mid j \in H \} \right) }
 \boldsymbol{y}は1-of-K表現
\displaystyle{\left( \boldsymbol{y} \in \left \{ \begin{pmatrix} 1\\0\\\vdots\\0 \end{pmatrix},  \begin{pmatrix} 0\\1\\\vdots\\0 \end{pmatrix}, \cdots,  \begin{pmatrix} 0\\0\\\vdots\\1 \end{pmatrix} \right \} \right) }
データ集合
\left( \boldsymbol{\mathfrak{D}} = \left\{ \boldsymbol{\mathfrak{d}}^{(1)}, \boldsymbol{\mathfrak{d}}^{(2)}, \cdots , \boldsymbol{\mathfrak{d}}^{(N)} \right\} \right)
\left( \boldsymbol{\mathfrak{d}}^{(n)} = \{ \mathfrak{d}^{(n)}_i \in \left(-\infty, +\infty \right) \mid i \in X \} \right)
ラベル集合
\boldsymbol{\mathfrak{L}} = \left\{ \boldsymbol{\mathfrak{l}}^{(1)}, \boldsymbol{\mathfrak{l}}^{(2)}, \cdots , \boldsymbol{\mathfrak{l}}^{(N)} \right\}
\displaystyle{\left( \boldsymbol{\mathfrak{l}}^{(n)} \in \left \{ \begin{pmatrix} 1\\0\\\vdots\\0 \end{pmatrix},  \begin{pmatrix} 0\\1\\\vdots\\0 \end{pmatrix}, \cdots,  \begin{pmatrix} 0\\0\\\vdots\\1 \end{pmatrix} \right \} \right) }
シグモイド関数
 \begin{align}
\mathrm{sigmoid}\left(x\right) := \frac{1}{1+\exp(-x)}
\end{align}

規格化定数

 \begin{align}
Z\left(\boldsymbol{\theta} \right) &= \sum_{k \in Y} \exp \left( d_k \right) \prod_{j \in H} \left(1+\exp\left( \sum_{i \in X}W_{ij}x_i + c_j + V_{jk} \right) \right)
\end{align}

周辺確率

 \begin{align}
P \left( \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \sum_{k \in Y} \exp \left(\sum_{i\in X}\sum_{j \in H}W_{ij}x_i h_j + \sum_{j\in H} c_j h_j + \sum_{j \in H}V_{jk}h_j + d_k \right) 
\end{align}

 \begin{align}
P \left( h_j \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \sum_{k \in Y}  \exp \left(\sum_{i\in X}W_{ij}x_i h_j + c_j h_j + V_{jk}h_j \right) \prod_{l \in H, l \neq j}\left( 1 + \exp \left( \sum_{i \in X}W_{il}x_i + c_l + V_{lk} \right) \right) \exp \left(d_k \right)
\end{align}

 \begin{align}
P \left(\boldsymbol{y} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \exp \left( \sum_{k \in Y} d_k y_k \right) \prod_{j \in H} \left(1+\exp\left( \sum_{i \in X}W_{ij}x_i + c_j + \sum_{k \in Y} V_{jk} y_k \right) \right)
\end{align}

 \begin{align}
P \left(y_k = 1 \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \exp \left( d_k \right) \prod_{j \in H} \left(1+\exp\left( \sum_{i \in X}W_{ij}x_i + c_j + V_{jk} \right) \right)
\end{align}

 \begin{align}
P \left(h_j, y_k = 1 \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z}  \exp \left(\sum_{i\in X}W_{ij}x_i h_j + c_j h_j + V_{jk}h_j \right) \prod_{l \in H, l \neq j}\left( 1 + \exp \left( \sum_{i \in X}W_{il}x_i + c_l + V_{lk} \right) \right) \exp \left(d_k \right)
\end{align}

勾配

 \begin{align}
\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial W_{ij}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{d}_i^{(n)} \mathrm{sigmoid}\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} + c_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} \right)  &-& \frac{1}{N} \sum_{n=1}^N \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} \mathfrak{d}_i^{(n)} h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial c_{j}}  &= \frac{1}{N} \sum_{n=1}^N \mathrm{sigmoid}\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} + c_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} \right) &-& \frac{1}{N} \sum_{n=1}^N  \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial V_{jk}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{l}_k^{(n)} \mathrm{sigmoid}\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} + c_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} \right) &-& \frac{1}{N} \sum_{n=1}^N  
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} h_j y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial d_{k}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{l}_k^{(n)}  &-& \frac{1}{N} \sum_{n=1}^N \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right)
\end{align}

DRBMの期待値

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} x_i h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &=  x_i\frac{1}{Z} \sum_{k \in Y} \prod_{l \in H, l \neq j} \left( 1 + \exp\left(\sum_{i \in X} W_{il} x_i + c_l + V_{lk} \right)\right)  \exp\left(\sum_{i \in X} W_{ij} x_i + c_j + V_{jk} \right) \exp\left(d_k \right) 
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right)  &= \frac{1}{Z} \sum_{k \in Y} \prod_{l \in H, l \neq j} \left( 1+ \exp\left(\sum_{i \in X} W_{il} x_i + c_l + V_{lk} \right) \right)  \exp\left(\sum_{i \in X} W_{ij} x_i + c_j + V_{jk} \right) \exp\left(d_k \right)
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  h_j y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z}\prod_{l \in H, l \neq j} \left( 1+ \exp\left( \sum_{i \in X}W_{il}x_i + c_l + V_{lk} \right) \right) \exp\left( \sum_{i \in X}W_{ij}x_i + c_j + V_{jk} \right) \exp\left( d_k \right)
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right)   &= \frac{1}{Z} \prod_{j \in H} \left( 1 + \exp\left( \sum_{i \in X}W_{ij} x_i + c_j + V_{jk} \right) \right) \exp\left(d_k \right) 
\end{align}








特別付録: 隠れ変数一般化

記号の定義(一般化)

 \boldsymbol{h}の定義を変えただけだけれども

 \boldsymbol{x}は実数値
\displaystyle{\left( \boldsymbol{x} = \{ x_i \in \left(-\infty, +\infty \right) \mid i \in X \} \right) }
 \boldsymbol{h}は適当な多値集合\mathscr{H}
\displaystyle{\left( \boldsymbol{h} = \{ h_j \in \mathscr{H} \mid j \in H \} \right) }
 \boldsymbol{y}は1-of-K表現
\displaystyle{\left( \boldsymbol{y} \in \left \{ \begin{pmatrix} 1\\0\\\vdots\\0 \end{pmatrix},  \begin{pmatrix} 0\\1\\\vdots\\0 \end{pmatrix}, \cdots,  \begin{pmatrix} 0\\0\\\vdots\\1 \end{pmatrix} \right \} \right) }
データ集合
\left( \boldsymbol{\mathfrak{D}} = \left\{ \boldsymbol{\mathfrak{d}}^{(1)}, \boldsymbol{\mathfrak{d}}^{(2)}, \cdots , \boldsymbol{\mathfrak{d}}^{(N)} \right\} \right)
\left( \boldsymbol{\mathfrak{d}}^{(n)} = \{ \mathfrak{d}^{(n)}_i \in \left(-\infty, +\infty \right) \mid i \in X \} \right)
ラベル集合
\boldsymbol{\mathfrak{L}} = \left\{ \boldsymbol{\mathfrak{l}}^{(1)}, \boldsymbol{\mathfrak{l}}^{(2)}, \cdots , \boldsymbol{\mathfrak{l}}^{(N)} \right\}
\displaystyle{\left( \boldsymbol{\mathfrak{l}}^{(n)} \in \left \{ \begin{pmatrix} 1\\0\\\vdots\\0 \end{pmatrix},  \begin{pmatrix} 0\\1\\\vdots\\0 \end{pmatrix}, \cdots,  \begin{pmatrix} 0\\0\\\vdots\\1 \end{pmatrix} \right \} \right) }

規格化定数(一般化)

 \begin{align}
Z\left(\boldsymbol{\theta} \right) &= \sum_{k \in Y} \exp \left( d_k \right) \prod_{j \in H}\sum_{h_j \in \mathscr{H}} \exp\left( \sum_{i \in X}W_{ij}x_i h_j + c_j h_j + V_{jk} h_j \right)
\end{align}

周辺確率(一般化)

 \begin{align}
P \left( \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \sum_{k \in Y} \exp \left(\sum_{i\in X}\sum_{j \in H}W_{ij}x_i h_j + \sum_{j\in H} c_j h_j + \sum_{j \in H}V_{jk}h_j + d_k \right) 
\end{align}

 \begin{align}
P \left( h_j \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \sum_{k \in Y}  \exp \left(\sum_{i\in X}W_{ij}x_i h_j + c_j h_j + V_{jk}h_j \right) \prod_{l \in H, l \neq j} \sum_{h_l \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{il}x_i h_l + c_l h_l + V_{lk} h_l \right) \exp \left(d_k \right)
\end{align}

 \begin{align}
P \left(\boldsymbol{y} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \exp \left( \sum_{k \in Y} d_k y_k \right) \prod_{j \in H} \sum_{h_j \in \mathscr{H}}\exp\left( \sum_{i \in X}W_{ij}x_i h_j + c_j h_j + \sum_{k \in Y} V_{jk} y_k h_j \right)
\end{align}

 \begin{align}
P \left(y_k = 1 \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z} \exp \left( d_k \right) \prod_{j \in H} \sum_{h_j \in \mathscr{H}} \exp\left( \sum_{i \in X}W_{ij}x_ih_j + c_jh_j + V_{jk}h_j \right)
\end{align}

 \begin{align}
P \left(h_j, y_k = 1 \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z}  \exp \left(\sum_{i\in X}W_{ij}x_i h_j + c_j h_j + V_{jk}h_j \right) \prod_{l \in H, l \neq j} \sum_{h_l \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{il}x_i h_l + c_l h_l + V_{lk} h_l \right) \exp \left(d_k \right)
\end{align}

勾配(一般化)

縦に長すぎて地獄を見た。

 \begin{align}
\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial W_{ij}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{d}_i^{(n)} \frac{\displaystyle \sum_{h_j \in \mathscr{H}} h_j \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)}  h_j\right) }{\displaystyle \sum_{h_j \in \mathscr{H}} \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} h_j \right)}  &-& \frac{1}{N} \sum_{n=1}^N \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} \mathfrak{d}_i^{(n)} h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial c_{j}}  &= \frac{1}{N} \sum_{n=1}^N \frac{\displaystyle \sum_{h_j \in \mathscr{H}} h_j \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)}  h_j\right) }{\displaystyle \sum_{h_j \in \mathscr{H}} \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} h_j \right)} &-& \frac{1}{N} \sum_{n=1}^N  \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial V_{jk}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{l}_k^{(n)} \frac{\displaystyle \sum_{h_j \in \mathscr{H}} h_j \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)}  h_j\right) }{\displaystyle \sum_{h_j \in \mathscr{H}} \exp\left( \sum_{i \in X} W_{ij} \mathfrak{d}_i^{(n)} h_j + c_j h_j + \sum_{k \in Y}V_{jk} \mathfrak{l}_k^{(n)} h_j \right)} &-& \frac{1}{N} \sum_{n=1}^N  
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} h_j y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right) \\

\frac{\partial l\left(\boldsymbol{\theta} \right) }{\partial d_{k}} &= \frac{1}{N} \sum_{n=1}^N \mathfrak{l}_k^{(n)}  &-& \frac{1}{N} \sum_{n=1}^N \sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x} = \boldsymbol{\mathfrak{d}}^{(n)}, \boldsymbol{\theta} \right)
\end{align}

DRBMの期待値(一般化)

横に長すぎて地獄を見た。

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}} x_i h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &=  x_i\frac{1}{Z} \sum_{k \in Y} \prod_{l \in H, l \neq j} \sum_{h_l \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{il}x_i h_l + c_l h_l + V_{lk} h_l \right) \sum_{h_j \in \mathscr{H}} h_j \exp\left(\sum_{i \in X} W_{ij} x_ih_j + c_jh_j + V_{jk}h_j \right) \exp\left(d_k \right) 
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  h_j P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right)  &= \frac{1}{Z} \sum_{k \in Y} \prod_{l \in H, l \neq j} \sum_{h_l \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{il}x_i h_l + c_l h_l + V_{lk} h_l \right) \sum_{h_j \in \mathscr{H}} h_j \exp\left(\sum_{i \in X} W_{ij} x_i h_j + c_j h_j + V_{jk} h_j \right) \exp\left(d_k \right)
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  h_j y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right) &= \frac{1}{Z}\prod_{l \in H, l \neq j} \sum_{h_l \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{il}x_i h_l + c_l h_l + V_{lk} h_l \right) \sum_{h_j \in \mathscr{H}} h_j \exp\left( \sum_{i \in X}W_{ij}x_i h_j + c_j h_j + V_{jk} h_j \right) \exp\left( d_k \right)
\end{align}

 \begin{align}
\sum_{\boldsymbol{y}}\sum_{\boldsymbol{h}}  y_k P\left( \boldsymbol{y},  \boldsymbol{h} \mid \boldsymbol{x}, \boldsymbol{\theta} \right)   &= \frac{1}{Z} \prod_{j \in H} \sum_{h_j \in \mathscr{H}} \exp \left( \sum_{i \in X}W_{ij}x_i h_j + c_j h_j + V_{jk} h_j \right) \exp\left(d_k \right) 
\end{align}