Variational Autoencoders Explained



/*!
*
* IPython notebook
*
*/
/* CSS font colors for translated ANSI escape sequences */
/* The color values are a mix of
http://www.xcolors.net/dl/baskerville-ivorylight and
http://www.xcolors.net/dl/euphrasia */
.ansi-black-fg {
color: #3E424D;
}
.ansi-black-bg {
background-color: #3E424D;
}
.ansi-black-intense-fg {
color: #282C36;
}
.ansi-black-intense-bg {
background-color: #282C36;
}
.ansi-red-fg {
color: #E75C58;
}
.ansi-red-bg {
background-color: #E75C58;
}
.ansi-red-intense-fg {
color: #B22B31;
}
.ansi-red-intense-bg {
background-color: #B22B31;
}
.ansi-green-fg {
color: #00A250;
}
.ansi-green-bg {
background-color: #00A250;
}
.ansi-green-intense-fg {
color: #007427;
}
.ansi-green-intense-bg {
background-color: #007427;
}
.ansi-yellow-fg {
color: #DDB62B;
}
.ansi-yellow-bg {
background-color: #DDB62B;
}
.ansi-yellow-intense-fg {
color: #B27D12;
}
.ansi-yellow-intense-bg {
background-color: #B27D12;
}
.ansi-blue-fg {
color: #208FFB;
}
.ansi-blue-bg {
background-color: #208FFB;
}
.ansi-blue-intense-fg {
color: #0065CA;
}
.ansi-blue-intense-bg {
background-color: #0065CA;
}
.ansi-magenta-fg {
color: #D160C4;
}
.ansi-magenta-bg {
background-color: #D160C4;
}
.ansi-magenta-intense-fg {
color: #A03196;
}
.ansi-magenta-intense-bg {
background-color: #A03196;
}
.ansi-cyan-fg {
color: #60C6C8;
}
.ansi-cyan-bg {
background-color: #60C6C8;
}
.ansi-cyan-intense-fg {
color: #258F8F;
}
.ansi-cyan-intense-bg {
background-color: #258F8F;
}
.ansi-white-fg {
color: #C5C1B4;
}
.ansi-white-bg {
background-color: #C5C1B4;
}
.ansi-white-intense-fg {
color: #A1A6B2;
}
.ansi-white-intense-bg {
background-color: #A1A6B2;
}
.ansi-default-inverse-fg {
color: #FFFFFF;
}
.ansi-default-inverse-bg {
background-color: #000000;
}
.ansi-bold {
font-weight: bold;
}
.ansi-underline {
text-decoration: underline;
}
/* The following styles are deprecated and will be removed in a future version */
.ansibold {
font-weight: bold;
}
.ansi-inverse {
outline: 0.5px dotted;
}
/* use dark versions for foreground, to improve visibility */
.ansiblack {
color: black;
}
.ansired {
color: darkred;
}
.ansigreen {
color: darkgreen;
}
.ansiyellow {
color: #c4a000;
}
.ansiblue {
color: darkblue;
}
.ansipurple {
color: darkviolet;
}
.ansicyan {
color: steelblue;
}
.ansigray {
color: gray;
}
/* and light for background, for the same reason */
.ansibgblack {
background-color: black;
}
.ansibgred {
background-color: red;
}
.ansibggreen {
background-color: green;
}
.ansibgyellow {
background-color: yellow;
}
.ansibgblue {
background-color: blue;
}
.ansibgpurple {
background-color: magenta;
}
.ansibgcyan {
background-color: cyan;
}
.ansibggray {
background-color: gray;
}
div.cell {
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
border-radius: 2px;
box-sizing: border-box;
-moz-box-sizing: border-box;
-webkit-box-sizing: border-box;
border-width: 1px;
border-style: solid;
border-color: transparent;
width: 100%;
padding: 5px;
/* This acts as a spacer between cells, that is outside the border */
margin: 0px;
outline: none;
position: relative;
overflow: visible;
}
/* Transparent 5px strip along the left edge of every cell. The selected and
   edit-mode rules further down recolor the same pseudo-element. */
div.cell:before {
  position: absolute;
  display: block;
  top: -1px;
  left: -1px;
  width: 5px;
  height: calc(100% + 2px);
  /* Must be a real (empty) string: any other token makes the declaration
     invalid and the pseudo-element is not generated at all. */
  content: '';
  background: transparent;
}
div.cell.jupyter-soft-selected {
border-left-color: #E3F2FD;
border-left-width: 1px;
padding-left: 5px;
border-right-color: #E3F2FD;
border-right-width: 1px;
background: #E3F2FD;
}
@media print {
div.cell.jupyter-soft-selected {
border-color: transparent;
}
}
div.cell.selected,
div.cell.selected.jupyter-soft-selected {
border-color: #ababab;
}
/* Blue marker strip on the left edge of the selected cell. */
div.cell.selected:before,
div.cell.selected.jupyter-soft-selected:before {
  position: absolute;
  display: block;
  top: -1px;
  left: -1px;
  width: 5px;
  height: calc(100% + 2px);
  /* Empty string so the pseudo-element is actually rendered. */
  content: '';
  background: #42A5F5;
}
@media print {
div.cell.selected,
div.cell.selected.jupyter-soft-selected {
border-color: transparent;
}
}
.edit_mode div.cell.selected {
border-color: #66BB6A;
}
/* Green marker strip on the left edge of the selected cell in edit mode;
   the color matches the edit-mode border color (#66BB6A). */
.edit_mode div.cell.selected:before {
  position: absolute;
  display: block;
  top: -1px;
  left: -1px;
  width: 5px;
  height: calc(100% + 2px);
  /* Empty string so the pseudo-element is actually rendered. */
  content: '';
  background: #66BB6A;
}
@media print {
.edit_mode div.cell.selected {
border-color: transparent;
}
}
.prompt {
/* This needs to be wide enough for 3 digit prompt numbers: In[100]: */
min-width: 14ex;
/* This padding is tuned to match the padding on the CodeMirror editor. */
padding: 0.4em;
margin: 0px;
font-family: monospace;
text-align: right;
/* This has to match that of the CodeMirror class line-height below */
line-height: 1.21429em;
/* Don’t highlight prompt number selection */
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
/* Use default cursor */
cursor: default;
}
@media (max-width: 540px) {
.prompt {
text-align: left;
}
}
div.inner_cell {
min-width: 0;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
/* Old browsers */
-webkit-box-flex: 1;
-moz-box-flex: 1;
box-flex: 1;
/* Modern browsers */
flex: 1;
}
/* input_area and input_prompt must match in top border and margin for alignment */
div.input_area {
border: 1px solid #cfcfcf;
border-radius: 2px;
background: #f7f7f7;
line-height: 1.21429em;
}
/* This is needed so that empty prompt areas can collapse to zero height when there
is no content in the output_subarea and the prompt. The main purpose of this is
to make sure that empty JavaScript output_subareas have no height. */
div.prompt:empty {
padding-top: 0;
padding-bottom: 0;
}
div.unrecognized_cell {
padding: 5px 5px 5px 0px;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: horizontal;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: horizontal;
-moz-box-align: stretch;
display: box;
box-orient: horizontal;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: row;
align-items: stretch;
}
div.unrecognized_cell .inner_cell {
border-radius: 2px;
padding: 5px;
font-weight: bold;
color: red;
border: 1px solid #cfcfcf;
background: #eaeaea;
}
div.unrecognized_cell .inner_cell a {
color: inherit;
text-decoration: none;
}
div.unrecognized_cell .inner_cell a:hover {
color: inherit;
text-decoration: none;
}
@media (max-width: 540px) {
div.unrecognized_cell > div.prompt {
display: none;
}
}
div.code_cell {
/* avoid page breaking on code cells when printing */
}
@media print {
div.code_cell {
page-break-inside: avoid;
}
}
/* any special styling for code cells that are currently running goes here */
div.input {
page-break-inside: avoid;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: horizontal;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: horizontal;
-moz-box-align: stretch;
display: box;
box-orient: horizontal;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: row;
align-items: stretch;
}
@media (max-width: 540px) {
div.input {
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
}
}
/* input_area and input_prompt must match in top border and margin for alignment */
div.input_prompt {
color: #303F9F;
border-top: 1px solid transparent;
}
div.input_area > div.highlight {
margin: 0.4em;
border: none;
padding: 0px;
background-color: transparent;
}
div.input_area > div.highlight > pre {
margin: 0px;
border: none;
padding: 0px;
background-color: transparent;
}
/* The following gets added to the document head if it is detected that the user has a
* monospace font with inconsistent normal/bold/italic height. See
* notebookmain.js. Such fonts will have keywords vertically offset with
* respect to the rest of the text. The user should select a better font.
* See: https://github.com/ipython/ipython/issues/1503
*
* .CodeMirror span {
* vertical-align: bottom;
* }
*/
.CodeMirror {
line-height: 1.21429em;
/* Changed from 1em to our global default */
font-size: 14px;
height: auto;
/* Changed to auto to autogrow */
background: none;
/* Changed from white to allow our bg to show through */
}
.CodeMirror-scroll {
/* The CodeMirror docs are a bit fuzzy on if overflow-y should be hidden or visible.*/
/* We have found that if it is visible, vertical scrollbars appear with font size changes.*/
overflow-y: hidden;
overflow-x: auto;
}
.CodeMirror-lines {
/* In CM2, this used to be 0.4em, but in CM3 it went to 4px. We need the em value because */
/* we have set a different line-height and want this to scale with that. */
/* Note that this should set vertical padding only, since CodeMirror assumes
that horizontal padding will be set on CodeMirror pre */
padding: 0.4em 0;
}
.CodeMirror-linenumber {
padding: 0 8px 0 4px;
}
.CodeMirror-gutters {
border-bottom-left-radius: 2px;
border-top-left-radius: 2px;
}
.CodeMirror pre {
/* In CM3 this went to 4px from 0 in CM2. This sets horizontal padding only,
use .CodeMirror-lines for vertical */
padding: 0 0.4em;
border: 0;
border-radius: 0;
}
.CodeMirror-cursor {
border-left: 1.4px solid black;
}
@media screen and (min-width: 2138px) and (max-width: 4319px) {
.CodeMirror-cursor {
border-left: 2px solid black;
}
}
@media screen and (min-width: 4320px) {
.CodeMirror-cursor {
border-left: 4px solid black;
}
}
/*

Original style from softwaremaniacs.org (c) Ivan Sagalaev
Adapted from GitHub theme

*/
.highlight-base {
color: #000;
}
.highlight-variable {
color: #000;
}
.highlight-variable-2 {
color: #1a1a1a;
}
.highlight-variable-3 {
color: #333333;
}
.highlight-string {
color: #BA2121;
}
.highlight-comment {
color: #408080;
font-style: italic;
}
.highlight-number {
color: #080;
}
.highlight-atom {
color: #88F;
}
.highlight-keyword {
color: #008000;
font-weight: bold;
}
.highlight-builtin {
color: #008000;
}
.highlight-error {
color: #f00;
}
.highlight-operator {
color: #AA22FF;
font-weight: bold;
}
.highlight-meta {
color: #AA22FF;
}
/* previously not defined, copying from default codemirror */
.highlight-def {
color: #00f;
}
.highlight-string-2 {
color: #f50;
}
.highlight-qualifier {
color: #555;
}
.highlight-bracket {
color: #997;
}
.highlight-tag {
color: #170;
}
.highlight-attribute {
color: #00c;
}
.highlight-header {
color: blue;
}
.highlight-quote {
color: #090;
}
.highlight-link {
color: #00c;
}
/* apply the same style to codemirror */
.cm-s-ipython span.cm-keyword {
color: #008000;
font-weight: bold;
}
.cm-s-ipython span.cm-atom {
color: #88F;
}
.cm-s-ipython span.cm-number {
color: #080;
}
.cm-s-ipython span.cm-def {
color: #00f;
}
.cm-s-ipython span.cm-variable {
color: #000;
}
.cm-s-ipython span.cm-operator {
color: #AA22FF;
font-weight: bold;
}
.cm-s-ipython span.cm-variable-2 {
color: #1a1a1a;
}
.cm-s-ipython span.cm-variable-3 {
color: #333333;
}
.cm-s-ipython span.cm-comment {
color: #408080;
font-style: italic;
}
.cm-s-ipython span.cm-string {
color: #BA2121;
}
.cm-s-ipython span.cm-string-2 {
color: #f50;
}
.cm-s-ipython span.cm-meta {
color: #AA22FF;
}
.cm-s-ipython span.cm-qualifier {
color: #555;
}
.cm-s-ipython span.cm-builtin {
color: #008000;
}
.cm-s-ipython span.cm-bracket {
color: #997;
}
.cm-s-ipython span.cm-tag {
color: #170;
}
.cm-s-ipython span.cm-attribute {
color: #00c;
}
.cm-s-ipython span.cm-header {
color: blue;
}
.cm-s-ipython span.cm-quote {
color: #090;
}
.cm-s-ipython span.cm-link {
color: #00c;
}
.cm-s-ipython span.cm-error {
color: #f00;
}
.cm-s-ipython span.cm-tab {
background: url();
background-position: right;
background-repeat: no-repeat;
}
div.output_wrapper {
/* this position must be relative to enable descendents to be absolute within it */
position: relative;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
z-index: 1;
}
/* class for the output area when it should be height-limited */
div.output_scroll {
/* ideally, this would be max-height, but FF barfs all over that */
height: 24em;
/* FF needs this *and the wrapper* to specify full width, or it will shrinkwrap */
width: 100%;
overflow: auto;
border-radius: 2px;
-webkit-box-shadow: inset 0 2px 8px rgba(0, 0, 0, 0.8);
box-shadow: inset 0 2px 8px rgba(0, 0, 0, 0.8);
display: block;
}
/* output div while it is collapsed */
div.output_collapsed {
margin: 0px;
padding: 0px;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
}
div.out_prompt_overlay {
height: 100%;
padding: 0px 0.4em;
position: absolute;
border-radius: 2px;
}
div.out_prompt_overlay:hover {
/* use inner shadow to get border that is computed the same on WebKit/FF */
-webkit-box-shadow: inset 0 0 1px #000;
box-shadow: inset 0 0 1px #000;
background: rgba(240, 240, 240, 0.5);
}
div.output_prompt {
color: #D84315;
}
/* This class is the outer container of all output sections. */
div.output_area {
padding: 0px;
page-break-inside: avoid;
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: horizontal;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: horizontal;
-moz-box-align: stretch;
display: box;
box-orient: horizontal;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: row;
align-items: stretch;
}
div.output_area .MathJax_Display {
text-align: left !important;
}
/* Keep images and inline SVG output within the width of the output area.
   The two bare `div.output_area` lines that preceded this rule were residue
   of stripped rules; left in place they became ancestor selectors and the
   img selector only matched inside three nested output areas. */
div.output_area img,
div.output_area svg {
  max-width: 100%;
  height: auto;
}
div.output_area img.unconfined,
div.output_area svg.unconfined {
max-width: none;
}
div.output_area .mglyph > img {
max-width: none;
}
/* This is needed to protect the pre formating from global settings such
as that of bootstrap */
.output {
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
}
@media (max-width: 540px) {
div.output_area {
/* Old browsers */
display: -webkit-box;
-webkit-box-orient: vertical;
-webkit-box-align: stretch;
display: -moz-box;
-moz-box-orient: vertical;
-moz-box-align: stretch;
display: box;
box-orient: vertical;
box-align: stretch;
/* Modern browsers */
display: flex;
flex-direction: column;
align-items: stretch;
}
}
div.output_area pre {
margin: 0;
padding: 1px 0 1px 0;
border: 0;
vertical-align: baseline;
color: black;
background-color: transparent;
border-radius: 0;
}
/* This class is for the output subarea inside the output_area and after
the prompt div. */
div.output_subarea {
  overflow-x: auto;
  padding: 0.4em;
  /* Old browsers */
  -webkit-box-flex: 1;
  -moz-box-flex: 1;
  box-flex: 1;
  /* Modern browsers */
  flex: 1;
  /* Leave room for the prompt column (.prompt has min-width: 14ex).
     calc() requires an ASCII hyphen-minus with whitespace on both sides;
     a typographic dash invalidates the whole declaration. */
  max-width: calc(100% - 14ex);
}
div.output_scroll div.output_subarea {
overflow-x: visible;
}
/* The rest of the output_* classes are for special styling of the different
output types */
/* all text output has this class: */
div.output_text {
text-align: left;
color: #000;
/* This has to match that of the CodeMirror class line-height below */
line-height: 1.21429em;
}
/* stdout/stderr are ‘text’ as well as ‘stream’, but execute_result/error are *not* streams */
div.output_stderr {
background: #fdd;
/* very light red background for stderr */
}
div.output_latex {
text-align: left;
}
/* Empty output_javascript divs should have no height */
div.output_javascript:empty {
padding: 0;
}
.js-error {
color: darkred;
}
/* raw_input styles */
div.raw_input_container {
line-height: 1.21429em;
padding-top: 5px;
}
pre.raw_input_prompt {
/* nothing needed here. */
}
input.raw_input {
font-family: monospace;
font-size: inherit;
color: inherit;
width: auto;
/* make sure input baseline aligns with prompt */
vertical-align: baseline;
/* padding + margin = 0.5em between prompt and cursor */
padding: 0em 0.25em;
margin: 0em 0.25em;
}
input.raw_input:focus {
box-shadow: none;
}
p.p-space {
margin-bottom: 10px;
}
div.output_unrecognized {
padding: 5px;
font-weight: bold;
color: red;
}
div.output_unrecognized a {
color: inherit;
text-decoration: none;
}
div.output_unrecognized a:hover {
color: inherit;
text-decoration: none;
}
.rendered_html {
color: #000;
/* any extras will just be numbers: */
}

.rendered_html :link {
text-decoration: underline;
}
.rendered_html :visited {
text-decoration: underline;
}

.rendered_html h1:first-child {
margin-top: 0.538em;
}
.rendered_html h2:first-child {
margin-top: 0.636em;
}
.rendered_html h3:first-child {
margin-top: 0.777em;
}
.rendered_html h4:first-child {
margin-top: 1em;
}
.rendered_html h5:first-child {
margin-top: 1em;
}
.rendered_html h6:first-child {
margin-top: 1em;
}
.rendered_html ul:not(.list-inline),
.rendered_html ol:not(.list-inline) {
padding-left: 2em;
}

.rendered_html * + ul {
margin-top: 1em;
}
.rendered_html * + ol {
margin-top: 1em;
}

/* Zebra-stripe the body rows of rendered tables. The dangling `pre`, `tr`
   and `th` selectors that used to precede this rule belonged to rules whose
   bodies were lost; attached here they painted every <pre>, row and header
   cell with the stripe color. */
.rendered_html tbody tr:nth-child(odd) {
  background: #f5f5f5;
}
.rendered_html tbody tr:hover {
background: rgba(66, 165, 245, 0.2);
}
.rendered_html * + table {
margin-top: 1em;
}

.rendered_html * + p {
margin-top: 1em;
}

.rendered_html * + img {
margin-top: 1em;
}
/* Space an alert box from whatever element precedes it. The orphaned `img`
   selectors that were fused into this rule are dropped: images that follow
   another element already get their top margin from `.rendered_html * + img`. */
.rendered_html * + .alert {
  margin-top: 1em;
}
/* Lay a text cell out as a horizontal flex row. The stray `[dir=...]` line
   that preceded the selector (with typographic quotes) acted as an ancestor
   selector that never matches real markup, which disabled this whole rule. */
div.text_cell {
  /* Old browsers */
  display: -webkit-box;
  -webkit-box-orient: horizontal;
  -webkit-box-align: stretch;
  display: -moz-box;
  -moz-box-orient: horizontal;
  -moz-box-align: stretch;
  display: box;
  box-orient: horizontal;
  box-align: stretch;
  /* Modern browsers */
  display: flex;
  flex-direction: row;
  align-items: stretch;
}
@media (max-width: 540px) {
div.text_cell > div.prompt {
display: none;
}
}
div.text_cell_render {
/*font-family: “Helvetica Neue”, Arial, Helvetica, Geneva, sans-serif;*/
outline: none;
resize: none;
width: inherit;
border-style: none;
padding: 0.5em 0.5em 0.5em 0.4em;
color: #000;
box-sizing: border-box;
-moz-box-sizing: border-box;
-webkit-box-sizing: border-box;
}
a.anchor-link:link {
text-decoration: none;
padding: 0px 20px;
visibility: hidden;
}
h1:hover .anchor-link,
h2:hover .anchor-link,
h3:hover .anchor-link,
h4:hover .anchor-link,
h5:hover .anchor-link,
h6:hover .anchor-link {
visibility: visible;
}
.text_cell.rendered .input_area {
display: none;
}
/* Counterpart of `.text_cell.rendered .input_area`: hide the rendered view
   while the cell is unrendered. The orphaned `.text_cell.rendered ...`
   selectors that were fused into this rule are dropped; they applied
   `display: none` to table rows and header cells of rendered cells. */
.text_cell.unrendered .text_cell_render {
  display: none;
}
.text_cell .dropzone .input_area {
border: 2px dashed #bababa;
margin: -1px;
}
/* Shared typography for Markdown heading tokens in the editor. */
.cm-header-1,
.cm-header-2,
.cm-header-3,
.cm-header-4,
.cm-header-5,
.cm-header-6 {
  font-weight: bold;
  /* Family names containing spaces must use ASCII quotes; typographic quotes
     become part of the name and the family can never match. */
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
}
.cm-header-1 {
font-size: 185.7%;
}
.cm-header-2 {
font-size: 157.1%;
}
.cm-header-3 {
font-size: 128.6%;
}
.cm-header-4 {
font-size: 110%;
}
.cm-header-5 {
font-size: 100%;
font-style: italic;
}
.cm-header-6 {
font-size: 100%;
font-style: italic;
}

.highlight pre .hll { background-color: #ffffcc }
.highlight pre { background: #f8f8f8; }
.highlight pre .c { color: #408080; font-style: italic } /* Comment */
.highlight pre .err { border: 1px solid #FF0000 } /* Error */
.highlight pre .k { color: #008000; font-weight: bold } /* Keyword */
.highlight pre .o { color: #666666 } /* Operator */
.highlight pre .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
.highlight pre .cm { color: #408080; font-style: italic } /* Comment.Multiline */
.highlight pre .cp { color: #BC7A00 } /* Comment.Preproc */
.highlight pre .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
.highlight pre .c1 { color: #408080; font-style: italic } /* Comment.Single */
.highlight pre .cs { color: #408080; font-style: italic } /* Comment.Special */
.highlight pre .gd { color: #A00000 } /* Generic.Deleted */
.highlight pre .ge { font-style: italic } /* Generic.Emph */
.highlight pre .gr { color: #FF0000 } /* Generic.Error */
.highlight pre .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight pre .gi { color: #00A000 } /* Generic.Inserted */
.highlight pre .go { color: #888888 } /* Generic.Output */
.highlight pre .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
.highlight pre .gs { font-weight: bold } /* Generic.Strong */
.highlight pre .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight pre .gt { color: #0044DD } /* Generic.Traceback */
.highlight pre .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
.highlight pre .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
.highlight pre .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
.highlight pre .kp { color: #008000 } /* Keyword.Pseudo */
.highlight pre .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
.highlight pre .kt { color: #B00040 } /* Keyword.Type */
.highlight pre .m { color: #666666 } /* Literal.Number */
.highlight pre .s { color: #BA2121 } /* Literal.String */
.highlight pre .na { color: #7D9029 } /* Name.Attribute */
.highlight pre .nb { color: #008000 } /* Name.Builtin */
.highlight pre .nc { color: #0000FF; font-weight: bold } /* Name.Class */
.highlight pre .no { color: #880000 } /* Name.Constant */
.highlight pre .nd { color: #AA22FF } /* Name.Decorator */
.highlight pre .ni { color: #999999; font-weight: bold } /* Name.Entity */
.highlight pre .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
.highlight pre .nf { color: #0000FF } /* Name.Function */
.highlight pre .nl { color: #A0A000 } /* Name.Label */
.highlight pre .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
.highlight pre .nt { color: #008000; font-weight: bold } /* Name.Tag */
.highlight pre .nv { color: #19177C } /* Name.Variable */
.highlight pre .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
.highlight pre .w { color: #bbbbbb } /* Text.Whitespace */
.highlight pre .mb { color: #666666 } /* Literal.Number.Bin */
.highlight pre .mf { color: #666666 } /* Literal.Number.Float */
.highlight pre .mh { color: #666666 } /* Literal.Number.Hex */
.highlight pre .mi { color: #666666 } /* Literal.Number.Integer */
.highlight pre .mo { color: #666666 } /* Literal.Number.Oct */
.highlight pre .sa { color: #BA2121 } /* Literal.String.Affix */
.highlight pre .sb { color: #BA2121 } /* Literal.String.Backtick */
.highlight pre .sc { color: #BA2121 } /* Literal.String.Char */
.highlight pre .dl { color: #BA2121 } /* Literal.String.Delimiter */
.highlight pre .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
.highlight pre .s2 { color: #BA2121 } /* Literal.String.Double */
.highlight pre .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
.highlight pre .sh { color: #BA2121 } /* Literal.String.Heredoc */
.highlight pre .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
.highlight pre .sx { color: #008000 } /* Literal.String.Other */
.highlight pre .sr { color: #BB6688 } /* Literal.String.Regex */
.highlight pre .s1 { color: #BA2121 } /* Literal.String.Single */
.highlight pre .ss { color: #19177C } /* Literal.String.Symbol */
.highlight pre .bp { color: #008000 } /* Name.Builtin.Pseudo */
.highlight pre .fm { color: #0000FF } /* Name.Function.Magic */
.highlight pre .vc { color: #19177C } /* Name.Variable.Class */
.highlight pre .vg { color: #19177C } /* Name.Variable.Global */
.highlight pre .vi { color: #19177C } /* Name.Variable.Instance */
.highlight pre .vm { color: #19177C } /* Name.Variable.Magic */
.highlight pre .il { color: #666666 } /* Literal.Number.Integer.Long */

Ever wondered how the Variational Autoencoder (VAE) model works? Do you want to know how VAE is able to generate new examples similar to the dataset it was trained on?

After reading this post, you’ll be equipped with the theoretical understanding of the inner workings of VAE, as well as being able to implement one yourself.

In a future post I’ll provide you with working code for a VAE trained on a dataset of handwritten digit images, and we’ll have some fun generating new digits!


Generative models

VAE is a generative model – it estimates the Probability Density Function (PDF) of the training data. If such a model is trained on natural looking images, it should assign a high probability value to an image of a lion. An image of random gibberish on the other hand should be assigned a low probability value.

The VAE model can also sample examples from the learned PDF, which is the coolest part, since it’ll be able to generate new examples that look similar to the original dataset!

I’ll explain the VAE using the MNIST handwritten digits dataset. The input to the model is an image in $\mathbb{R}^{28 \times 28}$. The model should estimate a high probability value if the input looks like a digit.

The challenge of modeling images

The interactions between pixels pose a great challenge. If the pixels were independent of each other, we would have needed to learn the PDF of every pixel independently, which is easy. The sampling would have been a breeze too – we would have just sampled each pixel independently.

In digit images there are clear dependencies between pixels. If you look at the left half of an image and see the start of a 4, you’d be very surprised to see the right half is the end of a 0. But why?…

Latent space

You know every image of a digit should contain, well, a single digit. An input in $\mathbb{R}^{28 \times 28}$ doesn’t explicitly contain that information. But it must reside somewhere… That somewhere is the latent space.

Photo by Samuel Zeller on Unsplash

You can think of the latent space as $\mathbb{R}^{k}$ where every vector contains $k$ pieces of essential information needed to draw an image. Let’s say the first dimension contains the number represented by the digit. The second dimension can be the width. The third – the angle. And so on.

We can think of the process that generated the images as a two-step process. First the person decides – consciously or not – all the attributes of the digit he’s going to draw. Next, these decisions transform into brushstrokes.

VAE tries to model this process: given an image $x$, we want to find at least one latent vector which is able to describe it; one vector that contains the instructions to generate $x$. Formulating it using the law of total probability, we get $P(x) = \int P(x|z)P(z)dz$.

Let’s pour some intuition into the equation:

  • The integral means we should search over the entire latent space for candidates.
  • For every candidate $z$, we ask ourselves: can $x$ be generated using the instructions of $z$? Is $P(x|z)$ big enough? If, for instance, $z$ encodes the information that the digit is 7, then an image of 8 is impossible. An image of 1, however, might be possible, since 1 and 7 look similar.
  • We found a good $z$? Good! But wait a second… Is this $z$ even likely? Is $P(z)$ big enough? Let’s consider a given image of an upside down 7. A latent vector describing a similar looking 7 where the angle dimension is set to 180 degrees will be a perfect match. However, that $z$ is not likely, since usually digits are not drawn at a 180-degree angle.

The VAE training objective is to maximize $P(x)$. We’ll model $P(x|z)$ using a multivariate Gaussian $\mathcal{N}(f(z), \sigma^2 \cdot I)$.

$f(z)$ will be modeled using a neural network. $\sigma$ is a hyperparameter that multiplies the identity matrix $I$.

You should keep in mind that $f$ is what we’ll be using when generating new images using a trained model. Imposing a Gaussian distribution serves for training purposes only. If we used a Dirac delta function (i.e. $x = f(z)$ deterministically), we wouldn’t be able to train the model using gradient descent!

The wonders of latent space

There are two big problems with the latent space approach:

  1. What information does each dimension hold? Some dimensions might relate to abstract pieces of information, e.g. style. Even if it was easy to interpret all dimensions, we wouldn’t want to assign labels to the dataset. This approach wouldn’t scale to new datasets.
  2. The latent space might be entangled, i.e. the dimensions might be correlated. A digit being drawn really fast, for instance, might result in both angled and thinner brushstrokes. Specifying these dependencies is hard.

Deep learning to the rescue

It turns out every distribution can be generated by applying a sufficiently complicated function over a standard multivariate Gaussian.

Hence, we’ll choose $P(z)$ to be a standard multivariate Gaussian. $f$, being modeled by a neural network, can thus be broken into two phases:

  1. The first layers will map the Gaussian to the true distribution over the latent space. We won’t be able to interpret the dimensions, but it doesn’t really matter.
  2. The later layers will then map from the latent space to $P(x|z)$.

So how do we train this beast?

The formula for $P(x)$ is intractable, so we’ll approximate it using the Monte Carlo method:

  1. Sample $\{z_i\}_{i=1}^n$ from the prior $P(z)$.
  2. Approximate using $ P(x) \approx \frac{1}{n}\sum_{i=1}^n P(x|z_i)$.

Great! So we just sample a bunch of $z$’s and let the backpropagation party begin!

Unfortunately, since $x$ has high dimensionality, many samples are needed to get a reasonable approximation. I mean, if you sample $z$’s, what are the chances you’ll end up with an image that has anything to do with $x$? This, by the way, explains why $P(x|z)$ must assign a positive probability value to any possible image, or otherwise the model won’t be able to learn: a sampled $z$ will result in an image that is almost surely different from $x$, and if the probability is 0 the gradients won’t propagate.

So how do we solve this mess?

Let’s take a shortcut!

Photo by Stefan Steinbauer on Unsplash

Most sampled $z$’s won’t contribute anything to $P(x)$ – they’ll be too off. If only we could know in advance where to sample from…

We can introduce $Q(z|x)$. $Q$ will be trained to give high probability values to $z$’s that are likely to have generated $x$.
Now we can calculate the Monte Carlo estimation using far fewer samples from $Q$.

Unfortunately, a new problem arises! Instead of maximizing $P(x) = \int P(x|z)P(z)dz = \mathbb{E}_{z \sim P(z)} P(x|z)$, we’ll be maximizing $\mathbb{E}_{z \sim Q(z|x)} P(x|z)$. How do the two relate to each other?

Variational Inference

Variational Inference is a topic for a post of its own, so I won’t elaborate here. All I’ll say is that the two do relate via this equation:

$\log P(x) - \mathcal{KL}[Q(z|x) \,\|\, P(z|x)] = \mathbb{E}_{z \sim Q(z|x)}[\log P(x|z)] - \mathcal{KL}[Q(z|x) \,\|\, P(z)]$

$\mathcal{KL}$ is the Kullback–Leibler divergence, which intuitively measures how different two distributions are (it is zero exactly when they are identical).

In a moment you’ll see how we can maximize the right side of the equation. By doing so, the left side will also be maximized:

  • $P(x)$ will be maximized.
  • how off $Q(z|x)$ is from $P(z|x)$ – the true posterior which we don’t know – will be minimized.

The intuition behind the right side of the equation is that we have a tension:

  1. On one hand we want to maximize how well $x$ is expected to be decoded from $z \sim Q$.
  2. On the other hand, we want $Q(z|x)$ (the encoder) to be similar to the prior $P(z)$ (a multivariate Gaussian). One can think of this term as a regularization.

Minimizing the KL divergence is easy given the right choice of distributions. We’ll model $Q(z|x)$ as a neural network whose output is the parameters of a multivariate Gaussian:

  • a mean $\mu_Q$
  • a diagonal covariance matrix $\Sigma_Q$

The KL divergence then becomes analytically solvable, which is great for us (and the gradients).

The decoder part is a bit trickier. Naively, we’d tackle the fact it’s intractable by using Monte Carlo. But sampling $z$’s from $Q$ won’t allow the gradients to propagate through $Q$, because sampling is not a differentiable operation. This is problematic, since the weights of the layers that output $\Sigma_Q$ and $\mu_Q$ won’t be updated.

The reparameterization trick

We can substitute $Q$ with a deterministic parameterized transformation of a parameterless random variable:

  1. Sample from a standard (parameterless) Gaussian.
  2. Multiply the sample by the square root of $\Sigma_Q$.
  3. Add $\mu_Q$ to the result.

The result will have a distribution equal to $Q$. Now the sampling operation will be from the standard Gaussian. Hence, the gradients will be able to propagate through $\Sigma_Q$ and $\mu_Q$, since these are deterministic paths now.

The result? The model will be able to learn how to adjust $Q$’s parameters: it’ll concentrate around good $z$’s that are able to produce $x$.


Connecting the dots

The VAE model can be hard to grasp. We covered a lot of material here, and it can be overwhelming.

So let me summarize all the steps one needs to grasp in order to implement VAE.

On the left side we have the model definition:

  1. An input image is passed through an encoder network.
  2. The encoder outputs parameters of a distribution $Q(z|x)$.
  3. A latent vector $z$ is sampled from $Q(z|x)$. If the encoder learned to do its job well, chances are $z$ will contain the information describing $x$.
  4. The decoder decodes $z$ into an image.

On the right side we have the loss:

  1. Reconstruction error: the output should be similar to the input.
  2. $Q(z|x)$ should be similar to the prior (multivariate standard Gaussian).

In order to generate new images, you can directly sample a latent vector from the prior distribution, and decode it into an image.


In the next post I’ll provide you with working code for a VAE. Additionally, I’ll show you how you can use a neat trick to condition the latent vector such that you can decide which digit you want to generate an image for. So stay tuned 🙂


Notes

This post is based on my intuition and these sources:

// Injects the MathJax loader once per page. The <script> element carries a
// fixed id, so if this snippet is included several times only the first
// inclusion adds the element.
//
// Every string delimiter below is a plain ASCII quote: typographic quotes
// are not string delimiters in JavaScript and make the snippet unparseable.
if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
    // `var` is kept on purpose so the binding stays visible exactly as the
    // original snippet exposed it.
    var mathjaxscript = document.createElement('script');
    mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
    mathjaxscript.type = "text/javascript";
    mathjaxscript.src = "//cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    // The inline configuration is assigned as the script's own text; Opera
    // is given it through innerHTML, every other browser through `text`.
    mathjaxscript[(window.opera ? "innerHTML" : "text")] =
        "MathJax.Hub.Config({" +
        " config: ['MMLorHTML.js']," +
        " TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'AMS' } }," +
        " jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
        " extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
        " displayAlign: 'center'," +
        " displayIndent: '0em'," +
        " showMathMenu: true," +
        " tex2jax: { " +
        " inlineMath: [ ['$','$'] ], " +
        " displayMath: [ ['$$','$$'] ]," +
        " processEscapes: true," +
        " preview: 'TeX'," +
        " }, " +
        " 'HTML-CSS': { " +
        " linebreaks: { automatic: true, width: '95% container' }, " +
        " styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'black ! important'} }" +
        " } " +
        "}); ";
    // Prefer <body>; fall back to <head> when the body is not available yet.
    (document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
}

Credit to
Source by [author_name]

Review Website

Leave a Comment