
TensorBoard Visualization in Depth

Table of Contents

1 TensorBoard Embeddings Visualization

1.1 Housekeeping

<<get-pid>>
<<kill-pid>>
<<del-graph-summary>>
<<tensorboard-run>>
<<run-tensorboard>>
ps -aux | grep "python" | grep -E "(default|lec4|tensorboard)" | grep -v "grep" | awk '{print $2}'
6675
11038
11044
32122
;; Extract the PIDs: flatten the org table (one PID per row) into a list of strings.
;; `pid' is presumably bound to the output of the ps block above via a :var header.
(defun r1l (tbl)
  (mapcar (lambda (x) (number-to-string (car x))) tbl))
;; (print pid)
;; (print (r1l pid))
(mapcar #'shell-command-to-string
        (mapcar (lambda (x) (concat "kill " x)) (r1l pid)))
       
rm -rf /home/yiddi/git_repos/on_ml_tensorflow/logs/*
ls /home/yiddi/git_repos/on_ml_tensorflow/logs
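
For reference, the same cleanup can be done from a single script. This is my own sketch, not part of the original notes, assuming standard-library Python 3: it replays the ps pipeline above, kills the matching PIDs, and wipes the logs directory.

import os
import shutil
import subprocess

LOG_DIR = "/home/yiddi/git_repos/on_ml_tensorflow/logs"

# Replay `ps -aux | grep python | grep -E "(default|lec4|tensorboard)"`.
ps_out = subprocess.run(["ps", "-eo", "pid,args"],
                        capture_output=True, text=True).stdout
for line in ps_out.splitlines()[1:]:
    pid, _, args = line.strip().partition(" ")
    if pid == str(os.getpid()):
        continue  # don't kill this script itself
    if "python" in args and any(k in args for k in ("default", "lec4", "tensorboard")):
        subprocess.run(["kill", pid])

# Replay `rm -rf .../logs/*`: remove and recreate the summary directory.
shutil.rmtree(LOG_DIR, ignore_errors=True)
os.makedirs(LOG_DIR, exist_ok=True)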

1.2 Main code

<<包导入>>
# <<导入 projector: for embeddings 可视化>>


<<数据准备>>
# build with numpy (with/without noise)
# load the dataset into memory (one_hot or not)
# slice an already existing dataset

<<图参数>>
# batch size
# number of batches
# dropout keep rate
# dataset location

<<工具函数与工具声明>>
# run OPs on certain Variables and summarize them
# <<def Variable: for embeddings 可视化>> as an untrainable Variable: stack the first 3000 images, name it 'embeddings'
# <<file IO: for embeddings 可视化>> read the one_hot labels, argmax to get the true labels, write them to a file one label per line

<<图构造>>
# The network: NN layers, name_scope for TB, parameter summaries
# 1. placeholders
#    1.1 x: dataset placeholder,
#    + <<def OP: for img process >> reshape x
#    1.2 y: labelset placeholder,
#    1.3 keep_prob: dropout, keep rate of certain layer's nodes
# 2. Variables
#    2.0 name scope setup
#    2.1 first-layer weights W; declare summary tf.summary.scalar/image/histogram nodes
#    2.2 first-layer biases b; declare summary tf.summary.scalar/image/histogram nodes
# 3. Operations
#    3.1 first-layer output (active_fn(score)); declare summary tf.summary.scalar/image/histogram nodes

# Two functions:
#   1. err_fn:
#      1.1 name scope setup
#      1.2 err fn (per-sample error); declare summary tf.summary.scalar/image/histogram nodes
#   2. loss_fn:
#      2.1 name scope setup
#      2.2 loss fn (overall error); declare summary tf.summary.scalar/image/histogram nodes

# Two operators:
#   1. initializer
#   2. optimizer
#      2.1 name scope setup

# accuracy computation
#   1. correct_prediction
#      1.1 name scope setup
#   2. accuracy
#      2.1 name scope setup

# merge summaries

# configure embeddings visualization parameters

<<图计算>>
# run the initializer
# summary Writer for TB
# for epoch_num:
#          1. for batch_num:
#                 1.1 x_y_of_next_batch;
#                 1.2 run the optimizer op and the summary op
#          2. run the accuracy op
# plot with matplotlib
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST", one_hot=True)
max_steps = 1001
image_num = 3000
DIR = "/home/yiddi/git_repos/on_ml_tensorflow/"

sess = tf.Session()

# This records how to take a slice of the dataset: here, the first 3000 test images.
embeddings = tf.Variable(tf.stack(mnist.test.images[:image_num]), trainable=False, name='embeddings')

def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)


with tf.name_scope('input'):
    x=tf.placeholder(tf.float32, [None,784], name='x-input') # placeholder.shape must match the dataset's shape, but when the graph is
    y=tf.placeholder(tf.float32, [None,10], name='y-input')  # built we don't yet know how many samples the dataset holds, so the first
                                                              # dimension of each placeholder is None; accordingly the first dimension
                                                              # of image_shaped_input is -1, i.e. inferred at run time.
with tf.name_scope('input_reshape'):
    image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])      # reshape the flattened image vectors back into matrices so they can be drawn
    tf.summary.image('input', image_shaped_input, 10)        # put 10 images into the summary file

with tf.name_scope('layer'):
    with tf.name_scope('weights'):
        W=tf.Variable(tf.zeros([784, 10]),name='W')
        variable_summaries(W)

    with tf.name_scope('biases'):
        b=tf.Variable(tf.zeros([10]),name='b')
        variable_summaries(b)

    with tf.name_scope('score'):
        score=tf.matmul(x, W) + b

    with tf.name_scope('softmax'):
        prediction=tf.nn.softmax(score)

with tf.name_scope('loss'):
    # softmax_cross_entropy_with_logits_v2 applies softmax internally, so it should be
    # fed the raw scores (logits) rather than the already-softmaxed prediction.
    loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=score))
    tf.summary.scalar('loss', loss)
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

sess.run(tf.global_variables_initializer())

with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction=tf.equal(tf.argmax(y,1), tf.argmax(prediction, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

# Generate the metadata file:
# write each sample's label (i.e. which digit it is) into the metadata file.
# metadata.tsv looks like:
# -------------
# 1 \
# 7  |
# 0  |
# 6  |
# .  |-- 3000 lines
# .  |
# .  |
# 3  |
# 9 /
# -------------
if tf.gfile.Exists(DIR + 'projector/projector/metadata.tsv'):
    tf.gfile.Remove(DIR + 'projector/projector/metadata.tsv')
with open(DIR + 'projector/projector/metadata.tsv', 'w') as f:
    labels = sess.run(tf.argmax(mnist.test.labels[:], 1))
    for i in range(image_num):
        f.write(str(labels[i]) + '\n')

merged = tf.summary.merge_all()

# Configure the embedding visualization
projector_writer = tf.summary.FileWriter(DIR + 'projector/projector', sess.graph)
saver = tf.train.Saver() # saves the network model, producing ckpt files
config = projector.ProjectorConfig() # the projector configuration
embed= config.embeddings.add()
embed.tensor_name = embeddings.name # name of the untrainable Variable that stores the data subset
embed.metadata_path = DIR + 'projector/projector/metadata.tsv' # labels file
embed.sprite.image_path = DIR + 'projector/projector/mnist_10k_sprite.png' # sprite image of the original pictures
embed.sprite.single_image_dim.extend([28,28]) # slice the sprite into 28*28 tiles
projector.visualize_embeddings(projector_writer, config) # feed writer and config to the projector; writes a .pbtxt config file

for i in range(max_steps):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) # standard boilerplate for run tracing
    run_metadata = tf.RunMetadata() # standard boilerplate for run tracing
    summary, _ = sess.run([merged, train_step], feed_dict={x:batch_xs, y:batch_ys}, options=run_options, run_metadata=run_metadata)
    projector_writer.add_run_metadata(run_metadata, 'step%03d' % i)
    projector_writer.add_summary(summary, i)

    if i%100 == 0:
        acc=sess.run(accuracy, feed_dict={x:mnist.test.images, y:mnist.test.labels})
        print("Iter " + str(i) + ", Testing Accuracy= "  + str(acc))

saver.save(sess, DIR + 'projector/projector/a_model.ckpt', global_step=max_steps)
projector_writer.close()
sess.close()
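
Once the run above finishes, the projector directory should contain everything TensorBoard needs. A quick sanity check of my own, assuming the paths used above: metadata.tsv should have image_num lines, visualize_embeddings should have written projector_config.pbtxt, and the Saver should have left a_model.ckpt-1001 checkpoint files.

import os

DIR = "/home/yiddi/git_repos/on_ml_tensorflow/"
proj_dir = os.path.join(DIR, 'projector/projector')

# One label per embedded image -> expect 3000 lines.
with open(os.path.join(proj_dir, 'metadata.tsv')) as f:
    print('metadata lines:', sum(1 for _ in f))

# visualize_embeddings() writes the projector config next to the event files.
print('has projector_config.pbtxt:',
      os.path.exists(os.path.join(proj_dir, 'projector_config.pbtxt')))

# The Saver checkpoint is what TensorBoard reads the embedding tensor from.
print('checkpoint files:',
      sorted(p for p in os.listdir(proj_dir) if p.startswith('a_model.ckpt')))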
(require 'ob-async)
ob-async

tensorboard --logdir=/home/yiddi/git_repos/on_ml_tensorflow/projector/projector
d0b50d716025b683cdae4cf4e54826d7

Embedding visualization workflow

       ^ Embedding Space
       |
       |
       |          2                     TensorBoard
       |         22
       |
       |       2         0
       |     1             00          ---------<-----------------------------------------+
       |    111            0                                                              |
       |                                                                                  |
-------+---------------------------->                                                     |
       |                                                                                  |
                                                      shows each point's true label in TB |
                                                                                          | PCA: from 784D to 2D
+--------------+                                                                          |
|    ...       |                                     metadata.tsv    -------------        |
|    ...       |                                                     1 \                  |
|     ..       |                           embed.metadata_path       7  |                 |
|     ..       |   -------------+          = xxx.tsv                 0  |                 |
|   ......     |                |                                    6  |                 |
|   ......     |                |                                    .  |-- 3000 lines    ^
+--------------+                |                                    .  |                 |
\              /                |                                    .  |                 |
 \            /                 |                                    3  |                 |
  \          /                  |                                    9 /                  |
   \         |                  |                                    -------------        |
    \        |                  |                                                         |
     \       |                  |                                                         |
      \      |                  |                     embeddings                          |
       \     |                  |                             784 D                       |
        \   /                   |                     /-----------------------\           |
+--+--+--+--+--+                |                    [[12, 143, 120, 1, 23, ...]    \     |
|  |  |  |  |  |                |                     [12, 139, 151, 9, 63, ...]     |
+--+--+--+--+--+                |                     [22, 199, 120, 3, 83, ...]     |  3000 rows
|  |  |  |  |  |                +------------------   [62, 177, 192, 5, 13, ...]     |
+--+--+--+--+--+                                      ...                            |
|  |  |  |  |  |                                      [91, 254, 120, 14, 30, ...]]  /
+--+--+--+--+--+
|  |  |  |  |  |                                     the data points taken from the dataset
+--+--+--+--+--+                                     embed.tensor_name = embeddings.name
|  |  |  |  |  |
+--+--+--+--+--+                                     the data that is dimension-reduced and shown in TB
the images corresponding to those data points

embed.sprite.image_path = xxx.png

used to show, for each point in the TB embedding view,
what its original image looks like.
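
The "PCA: from 784D to 2D" arrow is the projector's default view. As an illustration only (my own sketch, with random data standing in for mnist.test.images[:3000]), the same projection can be reproduced with plain numpy: center the 3000x784 matrix and project it onto the top two principal components.

import numpy as np

# Stand-in for the 3000 x 784 matrix stored in the 'embeddings' Variable.
X = np.random.rand(3000, 784).astype(np.float32)

# Center, then project onto the top-2 principal components -- essentially
# what the TensorBoard projector's PCA view does before plotting the points.
Xc = X - X.mean(axis=0)
U, S, Vt = np.linalg.svd(Xc, full_matrices=False)  # rows of Vt = principal directions
coords_2d = Xc @ Vt[:2].T                          # shape (3000, 2): one point per image

print(coords_2d.shape)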