I. Introduction to TensorFlow Probability
TensorFlow Probability is an open-source library that brings probabilistic programming to deep learning.
In traditional deep learning, model parameters are deterministic point estimates. When the model or the training data is insufficient or noisy, however, uncertainty about the best parameters becomes a real problem: a conventional model simply absorbs the noise as if it were part of the training signal, whereas probabilistic programming can model that uncertainty explicitly and treat the noise as an input source in its own right.
TensorFlow Probability is built on top of TensorFlow and represents a neural network's weights and biases with differentiable probability distributions.
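The building block underneath all of this is tfp.distributions, whose distribution objects support sampling and differentiable log-density evaluation. A minimal taste, using only standard TFP API calls:

import tensorflow_probability as tfp
tfd = tfp.distributions

# A "weight" modeled as a distribution instead of a point estimate.
w = tfd.Normal(loc=0., scale=1.)
samples = w.sample(3)        # three draws of the weight
log_p = w.log_prob(samples)  # differentiable log-density of each draw
print(samples.numpy(), log_p.numpy())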
II. The Basic Idea of Probabilistic Programming
Probabilistic programming can be seen as a programming paradigm built on probability theory. With it, developers can use probabilistic modeling and inference to compute flexibly and handle problems such as the following (point 1 is illustrated by the sketch after this list):
1. Inferring parameters from a limited data set
2. Working with very small sample sizes
3. Handling missing data
4. Quantifying the model's uncertainty when only limited data has been observed
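As a concrete sketch of point 1, here is parameter inference from only five observations, using a conjugate Normal-Normal model so the posterior has a closed form. The data values and the noise scale sigma are invented purely for illustration:

import numpy as np
import tensorflow_probability as tfp
tfd = tfp.distributions

# Five noisy observations of an unknown quantity.
observations = np.array([2.1, 1.9, 2.3, 2.0, 2.2], dtype=np.float32)
n = len(observations)
sigma = 1.0                           # assumed known observation noise
prior_mean, prior_scale = 0.0, 10.0   # weak Normal prior over the mean

# Conjugate Normal-Normal update gives the posterior in closed form.
posterior_var = 1.0 / (1.0 / prior_scale**2 + n / sigma**2)
posterior_mean = posterior_var * (prior_mean / prior_scale**2
                                  + observations.sum() / sigma**2)
posterior = tfd.Normal(loc=posterior_mean, scale=np.sqrt(posterior_var))
# The posterior standard deviation shrinks from 10 to roughly 0.45.
print(posterior.mean().numpy(), posterior.stddev().numpy())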
III. How TensorFlow Probability Implements Deep Learning
1. Representing the network's weights with probability distributions
import tensorflow as tf
import tensorflow_probability as tfp

model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        10, activation=tf.nn.relu, input_shape=(n_features,)),
    # DenseVariational learns a distribution over its kernel and bias.
    # posterior_mean_field builds the surrogate posterior and
    # prior_trainable builds the prior; both are defined below.
    tfp.layers.DenseVariational(
        1, posterior_mean_field, prior_trainable),
])
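The two callables are not defined in the snippet above. One standard way to write them, adapted from the TensorFlow Probability regression tutorial (this exact parameterization is a common choice, not the only one):

import numpy as np

tfd = tfp.distributions

def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
    """Trainable mean-field Normal surrogate posterior over the weights."""
    n = kernel_size + bias_size
    c = np.log(np.expm1(1.))  # inverse-softplus of 1, so scales start near 1
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(2 * n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=t[..., :n],
                       scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
            reinterpreted_batch_ndims=1)),
    ])

def prior_trainable(kernel_size, bias_size=0, dtype=None):
    """Prior with a trainable mean and fixed unit scale."""
    n = kernel_size + bias_size
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=t, scale=1.),
            reinterpreted_batch_ndims=1)),
    ])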
2. Combining distribution-valued weights with a predictive output distribution
def normal_predictive_distribution(t):
    """Map the final layer's output tensor to a Normal predictive distribution."""
    # The scale is fixed to 1 here; it could instead be learned from a
    # second output unit.
    return tfp.distributions.Normal(loc=t, scale=1.)

model = tf.keras.Sequential([
    tfp.layers.DenseVariational(
        10, posterior_mean_field, prior_trainable,
        activation=tf.nn.relu, input_shape=(n_features,)),
    tfp.layers.DenseVariational(
        1, posterior_mean_field, prior_trainable),
    tfp.layers.DistributionLambda(
        make_distribution_fn=normal_predictive_distribution,
        name='y_distribution'),
])
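Hypothetical usage once this model is trained (training is covered in the next step): because the final layer is a DistributionLambda, calling the model returns a distribution object rather than a plain tensor, so the predictive mean and uncertainty can be read off directly. Here x_test stands for any (batch, n_features) array:

y_dist = model(x_test)             # a tfp.distributions.Normal instance
point_prediction = y_dist.mean()   # predictive mean
uncertainty = y_dist.stddev()      # predictive standard deviation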
3. Performing inference with differentiable probability distributions
# The loss is the negative log-likelihood of the labels under the
# model's predictive distribution; learning_rate, batch_size and
# n_epochs are user-chosen hyperparameters.
def neg_log_likelihood(y_true, y_pred):
    return -y_pred.log_prob(y_true)

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
    loss=neg_log_likelihood)

history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(x_test, y_test),
    verbose=0)
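One detail worth knowing: DenseVariational (and DenseFlipout, used later) automatically add the KL divergence between the surrogate posterior $q(w)$ and the prior $p(w)$ to the model's losses, so the quantity training actually minimizes is the negative evidence lower bound (ELBO):

$$\mathcal{L}(q) = -\mathbb{E}_{w \sim q(w)}\big[\log p(y \mid x, w)\big] + \mathrm{KL}\big(q(w) \,\|\, p(w)\big)$$

The first term is the neg_log_likelihood loss defined above; the second is contributed by the variational layers themselves.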
IV. A Worked Example of Deep Learning with TensorFlow Probability
The following code classifies handwritten digits from MNIST with a probabilistic model: a convolutional feature extractor followed by Bayesian dense (Flipout) layers with dropout.
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

tfd = tfp.distributions

# Load a 5000-image subset of MNIST and scale pixel values to [0, 1].
# as_frame=False ensures NumPy arrays rather than a pandas DataFrame.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X, y = X[:5000].astype('float32'), y[:5000]
X /= 255.
y = y.astype('int32')
x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42)

n_samples = 50                   # Monte Carlo samples for prediction (below)
n_epochs = 100
batch_size = 128
n_features = x_train.shape[-1]   # 784 flattened pixels
n_classes = 10

def convolutional_input_encoder(x, input_shape):
    """Reshape flat pixel vectors into (height, width, channels) images."""
    return tf.reshape(x, [-1] + list(input_shape) + [1])

def convolutional_output_decoder(x):
    """Flatten convolutional feature maps back into vectors."""
    _, h, w, c = x.shape.as_list()
    return tf.reshape(x, [-1, h * w * c])

# Scale each Bayesian layer's KL term by the dataset size so it does not
# overwhelm the likelihood term of the loss.
kl_fn = lambda q, p, _: tfd.kl_divergence(q, p) / x_train.shape[0]

model = tf.keras.Sequential([
    tf.keras.layers.Lambda(
        lambda x: convolutional_input_encoder(x, input_shape=(28, 28)),
        input_shape=(n_features,)),
    tf.keras.layers.Conv2D(
        32, [5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2], padding='same'),
    tf.keras.layers.Conv2D(
        64, [5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2], padding='same'),
    tf.keras.layers.Lambda(convolutional_output_decoder),
    tfp.layers.DenseFlipout(
        256, activation=tf.nn.relu, kernel_divergence_fn=kl_fn),
    tf.keras.layers.Dropout(0.5),
    tfp.layers.DenseFlipout(n_classes, kernel_divergence_fn=kl_fn),
    tfp.layers.DistributionLambda(
        lambda t: tfd.Categorical(logits=t), name='y_dist'),
])

def neg_log_likelihood(y_true, y_pred):
    """Negative log-likelihood of the labels under the Categorical output."""
    return -tf.reduce_mean(y_pred.log_prob(tf.squeeze(y_true)))

model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
              loss=neg_log_likelihood)

history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=0,
    validation_data=(x_test, y_test))

# The model outputs a distribution, so compute accuracy from its mode
# (model.evaluate would only report the loss here).
y_pred = model(x_test).mode()
test_acc = tf.reduce_mean(tf.cast(y_pred == y_test, tf.float32)).numpy()
print(f'Test accuracy: {test_acc:.4f}')
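Because DenseFlipout resamples the weights on every forward pass, a better estimate of the predictive class probabilities, and of the model's uncertainty, comes from averaging several passes. A sketch using the n_samples constant defined above:

# Monte Carlo average of the predictive class probabilities.
probs = tf.reduce_mean(
    tf.stack([model(x_test).probs_parameter() for _ in range(n_samples)]),
    axis=0)
y_pred_mc = tf.argmax(probs, axis=-1, output_type=tf.int32)
mc_acc = tf.reduce_mean(tf.cast(y_pred_mc == y_test, tf.float32)).numpy()
print(f'MC test accuracy ({n_samples} samples): {mc_acc:.4f}')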