I. An Introduction to TensorFlow Probability
TensorFlow Probability (TFP) is an open-source library that brings probabilistic programming to deep learning.
In traditional deep learning, model parameters are treated as fixed, deterministic values. When the model or the training data is limited or noisy, however, uncertainty about the best parameter values becomes a real problem: a conventional model simply absorbs the noise as if it were signal, whereas probabilistic programming represents that uncertainty explicitly and treats noise as part of the modeling assumptions.
TensorFlow Probability is built on top of TensorFlow and can represent a neural network's weights and biases as differentiable probability distributions rather than point estimates.
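Before diving in, a minimal sketch of the distribution objects TFP is built around may help. This example is ours, not from the original article, and only assumes an eager TF2 environment:

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# A Gaussian "weight": a distribution object, not a fixed number.
w = tfd.Normal(loc=0., scale=1.)
samples = w.sample(3)          # draw three realizations of the weight
log_p = w.log_prob(samples)    # differentiable log-density of those draws
print(samples.numpy(), log_p.numpy())

Because log_prob is differentiable with respect to the distribution's parameters, these objects can sit inside a network and be trained by gradient descent like any other layer.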
II. The Basic Idea of Probabilistic Programming
Probabilistic programming can be seen as a programming paradigm built on probability theory. It lets developers model and infer with probability distributions directly, which makes it possible to:
1. Infer parameters from a limited dataset
2. Work with very small sample sizes
3. Handle missing data
4. Quantify the model's uncertainty when only limited data has been observed (see the sketch after this list)
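To make point 4 concrete, here is a minimal sketch using a conjugate Beta posterior over a coin's bias after only ten flips. The numbers are illustrative assumptions, not taken from the original article:

import tensorflow_probability as tfp
tfd = tfp.distributions

# Beta(1, 1) prior; we observed 7 heads and 3 tails in 10 flips.
posterior = tfd.Beta(concentration1=1. + 7., concentration0=1. + 3.)
print(posterior.mean().numpy())    # ~0.67: the point estimate
print(posterior.stddev().numpy())  # ~0.13: still wide, because data is scarce

With only ten observations the posterior remains broad; as more flips arrive the standard deviation shrinks, which is exactly the behavior a fixed-parameter model cannot express.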
III. How TensorFlow Probability Implements Deep Learning
1. Representing the network's weights with probability distributions
import tensorflow as tf
import tensorflow_probability as tfp

# n_features is the input dimensionality (assumed to be defined elsewhere).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        10, activation=tf.nn.relu, input_shape=(n_features,)),
    # The variational layer's weights are distributions built from the
    # surrogate posterior and prior callables defined below.
    tfp.layers.DenseVariational(
        1, posterior_mean_field, prior_trainable),
])
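The snippet above assumes posterior_mean_field and prior_trainable already exist; they are the callables that build the surrogate posterior and the prior over each layer's weights. A minimal sketch, following the pattern used in the TFP documentation (the exact parameterization here is an assumption, not something the original article specifies):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
    """Mean-field Gaussian surrogate posterior over kernel and bias."""
    n = kernel_size + bias_size
    c = np.log(np.expm1(1.))
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(2 * n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=t[..., :n],
                       scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
            reinterpreted_batch_ndims=1)),
    ])

def prior_trainable(kernel_size, bias_size=0, dtype=None):
    """Prior with a trainable mean and fixed unit scale."""
    n = kernel_size + bias_size
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=t, scale=1.),
            reinterpreted_batch_ndims=1)),
    ])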
2. Wrapping the layer outputs in a predictive distribution
# The final layer maps the network's output tensor to a predictive
# distribution, so the model emits a tfd.Normal object instead of a tensor.
def normal_predictive_distribution(t):
    """Turn the preceding layer's output tensor into a Normal distribution."""
    return tfp.distributions.Normal(loc=t, scale=1.)

model = tf.keras.Sequential([
    tfp.layers.DenseVariational(
        10,
        posterior_mean_field,
        prior_trainable,
        activation=tf.nn.relu,
        input_shape=(n_features,),
    ),
    tfp.layers.DenseVariational(1, posterior_mean_field, prior_trainable),
    tfp.layers.DistributionLambda(
        make_distribution_fn=normal_predictive_distribution,
        convert_to_tensor_fn=tfp.distributions.Distribution.sample,
        name='y_distribution'),
])
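Because the last layer is a DistributionLambda, calling the model returns a distribution object rather than a plain tensor. A brief usage sketch (x_test is assumed to be defined):

y_dist = model(x_test)       # a tfd.Normal, not a plain tensor
y_mean = y_dist.mean()       # point predictions
y_std = y_dist.stddev()      # per-example predictive uncertainty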
3. Performing inference with differentiable probability distributions
# learning_rate, batch_size, and n_epochs are hyperparameters assumed to be
# defined elsewhere. Training minimizes the negative log-likelihood of the
# labels under the model's predictive distribution.
def neg_log_likelihood(y_true, y_pred):
    return -y_pred.log_prob(y_true)

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
    loss=neg_log_likelihood,
    metrics=[neg_log_likelihood])

history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(x_test, y_test),
    verbose=0)
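One practical detail worth noting: each DenseVariational layer also adds a KL-divergence term to model.losses, and it is common to down-weight that term by the number of training examples so the per-example loss approximates the ELBO. A sketch using the layer's kl_weight argument (x_train assumed to be defined; the scaling choice is our assumption, not the article's):

tfp.layers.DenseVariational(
    1, posterior_mean_field, prior_trainable,
    kl_weight=1. / x_train.shape[0])  # scale KL to a per-example ELBO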
IV. A Complete Example with TensorFlow Probability
The code below applies the probabilistic approach to classifying handwritten digits from the MNIST dataset, using a convolutional network whose dense head combines Bayesian (Flipout) layers with dropout.
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

tfd = tfp.distributions

# Load data (as_frame=False so X and y come back as NumPy arrays).
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X, y = X[:5000, ...].astype('float32'), y[:5000]
X /= 255.
y = y.astype('int32')
x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42)

n_epochs = 100
batch_size = 128
n_features = x_train.shape[-1]
n_classes = 10

def convolutional_input_encoder(x, input_shape):
    """Reshape flat pixel vectors into single-channel images."""
    x = tf.reshape(x, [-1] + list(input_shape) + [1])
    return x

def convolutional_output_decoder(x):
    """Flatten the convolutional feature maps back into vectors."""
    _, h, w, c = x.shape.as_list()
    x = tf.reshape(x, [-1, h * w * c])
    return x
model = tf.keras.Sequential([
    # Incoming examples are flat 784-dim vectors; reshape them to 28x28x1.
    tf.keras.layers.Lambda(
        lambda x: convolutional_input_encoder(x, input_shape=(28, 28)),
        input_shape=(n_features,)),
    tf.keras.layers.Conv2D(
        32, [5, 5], strides=[1, 1],
        padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2],
        padding='same'),
    tf.keras.layers.Conv2D(
        64, [5, 5], strides=[1, 1],
        padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2],
        padding='same'),
    tf.keras.layers.Lambda(convolutional_output_decoder),
    # Flipout dense layers: weights are distributions, resampled per pass.
    tfp.layers.DenseFlipout(256, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.5),
    tfp.layers.DenseFlipout(n_classes),
    # Emit a categorical predictive distribution over the ten digit classes.
    tfp.layers.DistributionLambda(
        lambda t: tfd.Categorical(logits=t),
        name='y_dist')
])
def neg_log_likelihood(y_true, y_pred):
    """Negative log-likelihood of the integer labels under the Categorical."""
    return -tf.reduce_mean(y_pred.log_prob(tf.squeeze(y_true)))

# The Flipout layers' KL terms are collected in model.losses and added to
# the total loss automatically by Keras.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss=neg_log_likelihood)

history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=0,
    validation_data=(x_test, y_test))

# No accuracy metric was compiled, so compute accuracy from the predictive
# distribution's most probable class.
y_pred = model(x_test).mode()
test_acc = float(tf.reduce_mean(
    tf.cast(tf.equal(tf.cast(y_pred, tf.int32), y_test), tf.float32)))
print(f'Test accuracy: {test_acc:.4f}')
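Because the Flipout layers draw fresh weight samples on every forward pass, a single prediction is itself a random variable. A common refinement, sketched here as our own addition rather than part of the original example, is to average the predicted class probabilities over several stochastic passes:

import numpy as np

# Monte Carlo estimate: average class probabilities over 10 forward passes.
probs = np.stack(
    [model(x_test).probs_parameter().numpy() for _ in range(10)])
y_pred_mc = probs.mean(axis=0).argmax(axis=-1)
mc_acc = (y_pred_mc == y_test).mean()
print(f'MC-averaged test accuracy: {mc_acc:.4f}')

The spread of probs across passes also gives a rough per-example uncertainty signal, which is the practical payoff of the probabilistic layers.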