I. Introduction to TensorFlow Probability
TensorFlow Probability (TFP) is an open-source library that brings probabilistic programming to deep learning.

In conventional deep learning, model parameters are treated as fixed point estimates. When the training data are scarce or noisy, however, uncertainty about the best parameter values becomes a real problem: a conventional model simply absorbs the noise as if it were part of the signal, whereas probabilistic programming represents that uncertainty explicitly and treats the noise as a modeled source of variation.

TensorFlow Probability builds on top of TensorFlow and represents a network's weights and biases with differentiable probability distributions, so uncertainty can be propagated through the model and trained with ordinary gradient-based methods.
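As a minimal sketch of what this means in practice, a TFP distribution is an ordinary TensorFlow object: its log-density is differentiable with respect to its parameters, so it can be fit with standard gradient descent. The example below is illustrative only:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# The distribution's location is a trainable variable.
loc = tf.Variable(0.0)

with tf.GradientTape() as tape:
    # A Normal distribution is a first-class, differentiable object in TFP.
    dist = tfd.Normal(loc=loc, scale=1.0)
    # The negative log-likelihood of an observation is an ordinary tensor...
    nll = -dist.log_prob(2.0)

# ...so it can be differentiated with respect to the distribution's parameters.
grad = tape.gradient(nll, loc)
print(nll.numpy(), grad.numpy())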
II. The Basic Idea of Probabilistic Programming
Probabilistic programming can be seen as a programming paradigm built on probability theory. It lets developers express models probabilistically and perform inference over them, which makes it possible to handle problems such as the following (a small worked example follows the list):

1. Inferring parameters from a limited dataset
2. Working with very small sample sizes
3. Handling missing data
4. Quantifying the model's uncertainty when only limited data have been observed
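As a small worked example of the first two points, the sketch below infers a coin's bias from just six flips using the standard Beta-Bernoulli conjugate update; the names and numbers are illustrative:

import tensorflow_probability as tfp

tfd = tfp.distributions

# Only six observed coin flips: very limited data.
flips = [1, 0, 1, 1, 0, 1]
heads, tails = sum(flips), len(flips) - sum(flips)

# Beta(1, 1) is a uniform prior over the coin's bias; for Bernoulli data
# the posterior has a closed form: Beta(1 + heads, 1 + tails).
posterior = tfd.Beta(1.0 + heads, 1.0 + tails)

# The posterior carries both a point estimate and its uncertainty.
print('posterior mean:', posterior.mean().numpy())
print('posterior stddev:', posterior.stddev().numpy())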
III. How TensorFlow Probability Implements Deep Learning
1. Representing the network's weights with probability distributions
import tensorflow as tf
import tensorflow_probability as tfp

model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        10, activation=tf.nn.relu, input_shape=(n_features,)),
    # A variational dense layer: the kernel and bias are distributions
    # defined by the posterior and prior functions passed in, and the KL
    # divergence between them is added to the training loss.
    tfp.layers.DenseVariational(
        1, posterior_mean_field, prior_trainable),
])
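The snippet above assumes that posterior_mean_field and prior_trainable are already defined. A common choice, following the pattern used in TFP's own probabilistic-layers regression tutorial, is a mean-field Normal posterior and a Normal prior with a trainable mean, sketched here:

import numpy as np

def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
    """Mean-field Normal posterior over the weights, with trainable loc and scale."""
    n = kernel_size + bias_size
    c = np.log(np.expm1(1.))
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(2 * n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfp.distributions.Independent(
            tfp.distributions.Normal(
                loc=t[..., :n],
                scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
            reinterpreted_batch_ndims=1)),
    ])

def prior_trainable(kernel_size, bias_size=0, dtype=None):
    """Normal prior whose mean is trainable (scale fixed to 1)."""
    n = kernel_size + bias_size
    return tf.keras.Sequential([
        tfp.layers.VariableLayer(n, dtype=dtype),
        tfp.layers.DistributionLambda(lambda t: tfp.distributions.Independent(
            tfp.distributions.Normal(loc=t, scale=1),
            reinterpreted_batch_ndims=1)),
    ])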
2. Representing the weights and biases with distributions and emitting a predictive distribution
model = tf.keras.Sequential([
    tfp.layers.DenseVariational(
        10, posterior_mean_field, prior_trainable,
        activation=tf.nn.relu, input_shape=(n_features,)),
    # A single variational output unit produces the predictive mean.
    tfp.layers.DenseVariational(1, posterior_mean_field, prior_trainable),
    # Wrap the output in a Normal with unit scale, so the model emits a
    # predictive distribution rather than a point estimate.
    tfp.layers.DistributionLambda(
        make_distribution_fn=lambda t: tfp.distributions.Normal(
            loc=t, scale=1.0),
        name='y_distribution'),
])
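Because the model now ends in a DistributionLambda, calling it returns a distribution object rather than a plain tensor, so a point prediction and its uncertainty can be read off directly. A hypothetical usage sketch, where x_batch is assumed to be a (batch, n_features) input array:

# The model's output is a tfp.distributions.Normal instance.
y_dist = model(x_batch)
print('predicted mean:', y_dist.mean().numpy()[:3])
print('predictive stddev:', y_dist.stddev().numpy()[:3])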
3. Training by differentiating through the probability distributions
def neg_log_likelihood(y_true, y_pred_dist):
    # y_pred_dist is the distribution produced by the final DistributionLambda;
    # minimizing the negative log-likelihood fits the predictive distribution.
    return -y_pred_dist.log_prob(y_true)

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
    loss=neg_log_likelihood)

history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(x_test, y_test),
    verbose=0)
IV. A Worked Example of Deep Learning with TensorFlow Probability
The code below classifies handwritten digits from the MNIST dataset with a probabilistic model: a conventional convolutional feature extractor, followed by variational (Flipout) dense layers with dropout, ending in a Categorical predictive distribution.
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

tfd = tfp.distributions

# Load data (as_frame=False keeps the result as NumPy arrays).
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X, y = X[:5000].astype('float32'), y[:5000]
X /= 255.
y = y.astype('int32')
x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42)

n_samples = 50
n_epochs = 100
batch_size = 128
n_features = x_train.shape[-1]
n_classes = 10

def convolutional_input_encoder(x, input_shape):
    """Reshape flat inputs into image tensors for the convolutional stack."""
    x = tf.reshape(x, [-1] + list(input_shape) + [1])
    return x

def convolutional_output_decoder(x):
    """Flatten the convolutional feature maps back into vectors."""
    _, h, w, c = x.shape.as_list()
    x = tf.reshape(x, [-1, h * w * c])
    return x

model = tf.keras.Sequential([
    # The inputs are flat 784-dimensional vectors, reshaped to 28x28x1 images.
    tf.keras.layers.Lambda(
        lambda x: convolutional_input_encoder(x, input_shape=(28, 28)),
        input_shape=(n_features,)),
    tf.keras.layers.Conv2D(
        32, [5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2], padding='same'),
    tf.keras.layers.Conv2D(
        64, [5, 5], strides=[1, 1], padding='same', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(
        pool_size=[2, 2], strides=[2, 2], padding='same'),
    tf.keras.layers.Lambda(
        lambda x: convolutional_output_decoder(x)),
    # Flipout layers sample their weights from a learned posterior on each
    # forward pass and add the KL divergence to the model's losses.
    tfp.layers.DenseFlipout(256, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.5),
    tfp.layers.DenseFlipout(10),
    tfp.layers.DistributionLambda(
        lambda t: tfd.Categorical(logits=t), name='y_dist'),
])

def neg_log_likelihood(y_true, y_pred):
    return -tf.reduce_mean(y_pred.log_prob(tf.squeeze(y_true)))

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss=neg_log_likelihood)

history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=0,
    validation_data=(x_test, y_test))

# Only a loss was compiled, so evaluate() returns a single scalar;
# accuracy is computed from the mode of the predictive distribution.
test_loss = model.evaluate(x_test, y_test, verbose=0)
y_pred = model(x_test).mode().numpy()
test_acc = (y_pred == y_test).mean()
print(f'Test loss: {test_loss:.4f}, test accuracy: {test_acc:.4f}')
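Since the Flipout layers resample their weights on every forward pass, a single prediction is stochastic. A common refinement, sketched below using the n_samples constant defined in the listing, is to average the class probabilities over several passes to obtain a Monte Carlo estimate of the predictive distribution:

import numpy as np

# Each call to the model samples fresh weights from the variational
# posterior, so averaging the class probabilities over n_samples passes
# gives a Monte Carlo estimate of the predictive distribution.
probs = np.mean(
    [model(x_test).probs_parameter().numpy() for _ in range(n_samples)],
    axis=0)
mc_pred = probs.argmax(axis=-1)
mc_acc = (mc_pred == y_test).mean()
print(f'Monte Carlo averaged test accuracy: {mc_acc:.4f}')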