I wrote code to analyze the impact of TCGA data on the latent space.
Here is the code:
<code>import numpy as np
import shap
import tensorflow as tf

# Random samples for each omics block, used both as SHAP background and test data.
test_input1 = tf.random.normal([X_train1.shape[0], n_input1], dtype=tf.float32)
test_input2 = tf.random.normal([X_train2.shape[0], n_input2], dtype=tf.float32)

# Symbolic inputs so the encoder can be wrapped as a Keras model for DeepExplainer.
input_tensor1 = tf.keras.Input(shape=(n_input1,))
input_tensor2 = tf.keras.Input(shape=(n_input2,))

# Self-attention over the concatenated blocks.
combined_input = tf.concat([test_input1, test_input2], axis=1)
self_attention = SelfAttention(units=n_input1 + n_input2)
attention_output = self_attention(combined_input)

# Build the encoder, run one forward pass to create its variables, then wrap it.
encoder = EncoderNetwork(n_input1, n_input2, n_hiddensh, activation=act, _init=func_init)
_ = encoder([test_input1, test_input2])
encoded_output = encoder([input_tensor1, input_tensor2])
wrapped_encoder = tf.keras.Model(inputs=[input_tensor1, input_tensor2], outputs=encoded_output)

# Load the trained weights and biases into the encoder and the attention layer.
load_encoder_weights(encoder, iterator)
load_attention_weights(self_attention, iterator)

# Re-run attention with the trained weights; its two outputs are fed to SHAP.
self_attention_output = self_attention(combined_input)
test_input1_np = self_attention_output[0].numpy()
test_input2_np = self_attention_output[1].numpy()

explainer = shap.DeepExplainer(wrapped_encoder, [test_input1_np, test_input2_np])
shap_values = explainer.shap_values([test_input1_np, test_input2_np])  # was missing the closing ')'

# Per-feature SHAP statistics for the first output.
shap_values_0 = np.squeeze(shap_values[0])
feature_names_1 = [exp_feature_names[i - 1] for i in htseq_cmt[:, 2].astype(int)]
positive_counts_0 = np.sum(shap_values_0 > 0, axis=0)
negative_counts_0 = np.sum(shap_values_0 < 0, axis=0)
total_samples_0 = shap_values_0.shape[0]
positive_ratios_0 = positive_counts_0 / total_samples_0
negative_ratios_0 = negative_counts_0 / total_samples_0
positive_shap_sums_0 = np.sum(np.where(shap_values_0 > 0, shap_values_0, 0), axis=0)
negative_shap_sums_0 = np.sum(np.where(shap_values_0 < 0, shap_values_0, 0), axis=0)
</code>
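To make the problem I describe below concrete, this is how I quantify the balance between the positive and negative sums computed at the end of the block above (this summary snippet is my own addition for illustration, not part of the pipeline):
<code># positive_shap_sums_0 >= 0 and negative_shap_sums_0 <= 0, so their sum is the
# net per-feature contribution and their difference is the total absolute SHAP mass.
net_0 = positive_shap_sums_0 + negative_shap_sums_0
balance_0 = np.abs(net_0) / (positive_shap_sums_0 - negative_shap_sums_0 + 1e-12)
# balance_0 near 0 for every feature means positive and negative values cancel.
for name, b in sorted(zip(feature_names_1, balance_0), key=lambda t: -t[1])[:10]:
    print(name, round(float(b), 4))
</code>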
The abnormal thing is that, for almost every feature, the positive SHAP sums and the (absolute) negative SHAP sums are nearly equal, so they almost cancel out.
I don't know whether the weight and bias loading is incorrect, or whether this data is special.
Is there a testing method to check whether the abnormal result is related to the data itself?
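For context, the only checks I could come up with myself are sketched below against the variables above; the before/after weight comparison and the column-permutation control are guesses, which is why I am asking whether there is a proper method:
<code># Check 1: did load_encoder_weights actually change the model?
# (Run with a freshly built encoder/wrapped_encoder, assuming a single output tensor.)
out_before = wrapped_encoder.predict([test_input1_np, test_input2_np])
load_encoder_weights(encoder, iterator)
out_after = wrapped_encoder.predict([test_input1_np, test_input2_np])
print("max |before - after|:", np.abs(out_before - out_after).max())
# A value near 0 would suggest the loading never touched the weights.

# Check 2: destroy the structure in the data and rerun SHAP.
# Each feature column is shuffled independently across samples, so the marginals
# stay the same but cross-feature correlations are gone. If the positive/negative
# sums still cancel on this control, the effect comes from the model/SHAP setup,
# not from the TCGA data.
rng = np.random.default_rng(0)
perm1 = rng.permuted(test_input1_np, axis=0)  # independent shuffle per column
perm2 = rng.permuted(test_input2_np, axis=0)
explainer_ctrl = shap.DeepExplainer(wrapped_encoder, [perm1, perm2])
shap_ctrl = explainer_ctrl.shap_values([perm1, perm2])
</code>
My reading would be: if Check 1 prints a difference near 0, the loading is the problem; if Check 2 still shows the cancellation on shuffled data, the data itself is probably not the cause. Is that reasoning sound, and is there a more standard test?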