Thiết kế website giá rẻ

Question

I am trying to implement a model for sentiment analysis on text data using self-attention. In this example, I am using multi-head attention, but I cannot tell whether the results are accurate. It always shows approximately the same attention heatmap for every example I try.

<code>import tensorflow as tf

from tensorflow.keras.models import Model

from tensorflow.keras.layers import Dense, MultiHeadAttention, Input, LayerNormalization, GlobalAveragePooling1D

# Define the input

num_heads = 8 # Number of attention heads

droup_out = 0.5

lstm_units = 64

attention_dim = 128

learning_rate = 0.0001

maxlen = 20 # sequence length

embedding_dim = 200 # Define your embedding dimension

vocab_size = len(tokenizer.word_index) + 1

# Define the input

inputs = Input(shape=(maxlen,))

embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, weights=[embedding_matrix], input_length=maxlen, trainable=False)(inputs)

lstm_layer = LSTM(lstm_units, return_sequences=True)(embedding_layer)

dropout_layer = Dropout(droup_out)(lstm_layer)

# Add the MultiHeadAttention layer

attention_layer = MultiHeadAttention(num_heads=8, key_dim=attention_dim)

attention_output, attention_weights = attention_layer(query=dropout_layer, value=dropout_layer, key=dropout_layer, return_attention_scores=True)

# Normalize the output of the attention layer

attention_output = LayerNormalization(epsilon=1e-6)(attention_output)

# Add a global average pooling layer to reduce the output to a fixed size

pooled_output = GlobalAveragePooling1D()(attention_output)

# Add more layers as needed

dense_output = Dense(units=32, activation='relu')(pooled_output)

output_layer = Dense(1, activation='sigmoid')(dense_output)

# Create the model

model = Model(inputs=inputs, outputs=output_layer)

def lr_schedule(epoch, lr):
    """Return the learning rate to use for the given epoch.

    The rate is divided by 10 on every epoch index that is a positive
    multiple of 5 and is returned unchanged otherwise.

    Args:
        epoch: Epoch index (0 never triggers a decay).
        lr: Current learning rate.

    Returns:
        ``lr / 10`` on decay epochs, otherwise ``lr``.
    """
    if epoch % 5 == 0 and epoch > 0:
        return lr / 10
    return lr

# Compile the model

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train the model

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model

results = model.evaluate(X_test, y_test)

print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

</code>

<code>import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras.layers import Dense, MultiHeadAttention, Input, LayerNormalization, GlobalAveragePooling1D # Define the input num_heads = 8 # Number of attention heads droup_out = 0.5 lstm_units = 64 attention_dim = 128 learning_rate = 0.0001 maxlen = 20 # sequence length embedding_dim = 200 # Define your embedding dimension vocab_size = len(tokenizer.word_index) + 1 # Define the input inputs = Input(shape=(maxlen,)) embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, weights=[embedding_matrix], input_length=maxlen, trainable=False)(inputs) lstm_layer = LSTM(lstm_units, return_sequences=True)(embedding_layer) dropout_layer = Dropout(droup_out)(lstm_layer) # Add the MultiHeadAttention layer attention_layer = MultiHeadAttention(num_heads=8, key_dim=attention_dim) attention_output, attention_weights = attention_layer(query=dropout_layer, value=dropout_layer, key=dropout_layer, return_attention_scores=True) # Normalize the output of the attention layer attention_output = LayerNormalization(epsilon=1e-6)(attention_output) # Add a global average pooling layer to reduce the output to a fixed size pooled_output = GlobalAveragePooling1D()(attention_output) # Add more layers as needed dense_output = Dense(units=32, activation='relu')(pooled_output) output_layer = Dense(1, activation='sigmoid')(dense_output) # Create the model model = Model(inputs=inputs, outputs=output_layer) def lr_schedule(epoch, lr): if epoch % 5 == 0 and epoch > 0: return lr / 10 return lr # Compile the model optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy']) # Train the model early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True) history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping]) # Evaluate the 
model results = model.evaluate(X_test, y_test) print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}") </code>

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (
    Dense,
    Dropout,
    Embedding,
    GlobalAveragePooling1D,
    Input,
    LayerNormalization,
    LSTM,
    MultiHeadAttention,
)
from tensorflow.keras.models import Model

# Hyperparameters
num_heads = 8          # Number of attention heads
droup_out = 0.5        # Dropout rate applied to the LSTM outputs
lstm_units = 64        # Number of units in the LSTM layer
attention_dim = 128    # Passed as key_dim to MultiHeadAttention
learning_rate = 0.0001
maxlen = 20            # Sequence length
embedding_dim = 200    # Define your embedding dimension
# NOTE(review): presumably `tokenizer` is a Keras Tokenizer whose word_index
# starts at 1, so the +1 leaves index 0 free for padding -- confirm.
vocab_size = len(tokenizer.word_index) + 1

# Define the input: one sequence of `maxlen` token ids per example.
inputs = Input(shape=(maxlen,))

# Frozen embeddings initialised from `embedding_matrix`. mask_zero=True marks
# token id 0 as padding so that downstream mask-aware layers (attention,
# average pooling) can ignore padded timesteps instead of treating them as
# real tokens.
# NOTE(review): assumes the sequences are padded with 0 -- confirm against
# the preprocessing code.
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
    mask_zero=True,
)(inputs)
lstm_layer = LSTM(lstm_units, return_sequences=True)(embedding_layer)
dropout_layer = Dropout(droup_out)(lstm_layer)

# Self-attention: query, key and value are all the same sequence. The head
# count comes from `num_heads` rather than a second hard-coded literal.
attention_layer = MultiHeadAttention(num_heads=num_heads, key_dim=attention_dim)
attention_output, attention_weights = attention_layer(
    query=dropout_layer,
    value=dropout_layer,
    key=dropout_layer,
    return_attention_scores=True,
)

# Normalize the output of the attention layer
attention_output = LayerNormalization(epsilon=1e-6)(attention_output)

# Add a global average pooling layer to reduce the output to a fixed size
pooled_output = GlobalAveragePooling1D()(attention_output)

# Classification head: one hidden layer, then a sigmoid unit for the
# binary label.
dense_output = Dense(units=32, activation='relu')(pooled_output)
output_layer = Dense(1, activation='sigmoid')(dense_output)

# Create the model
model = Model(inputs=inputs, outputs=output_layer)

# Companion model over the same layers (and therefore the same weights) that
# returns the attention scores, so that after training the per-example
# heatmaps can be read with attention_model.predict(...). Per the Keras
# documentation the scores carry one attention map per head.
attention_model = Model(inputs=inputs, outputs=attention_weights)

def lr_schedule(epoch, lr):
    """Step-decay schedule: cut the learning rate tenfold every fifth epoch.

    Args:
        epoch: Epoch index; index 0 never triggers a decay.
        lr: Learning rate currently in use.

    Returns:
        ``lr / 10`` when ``epoch`` is a positive multiple of 5, else ``lr``.
    """
    is_decay_epoch = epoch > 0 and epoch % 5 == 0
    return lr / 10 if is_decay_epoch else lr

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# lr_schedule was defined but never handed to fit(), so the step decay never
# ran. LearningRateScheduler calls it at the start of each epoch with the
# epoch index and the current learning rate.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_scheduler],
)

# Evaluate the model
# evaluate() returns the loss followed by the compiled metrics, here
# [loss, accuracy].
results = model.evaluate(X_test, y_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

Thiết kế website giá rẻ

Danh mục

multihead self-attention for sentiment analysis not accurate results