Back to Basics: Fine-Tuning BERT for Sentiment Analysis

Today’s post continues on from yesterday’s. It covers the training and evaluation functions as well as prediction on the test set. Ideally, we would have another post covering the same process in PyTorch Lightning!
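In the meantime, here is a minimal sketch of what the classifier could look like as a LightningModule. It assumes the BertClassifier and loss_fn defined in the previous post; the class name LitBertClassifier and the AdamW learning rate are illustrative, not final code.

import pytorch_lightning as pl
import torch

class LitBertClassifier(pl.LightningModule):
  def __init__(self, bert_classifier, loss_fn, lr=5e-5):
    super().__init__()
    self.model = bert_classifier  # the BertClassifier from the previous post
    self.loss_fn = loss_fn
    self.lr = lr

  def training_step(self, batch, batch_idx):
    # Lightning handles zero_grad / backward / optimizer.step for us
    b_input_ids, b_attn_mask, b_labels = batch
    logits = self.model(b_input_ids, b_attn_mask)
    return self.loss_fn(logits, b_labels)

  def validation_step(self, batch, batch_idx):
    b_input_ids, b_attn_mask, b_labels = batch
    logits = self.model(b_input_ids, b_attn_mask)
    self.log("val_loss", self.loss_fn(logits, b_labels))

  def configure_optimizers(self):
    return torch.optim.AdamW(self.parameters(), lr=self.lr)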

  1. Load dependencies and datasets
  2. Tokenisation and data processing
  3. PyTorch DataLoader
  4. Bert classifier
  5. Initialise optimizer, loss function, and scheduler
  6. Training and Eval
  7. Predictions on the test set

6. Training and Eval

In [30]:
import random

def evaluate(model, val_dataloader):
  """Measure loss and accuracy on the validation set."""
  model.eval()

  val_accuracy = []
  val_loss = []

  for batch in val_dataloader:
    # Move the batch to the same device as the model
    b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

    # No gradients needed during evaluation
    with torch.no_grad():
      logits = model(b_input_ids, b_attn_mask)

      loss = loss_fn(logits, b_labels)
      val_loss.append(loss.item())

      # Get the predictions
      preds = torch.argmax(logits, dim=1).flatten()

      # Calculate the accuracy rate
      accuracy = (preds == b_labels).cpu().numpy().mean() * 100
      val_accuracy.append(accuracy)

  # Average the metrics over all validation batches
  val_loss = np.mean(val_loss)
  val_accuracy = np.mean(val_accuracy)

  return val_loss, val_accuracy
In [52]:
def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
  for epoch_i in range(epochs):
    print("EPOCH: %s" % epoch_i)

    total_loss = 0

    # Put the model into training mode
    model.train()

    for step, batch in enumerate(train_dataloader):
      b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

      # Clear out any previously accumulated gradients
      model.zero_grad()

      logits = model(b_input_ids, b_attn_mask)

      loss = loss_fn(logits, b_labels)
      total_loss += loss.item()

      # Backpropagate
      loss.backward()

      # Clip gradients to prevent them from exploding
      torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

      # Update the weights and the learning rate
      optimizer.step()
      scheduler.step()

    avg_train_loss = total_loss / len(train_dataloader)

    if evaluation:
      # The metrics are computed but not printed here; see below for how
      # to retrieve them after training
      val_loss, val_accuracy = evaluate(model, val_dataloader)

  print('Training Completed!')
In [53]:
# Fix the random seeds so the run is reproducible
seed_value = 21
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
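Seeding alone doesn’t make GPU runs fully deterministic. If exact reproducibility matters, the usual extra step (standard PyTorch settings, not part of the original notebook) is to pin cuDNN’s behaviour as well:

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False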
In [54]:
train(bert_classifier, train_dataloader, val_dataloader, epochs=4, evaluation=True)
EPOCH: 0
EPOCH: 1
EPOCH: 2
EPOCH: 3
Training Completed!
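Note that train() computes the validation metrics but never reports them, hence the bare epoch output above. To see the numbers once training has finished, you can call evaluate() directly; a quick check would look like this (output not shown in the original notebook):

val_loss, val_accuracy = evaluate(bert_classifier, val_dataloader)
print(f"Validation loss: {val_loss:.4f} | Validation accuracy: {val_accuracy:.2f}%")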

7. Predictions on the Test Set

In [56]:
# Preparing the test data
test_inputs, test_masks = bert_preprocessing(test_data.tweet)

# Create the DataLoader for our test set
test_dataset = TensorDataset(test_inputs, test_masks)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=32)
In [59]:
import torch.nn.functional as F

def bert_predict(model, dataloader):
  model.eval()

  all_logits = []

  for batch in dataloader:
    # Only the input ids and attention mask are needed for prediction
    b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]

    # Compute logits
    with torch.no_grad():
      logits = model(b_input_ids, b_attn_mask)
    all_logits.append(logits)

  all_logits = torch.cat(all_logits, dim=0)

  # Turn the logits into class probabilities
  probs = F.softmax(all_logits, dim=1).cpu().numpy()

  return probs
In [60]:
# Compute predicted probabilities on the test set
test_probs = bert_predict(bert_classifier, test_dataloader)

# Get predictions from the probabilities
threshold = 0.9
preds = np.where(test_probs[:, 1] > threshold, 1, 0)

# Number of tweets predicted non-negative
print("Number of tweets predicted non-negative: ", preds.sum())
Number of tweets predicted non-negative:  947
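A threshold of 0.9 is deliberately conservative: a tweet is only labelled non-negative when the model is very confident, which trades recall for precision on class 1. For reference, the usual default decision rule is a plain argmax over the probabilities, which for two classes is equivalent to a 0.5 threshold:

# Default decision rule: pick the most probable class for each tweet
preds_default = test_probs.argmax(axis=1)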
In [65]:
test_data['sentiment'] = preds
In [66]:
test_data.head()
Out[66]:
   id  tweet                                             sentiment
0  33  @SouthwestAir get your damn act together. Don’…          0
1  58  @AmericanAir horrible at responding to emails….          0
2 135  @AmericanAir hey where is your crew? Flight aa…          0
3 159  Ok come on we are late let’s goooo @united               0
4 182  @AmericanAir since you are now affiliated with…          0
In [67]:
test_data['sentiment'].value_counts()
Out[67]:
0    3608
1     947
Name: sentiment, dtype: int64
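If you want to keep the labelled test set around, a typical final step (not shown in the original notebook) is to write it out with pandas:

test_data.to_csv('test_predictions.csv', index=False)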
Ryan

Data Scientist