LLM

Text classification with BERT

import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load the pretrained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Set device (CPU or GPU) and move the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # inference mode: disables dropout

# Text classification function
def classify_text(text):
    # Tokenize input text
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        padding='max_length',
        truncation=True,  # truncate inputs longer than max_length down to 128 tokens
        max_length=128,
        return_tensors='pt'
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1).squeeze(dim=0)
    predicted_class = torch.argmax(probabilities).item()  # index of the highest-probability class

    return predicted_class, probabilities

# Example usage
text_to_classify = "This is an example sentence."
predicted_class, probabilities = classify_text(text_to_classify)
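
Note that bert-base-uncased ships without a fine-tuned classification head, so BertForSequenceClassification initializes the classifier weights randomly (the library prints a warning to that effect); the predicted class is only meaningful after fine-tuning on labeled data. A quick way to inspect the output:

# Inspect the prediction (random until the classification head is fine-tuned)
print(f"Predicted class: {predicted_class}")
print(f"Probabilities: {probabilities.tolist()}")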

 

BERT can be used for a variety of applications, e.g. next-word prediction and masked-word prediction.

 

Masked word prediction

import torch
from transformers import BertTokenizer, BertForMaskedLM

# Load the pretrained BERT model and tokenizer
# (BertForMaskedLM carries BERT's pretrained masked-language-modeling head,
#  unlike BertForSequenceClassification, whose head is untrained)
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# Set device (CPU or GPU) and move the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Masked word prediction function
def predict_masked_word(text):
    # Tokenize input text, adding BERT's special tokens around it
    tokenized_text = ['[CLS]'] + tokenizer.tokenize(text) + ['[SEP]']
    masked_index = tokenized_text.index('[MASK]')  # position of the target word
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    # Convert tokens to tensor
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(tokens_tensor)

    # Top-5 vocabulary ids at the masked position
    predictions = outputs.logits[0, masked_index].topk(k=5).indices.tolist()

    predicted_tokens = []
    for token_index in predictions:
        predicted_token = tokenizer.convert_ids_to_tokens([token_index])[0]
        predicted_tokens.append(predicted_token)

    return predicted_tokens

# Example usage
text_with_mask = "I want to [MASK] a pizza for dinner."
predicted_tokens = predict_masked_word(text_with_mask)

# Print the predicted tokens
print(predicted_tokens)
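
For reference, the same masked-word prediction can be written in a few lines with the transformers fill-mask pipeline, which wraps tokenization, the forward pass, and decoding; this is just an alternative sketch of the task above:

from transformers import pipeline

# fill-mask pipeline handles tokenization, inference, and decoding in one call
unmasker = pipeline('fill-mask', model='bert-base-uncased')
for result in unmasker("I want to [MASK] a pizza for dinner.", top_k=5):
    print(result['token_str'], round(result['score'], 4))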