LLM

Text classification with BERT

import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load the pretrained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Set device (CPU or GPU) and move the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # inference mode: disables dropout

# Text classification function
def classify_text(text):
    # Tokenize input text
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        padding='max_length',
        truncation=True,  # truncate inputs longer than max_length down to 128 tokens
        max_length=128,
        return_tensors='pt'
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1).squeeze(dim=0)
    predicted_class = torch.argmax(probabilities).item()  # index of the highest-probability class

    return predicted_class, probabilities

# Example usage
text_to_classify = "This is an example sentence."
predicted_class, probabilities = classify_text(text_to_classify)
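
Note that bert-base-uncased ships without a fine-tuned classification head, so BertForSequenceClassification initializes the classifier weights randomly (the library prints a warning to that effect); the predicted class is only meaningful after fine-tuning on labeled data. A quick way to inspect the output:

# Inspect the prediction (random until the classification head is fine-tuned)
print(f"Predicted class: {predicted_class}")
print(f"Probabilities: {probabilities.tolist()}")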

 

BERT can be used for a variety of applications, e.g. next-word prediction and masked-word prediction.

 

Masked word prediction

import torch
from transformers import BertTokenizer, BertForMaskedLM

# Load the pretrained BERT model and tokenizer
# (BertForMaskedLM carries BERT's pretrained masked-language-modeling head,
#  unlike BertForSequenceClassification, whose head is untrained)
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# Set device (CPU or GPU) and move the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Masked word prediction function
def predict_masked_word(text):
    # Tokenize input text, adding BERT's special tokens around it
    tokenized_text = ['[CLS]'] + tokenizer.tokenize(text) + ['[SEP]']
    masked_index = tokenized_text.index('[MASK]')  # position of the target word
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    # Convert tokens to tensor
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(tokens_tensor)

    # Top-5 vocabulary ids at the masked position
    predictions = outputs.logits[0, masked_index].topk(k=5).indices.tolist()

    predicted_tokens = []
    for token_index in predictions:
        predicted_token = tokenizer.convert_ids_to_tokens([token_index])[0]
        predicted_tokens.append(predicted_token)

    return predicted_tokens

# Example usage
text_with_mask = "I want to [MASK] a pizza for dinner."
predicted_tokens = predict_masked_word(text_with_mask)

# Print the predicted tokens
print(predicted_tokens)
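
For reference, the same masked-word prediction can be written in a few lines with the transformers fill-mask pipeline, which wraps tokenization, the forward pass, and decoding; this is just an alternative sketch of the task above:

from transformers import pipeline

# fill-mask pipeline handles tokenization, inference, and decoding in one call
unmasker = pipeline('fill-mask', model='bert-base-uncased')
for result in unmasker("I want to [MASK] a pizza for dinner.", top_k=5):
    print(result['token_str'], round(result['score'], 4))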