import torch
from transformers import BertTokenizer, BertForSequenceClassification
# Load the pretrained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
# Set device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # inference mode: disables dropout
# Text classification function
def classify_text(text):
    # Tokenize the input text, padding and truncating to a fixed length
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        padding='max_length',
        truncation=True,  # truncate to max_length when the input exceeds 128 tokens
        max_length=128,
        return_tensors='pt'
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Forward pass through the model (no gradients needed at inference)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).squeeze(dim=0)
        predicted_class = torch.argmax(probabilities).item()  # class with the highest probability

    return predicted_class, probabilities
# Example usage
text_to_classify = "This is an example sentence."
predicted_class, probabilities = classify_text(text_to_classify)
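Note that loading 'bert-base-uncased' into BertForSequenceClassification attaches a randomly initialized classification head (Transformers prints a warning to this effect), so the predicted class is arbitrary until the model is fine-tuned on labeled data. A minimal way to inspect the returned values:

print(f"Predicted class: {predicted_class}")
print(f"Class probabilities: {probabilities.tolist()}")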
BERT can be applied to a variety of tasks, e.g., next-word prediction and masked-word prediction; a next-word sketch follows the masked-word example below.
Masked word prediction
import torch
from transformers import BertTokenizer, BertForMaskedLM

# Load the pretrained BERT model and tokenizer.
# BertForMaskedLM (rather than BertForSequenceClassification) exposes the
# masked-language-modeling head needed to fill in [MASK] tokens.
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# Set device (CPU or GPU) and move the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Masked word prediction function
def predict_masked_word(text):
    # Tokenize the input; the tokenizer keeps [MASK] as a single special token,
    # and [CLS]/[SEP] are added to match BERT's training format
    tokenized_text = ['[CLS]'] + tokenizer.tokenize(text) + ['[SEP]']
    masked_index = tokenized_text.index('[MASK]')  # position of the target word
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    # Convert tokens to a tensor with a batch dimension
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    # Forward pass through the model
    with torch.no_grad():
        outputs = model(tokens_tensor)
        # Top 5 vocabulary ids at the masked position
        predictions = outputs.logits[0, masked_index].topk(k=5).indices.tolist()

    # Map the predicted ids back to readable tokens
    predicted_tokens = []
    for token_index in predictions:
        predicted_token = tokenizer.convert_ids_to_tokens([token_index])[0]
        predicted_tokens.append(predicted_token)
    return predicted_tokens
# Example usage
text_with_mask = "I want to [MASK] a pizza for dinner."
predicted_tokens = predict_masked_word(text_with_mask)
# Print the predicted tokens
print(predicted_tokens)
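The next-word prediction mentioned above can be approximated with the same masked-LM head: append a [MASK] token to the end of the text and predict it. This is only a sketch (BERT is bidirectional and was not trained as a left-to-right language model, so results are rough); the helper below simply reuses predict_masked_word:

def predict_next_word(text):
    # Append a [MASK] token and let the MLM head fill it in
    return predict_masked_word(text + ' [MASK]')

print(predict_next_word("I want to eat a"))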