import evaluate
acc = evaluate.load('accuracy')
Evaluate
With Evaluate you can easily load and use a wide range of evaluation metrics, such as accuracy, F1 score, precision, recall, BLEU, and ROUGE.
The example code below computes several of these metrics at once.
metrics = evaluate.combine(['accuracy', 'f1', 'precision', 'recall'])
metrics.compute(predictions=[1,0,0,1], references=[0,1,0,1])
{'accuracy': 0.5, 'f1': 0.5, 'precision': 0.5, 'recall': 0.5}
for y, pred in zip([0,1,0,1], [1,0,0,1]):
    metrics.add(predictions=pred, references=y)
metrics.compute()
# The .add method expects scalar inputs: it adds one prediction and one reference at a time.
{'accuracy': 0.5, 'f1': 0.5, 'precision': 0.5, 'recall': 0.5}
-
zip() is used to pair the references (ground-truth labels) with the predictions.
for y, preds in zip([[0,1],[0,1]], [[1,0],[0,1]]):
    metrics.add_batch(predictions=preds, references=y)
metrics.compute()
# The .add_batch method expects list (batch-sized) inputs: it adds multiple predictions and references at a time.
{'accuracy': 0.5, 'f1': 0.5, 'precision': 0.5, 'recall': 0.5}
Create custom metrics
-
To be usable with the Trainer class's compute_metrics parameter, the function must return its results as a dictionary.
# A simple function that computes accuracy (preds and labels are tensors)
def simple_accuracy(preds, labels):
    return {'accuracy': (preds == labels).to(float).mean().item()}
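As a quick sanity check, here is a minimal sketch of calling it with made-up PyTorch tensors (the values are illustrative only):
import torch

preds = torch.tensor([1, 0, 0, 1])
labels = torch.tensor([0, 1, 0, 1])
print(simple_accuracy(preds, labels))  # {'accuracy': 0.5}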
Applying it to Trainer
# Use the micro F1 score
import evaluate

def custom_metrics(pred):
    f1 = evaluate.load('f1')
    labels = pred.label_ids
    # pred.predictions holds the logits; argmax returns the index of the largest value
    preds = pred.predictions.argmax(axis=-1)
    # .compute() automatically returns the result as a dictionary.
    return f1.compute(predictions=preds, references=labels, average='micro')
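As a rough sketch of how the function behaves, it can be called directly with a transformers EvalPrediction built from made-up logits and labels (the values below are illustrative only):
import numpy as np
from transformers import EvalPrediction

# Hypothetical logits for four samples over three classes, plus their true labels
logits = np.array([[2.0, 0.1, 0.3],
                   [0.2, 1.5, 0.1],
                   [0.1, 0.2, 3.0],
                   [1.2, 0.3, 0.4]])
labels = np.array([0, 1, 2, 1])

print(custom_metrics(EvalPrediction(predictions=logits, label_ids=labels)))
# argmax picks classes [0, 1, 2, 0]; three of four match, so {'f1': 0.75}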
-
.argmax(axis=?)
axis=-1 computes along the array's last axis; axis=0 works down the columns (vertical) and axis=1 across the rows (horizontal).
Because pred.predictions above is two-dimensional, the last axis is the row axis, so axis=-1 and axis=1 are equivalent here, as the small NumPy sketch below illustrates.
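The array values here are made up; only the axis behavior matters:
import numpy as np

logits = np.array([[0.1, 0.7, 0.2],
                   [0.8, 0.1, 0.1]])
print(logits.argmax(axis=-1))  # [1 0]
print(logits.argmax(axis=1))   # [1 0] -- same result on a 2-D array
print(logits.argmax(axis=0))   # [1 0 0] -- per-column argmax instead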
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
Trainer,
TrainingArguments,
default_data_collator
)
= "klue/bert-base"
model_name = AutoTokenizer.from_pretrained(model_name)
tokenizer = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=7)
model
dataset = load_dataset("klue", "ynat")
def tokenize_function(sample):
    result = tokenizer(
        sample["title"],
        padding="max_length",
    )
    return result
datasets = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=1000,
    remove_columns=["guid", "title", "url", "date"]
)
print(datasets)
args = TrainingArguments(
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    max_steps=500,
    evaluation_strategy="steps",
    logging_strategy="steps",
    logging_steps=50,
    logging_dir="/content/logs",
    save_strategy="steps",
    save_steps=50,
    output_dir="/content/ckpt",
    report_to="tensorboard",
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["validation"],
    tokenizer=tokenizer,
    data_collator=default_data_collator,
    compute_metrics=custom_metrics,  # This is the part we change.
)
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.99b3298ed554f2ad731c27cdb11a6215f39b90bc845ff5ce709bb4e74ba45621
Model config BertConfig {
"architectures": [
"BertForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"transformers_version": "4.11.3",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 32000
}
loading file https://huggingface.co/klue/bert-base/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/1a36e69d48a008e522b75e43693002ffc8b6e6df72de7c53412c23466ec165eb.085110015ec67fc02ad067f712a7c83aafefaf31586a3361dd800bcac635b456
loading file https://huggingface.co/klue/bert-base/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/310a974e892b181d75eed58b545cc0592d066ae4ef35cc760ea92e9b0bf65b3b.74f7933572f937b11a02b2cfb4e88a024059be36c84f53241b85b1fec49e21f7
loading file https://huggingface.co/klue/bert-base/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/klue/bert-base/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/aeaaa3afd086a040be912f92ffe7b5f85008b744624f4517c4216bcc32b51cf0.054ece8d16bd524c8a00f0e8a976c00d5de22a755ffb79e353ee2954d9289e26
loading file https://huggingface.co/klue/bert-base/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/f8f71eb411bb03f57b455cfb1b4e04ae124201312e67a3ad66e0a92d0c228325.78871951edcb66032caa0a9628d77b3557c23616c653dacdb7a1a8f33011a843
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.99b3298ed554f2ad731c27cdb11a6215f39b90bc845ff5ce709bb4e74ba45621
Model config BertConfig {
"architectures": [
"BertForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1",
"2": "LABEL_2",
"3": "LABEL_3",
"4": "LABEL_4",
"5": "LABEL_5",
"6": "LABEL_6"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1,
"LABEL_2": 2,
"LABEL_3": 3,
"LABEL_4": 4,
"LABEL_5": 5,
"LABEL_6": 6
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"transformers_version": "4.11.3",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 32000
}
loading weights file https://huggingface.co/klue/bert-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/05b36ee62545d769939a7746eca739b844a40a7a7553700f110b58b28ed6a949.7cb231256a5dbe886e12b902d05cb1241f330d8c19428508f91b2b28c1cfe0b6
Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at klue/bert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
using `logging_steps` to initialize `eval_steps` to 50
PyTorch: setting up devices
max_steps is given, it will override any value given in num_train_epochs
DatasetDict({
train: Dataset({
features: ['label', 'input_ids', 'token_type_ids', 'attention_mask'],
num_rows: 45678
})
validation: Dataset({
features: ['label', 'input_ids', 'token_type_ids', 'attention_mask'],
num_rows: 9107
})
})
trainer.train()
***** Running training *****
Num examples = 45678
Num Epochs = 1
Instantaneous batch size per device = 16
Total train batch size (w. parallel, distributed & accumulation) = 16
Gradient Accumulation steps = 1
Total optimization steps = 500
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-50
Configuration saved in /content/ckpt/checkpoint-50/config.json
Model weights saved in /content/ckpt/checkpoint-50/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-50/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-50/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-100
Configuration saved in /content/ckpt/checkpoint-100/config.json
Model weights saved in /content/ckpt/checkpoint-100/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-100/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-100/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-150
Configuration saved in /content/ckpt/checkpoint-150/config.json
Model weights saved in /content/ckpt/checkpoint-150/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-150/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-150/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-200
Configuration saved in /content/ckpt/checkpoint-200/config.json
Model weights saved in /content/ckpt/checkpoint-200/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-200/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-200/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-250
Configuration saved in /content/ckpt/checkpoint-250/config.json
Model weights saved in /content/ckpt/checkpoint-250/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-250/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-250/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-300
Configuration saved in /content/ckpt/checkpoint-300/config.json
Model weights saved in /content/ckpt/checkpoint-300/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-300/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-300/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-350
Configuration saved in /content/ckpt/checkpoint-350/config.json
Model weights saved in /content/ckpt/checkpoint-350/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-350/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-350/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-400
Configuration saved in /content/ckpt/checkpoint-400/config.json
Model weights saved in /content/ckpt/checkpoint-400/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-400/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-400/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-450
Configuration saved in /content/ckpt/checkpoint-450/config.json
Model weights saved in /content/ckpt/checkpoint-450/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-450/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-450/special_tokens_map.json
***** Running Evaluation *****
Num examples = 9107
Batch size = 16
Saving model checkpoint to /content/ckpt/checkpoint-500
Configuration saved in /content/ckpt/checkpoint-500/config.json
Model weights saved in /content/ckpt/checkpoint-500/pytorch_model.bin
tokenizer config file saved in /content/ckpt/checkpoint-500/tokenizer_config.json
Special tokens file saved in /content/ckpt/checkpoint-500/special_tokens_map.json
Training completed. Do not forget to share your model on huggingface.co/models =)
[500/500 18:44, Epoch 0/1]
Step | Training Loss | Validation Loss | F1 |
---|---|---|---|
50 | 1.082100 | 0.825163 | 0.702427 |
100 | 0.571100 | 0.593032 | 0.810695 |
150 | 0.476000 | 0.570962 | 0.816844 |
200 | 0.500600 | 0.535641 | 0.815966 |
250 | 0.454800 | 0.501376 | 0.833535 |
300 | 0.433800 | 0.479584 | 0.837158 |
350 | 0.397700 | 0.483717 | 0.842868 |
400 | 0.442900 | 0.449807 | 0.851104 |
450 | 0.420800 | 0.434349 | 0.853300 |
500 | 0.406100 | 0.438009 | 0.853080 |
TrainOutput(global_step=500, training_loss=0.5185918083190918, metrics={'train_runtime': 1125.3103, 'train_samples_per_second': 7.109, 'train_steps_per_second': 0.444, 'total_flos': 2104982937600000.0, 'train_loss': 0.5185918083190918, 'epoch': 0.18})
-
In the results we can now see an F1 score, which was not reported before.
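The same metrics can also be read programmatically after training: with compute_metrics set, the dictionary returned by trainer.evaluate() includes the F1 under an eval_ prefix (a minimal sketch):
metrics = trainer.evaluate()
print(metrics["eval_f1"])  # roughly 0.85, in line with the last rows of the table above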