-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
## Title - Add auto model test code about model training, inference, merge ## Description - Auto encoder model, Auto regressive model, Seq2seq model test - Only run the test command sh "script.sh"; see the "./tests/test_script/run_{train|inference|merge}.sh" files - If you use model training, you should install the wandb library, log in, and provide the secret key ## Linked Issues - resolved #00 --------- Co-authored-by: hyungrak.kim <[email protected]>
- Loading branch information
Showing
69 changed files
with
729 additions
and
2,566 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash
# Install the build toolchain (gcc via devtoolset-8, clang via llvm-toolset-7)
# on CentOS/RHEL 7 using Software Collections (SCL).
# Fix: the original shebang was `#~/bin/bash`, which is not a valid shebang.
# NOTE(review): the first few commands run yum without sudo while the later
# ones use sudo — run the whole script as root or add sudo consistently.
yum install libaio-devel -y
yum install centos-release-scl -y
yum-config-manager --enable rhel-server-rhscl-7-rpms -y
yum install devtoolset-8 -y
yum install llvm-toolset-7 -y
sudo yum -y install llvm-toolset-7-clang-analyzer llvm-toolset-7-clang-tools-extra
sudo yum -y install pdsh
# Open a shell with both toolchains enabled on PATH.
scl enable devtoolset-8 llvm-toolset-7 bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ pybind11 | |
scipy | ||
torch >= 1.11.0 | ||
transformers | ||
wandb |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import os
from argparse import ArgumentParser
from functools import partial

from transformers import (
    AutoModelForCausalLM,
    AutoModelForMaskedLM,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

import oslo
from tests.util.oslo import initialize_oslo, print_rank_0

os.environ["TOKENIZERS_PARALLELISM"] = "true"

parser = ArgumentParser()
parser.add_argument("--local-rank", default=0, type=int)
parser.add_argument("--task", required=True, type=str)
parser.add_argument("--model", required=True, type=str)
parser.add_argument("--tokenizer", default=None, type=str)
parser.add_argument("--input", default=None, type=str)
parser.add_argument("--tensor_parallel_size", default=1, type=int)
parser.add_argument("--data_parallel_size", default=1, type=int)
parser.add_argument("--pipeline_parallel_size", default=1, type=int)
parser.add_argument("--tensor_parallel_depth", default=1, type=int)
parser.add_argument("--tensor_parallel_mode", default="1D", type=str)

args = parser.parse_args()
# True for tasks evaluated with a plain forward pass; generation tasks
# (causal-lm / seq2seq-lm) go through `model.generate` instead.
# Fix: the original variable was named `generation_task`, the inverse of
# its actual meaning.
is_forward_task = args.task not in ["causal-lm", "seq2seq-lm"]
args.tokenizer = args.tokenizer if args.tokenizer else args.model

# 1. Create a tokenizer
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)

# Some models (e.g. GPT-2) ship without a pad token; reuse EOS so padded
# batches can still be built.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 2. Define tasks and config
TASKS = {
    "masked-lm": {
        "class": AutoModelForMaskedLM.from_pretrained,
        "example": f"Manners maketh man. Do you {tokenizer.mask_token} what that means?",
        "output": lambda output: tokenizer.decode(output.logits.argmax(-1)[0]),
    },
    "sequence-classification": {
        "class": AutoModelForSequenceClassification.from_pretrained,
        "example": "I will decide how I feel, I will be happy today.",
        "output": lambda output: output.logits.argmax(-1).item(),
    },
    "causal-lm": {
        "class": AutoModelForCausalLM.from_pretrained,
        "example": "I don't want a lot for Christmas. There is just one thing",
        "output": lambda output: tokenizer.decode(output[0]),
    },
    "seq2seq-lm": {
        "class": AutoModelForSeq2SeqLM.from_pretrained,
        "example": "Life was like a box of chocolates. You never know what you’re gonna get.",
        "output": lambda output: tokenizer.decode(output[0]),
    },
}


assert args.task in TASKS, (
    f"{args.task} is not supported task. "
    f"Please choose one of {list(TASKS.keys())}. "
    "If there are no major problems, it will work for other tasks as well, "
    "but I haven't tested it, so if you encounter any problems, "
    "please report them through the github issue."
)


def make_result(input_text, before, after):
    """Format the input plus before/after-parallelization outputs for printing."""
    # Fix: was a lambda assigned to a name (PEP 8 E731).
    return (
        "\n"
        f"Result :\n"
        f"> Input: {input_text}\n"
        f"> Output (before OSLO): {TASKS[args.task]['output'](before)}\n"
        f"> Output (after OSLO): {TASKS[args.task]['output'](after)}\n"
    )


# 3. Create a model and input
model = TASKS[args.task]["class"](args.model)
# `input_text` replaces the original local `input`, which shadowed the builtin.
input_text = args.input if args.input is not None else TASKS[args.task]["example"]
forward_fn = model.forward if is_forward_task else partial(model.generate, num_beams=3)

# Both branches of the original if/else called the tokenizer identically,
# so tokenize once and only drop the mask for causal LM.
input_data = tokenizer(input_text, return_tensors="pt")
if args.task == "causal-lm":
    # NOTE(review): attention_mask is dropped for causal-lm generation —
    # presumably to sidestep a mask-handling issue in the parallel path;
    # confirm this is still required.
    del input_data["attention_mask"]

# 4. Get result before parallelization
output_before = forward_fn(**input_data)

# 5. Parallelize the model
model_oslo, parallel_context = initialize_oslo(args, model)
forward_fn = (
    model_oslo.forward if is_forward_task else partial(model_oslo.generate, num_beams=3)
)

# 6. Get result after parallelization
output_after = forward_fn(**input_data.to("cuda"))

# 7. Print the results
print_rank_0(make_result(input_text, output_before, output_after), parallel_context)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import os | ||
import random | ||
import numpy as np | ||
import torch | ||
import torch.distributed as dist | ||
import transformers | ||
import oslo | ||
|
||
from copy import deepcopy | ||
from tensorboardX import SummaryWriter | ||
from datasets import load_dataset | ||
from torch.optim import AdamW | ||
from torch.utils.data.distributed import DistributedSampler | ||
from torch.utils.data import DataLoader | ||
from transformers import AutoTokenizer | ||
|
||
from tqdm import tqdm | ||
from tests.tasks.model_task import ModelTask | ||
from oslo import ParallelContext, ParallelMode | ||
from oslo.torch.nn.parallel import ( | ||
TensorParallel, | ||
PipelineParallel, | ||
DistributedDataParallel, | ||
) | ||
from tests.util.arg_parser import get_args | ||
|
||
# Map CLI mode strings (e.g. "2P5D_ROW") to their ParallelMode enum members.
# Every supported mode follows the `TENSOR_<NAME>` attribute naming pattern,
# so the table is generated from the mode names.
tensor_parallel_mode_map = {
    mode_name: getattr(ParallelMode, f"TENSOR_{mode_name}")
    for mode_name in (
        "1D",
        "2D",
        "2D_ROW",
        "2D_COL",
        "2P5D",
        "2P5D_ROW",
        "2P5D_COL",
        "2P5D_DEP",
        "2P5D_XZ",
        "3D",
        "3D_INPUT",
        "3D_WEIGHT",
        "3D_OUTPUT",
    )
}
|
||
|
||
def main():
    """Load a tensor-parallel checkpoint and re-save it as a single merged one.

    Steps: build the task's model, wrap it with TensorParallel, load the
    parallelized weights from ``args.merge_dir``, then save them merged into
    ``<merge_dir>_merge``. Must be launched under torch.distributed (reads
    ``LOCAL_RANK`` from the environment).
    """
    args = get_args()
    # Fix: removed the unused local `name` (a run-name string that was
    # computed but never used anywhere in this script).

    # torchrun / torch.distributed.launch exports LOCAL_RANK per worker.
    args.local_rank = int(os.environ["LOCAL_RANK"])
    print(args.local_rank)

    # 1. Create parallelized model
    model_tasks = ModelTask()
    model_tasks_config = model_tasks.get_model_task(args.task)
    model_oslo = model_tasks_config["class"](args.model)

    parallel_context = ParallelContext.from_torch(
        data_parallel_size=args.data_parallel_size,
        pipeline_parallel_size=args.pipeline_parallel_size,
        tensor_parallel_size=args.tensor_parallel_size,
        tensor_parallel_mode=tensor_parallel_mode_map[args.tensor_parallel_mode],
        tensor_parallel_depth=args.tensor_parallel_depth,
    )

    model_oslo = TensorParallel(model_oslo, parallel_context)
    oslo.ready(model_oslo, parallel_context)

    # 2. Load parallelized model
    model_oslo.from_parallelized(path=args.merge_dir)

    # 3. Save and merge model checkpoint
    saved_merge_dir = args.merge_dir + "_merge"
    model_oslo.save_pretrained(save_directory=saved_merge_dir, merge_checkpoints=True)

    # Only rank 0 reports completion to avoid duplicated log lines.
    if torch.distributed.get_rank() == 0:
        print("Complete checkpoint merge")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import torch | ||
import os | ||
|
||
from functools import partial | ||
from datasets import load_dataset | ||
from transformers import ( | ||
AutoModelForCausalLM, | ||
AutoModelForSeq2SeqLM, | ||
AutoModelForSequenceClassification, | ||
AutoTokenizer, | ||
) | ||
|
||
os.environ["TOKENIZERS_PARALLELISM"] = "true" | ||
|
||
|
||
class ModelTask:
    """Registry of test model tasks.

    Each entry of ``self.tasks`` maps a task name to:
      - ``"class"``: factory building the model from a pretrained name
      - ``"load_dataset"``: the HF dataset used to train that task
      - ``"preprocessing_map_func"``: function that tokenizes/labels the dataset
    """

    def __init__(self):
        """
        Define model task
        """
        # NOTE(review): all three datasets are loaded eagerly here, so
        # constructing a ModelTask downloads every dataset even when only one
        # task is exercised — consider deferring with functools.partial.
        self.tasks = {
            "sequence-classification": {
                "class": partial(
                    AutoModelForSequenceClassification.from_pretrained, num_labels=3
                ),
                "load_dataset": load_dataset(
                    "pietrolesci/gpt3_nli", split="train", cache_dir="tests/cache"
                ),
                "preprocessing_map_func": self.mli_task_map_func,
            },
            "causal-lm": {
                "class": AutoModelForCausalLM.from_pretrained,
                "load_dataset": load_dataset(
                    "squad",
                    split="train",
                    cache_dir="tests/cache",
                ),
                "preprocessing_map_func": self.causal_lm_task_map_func,
            },
            "seq2seq": {
                "class": AutoModelForSeq2SeqLM.from_pretrained,
                "load_dataset": load_dataset(
                    "squad",
                    split="train",
                    cache_dir="tests/cache",
                ),
                "preprocessing_map_func": self.seq2seq_task_map_func,
            },
        }

    def get_model_task(self, task):
        """Return the config dict for *task*, failing fast on unknown names."""
        assert task in self.tasks, (
            f"{task} is not supported task. "
            f"Please choose one of {list(self.tasks.keys())}. "
            "If there are no major problems, it will work for other tasks as well, "
            "but I haven't tested it, so if you encounter any problems, "
            "please report them through the github issue."
        )

        return self.tasks[task]

    @staticmethod
    def _tokenize(tokenizer, texts, args):
        """Tokenize *texts* padded/truncated to ``args.sequence_length``.

        Shared helper extracted from the three map funcs, which previously
        repeated this exact tokenizer call.
        """
        return tokenizer(
            texts,
            max_length=args.sequence_length,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
        )

    def mli_task_map_func(self, dataset, tokenizer, args):
        """Preprocess the NLI dataset: join text pairs and attach int labels.

        (Method name keeps the original ``mli`` spelling — likely a typo for
        ``nli`` — for backward compatibility with existing callers.)
        """

        def preprocess(row_datas):
            input_texts = []
            labels = []

            for text_a, text_b, label in zip(
                row_datas["text_a"], row_datas["text_b"], row_datas["label"]
            ):
                input_texts.append(f"{str(text_a)}\n{str(text_b)}")
                labels.append(label)

            input_text = self._tokenize(tokenizer, input_texts, args)
            ret_labels = torch.tensor(labels, dtype=torch.long)

            return {**input_text, "labels": ret_labels}

        # Limit the dataset so one pass matches the configured step count.
        dataset = dataset.select(range(args.train_step))
        return dataset.map(
            preprocess,
            batched=True,
            remove_columns=["text_a", "text_b", "label"],
        ).with_format("torch")

    def causal_lm_task_map_func(self, dataset, tokenizer, args):
        """Preprocess SQuAD contexts for causal LM: labels mirror input_ids."""

        def preprocess(row_datas):
            input_text = self._tokenize(tokenizer, row_datas["context"], args)
            return {**input_text, "labels": input_text["input_ids"]}

        dataset = dataset.select(range(args.train_step))

        return dataset.map(
            preprocess,
            batched=True,
            remove_columns=["id", "title", "context", "question", "answers"],
        ).with_format("torch")

    def seq2seq_task_map_func(self, dataset, tokenizer, args):
        """Preprocess SQuAD for seq2seq: context as input, question as labels."""

        def preprocess(row_datas):
            input_text = self._tokenize(tokenizer, row_datas["context"], args)
            label_text = self._tokenize(tokenizer, row_datas["question"], args)
            return {**input_text, "labels": label_text["input_ids"]}

        dataset = dataset.select(range(args.train_step))

        return dataset.map(
            preprocess,
            batched=True,
            remove_columns=["id", "title", "context", "question", "answers"],
        ).with_format("torch")
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
## inference shell code
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 bert-base-cased masked-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 ishan/bert-base-uncased-mnli sequence-classification ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 gpt2 causal-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 EleutherAI/gpt-neo-1.3B causal-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 t5-base seq2seq-lm ``

# Positional args: number of GPUs, HF model name, task name.
NUM_GPUS=$1
MODEL=$2
TASK=$3

# Fix: quote every expansion ($TASK and $MODEL were unquoted) so model names
# or tasks containing spaces/metacharacters are passed through intact.
python -m torch.distributed.launch \
    --nproc_per_node="$NUM_GPUS" \
    ./tests/inference.py \
    --task="$TASK" \
    --model="$MODEL" \
    --tensor_parallel_size="$NUM_GPUS"
Oops, something went wrong.