Feature/auto model test (#213)
## Title

- Add automated model test code covering model training, inference, and checkpoint merging

## Description

- Tests for auto-encoder, auto-regressive, and seq2seq models
- Each test runs with a single command, `sh <script>.sh`; see the
  `./tests/test_script/run_{train|inference|merge}.sh` files
- For model training, install the `wandb` library and log in with your
  secret API key (a minimal setup is sketched below)
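For reference, a minimal `wandb` setup looks like this (assuming you already
have a wandb account and an API key at hand):

```console
pip install wandb
wandb login   # paste your API key when prompted
```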

## Linked Issues

- resolved #00

---------

Co-authored-by: hyungrak.kim <[email protected]>
koliaok and hyungrak.kim committed Aug 14, 2023
1 parent 4cb1ae0 commit 21ef4a1
Showing 69 changed files with 729 additions and 2,566 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -402,3 +402,11 @@ wandb/

# ignore run log files
**/nohup.out

# auto test
tests/cache
tests/ckpt
.ipynb_checkpoints
tests/dataset_download
"core.python"
tests/dataset_download.ipynb
6 changes: 6 additions & 0 deletions README.md
@@ -18,6 +18,12 @@ Be careful that the ‘core’ is in the PyPI project name.
pip install oslo-core
```

## Installation on CentOS
```console
# sh gcc_install.sh
# pip install oslo-core
```

## Administrative Notes

### Citing OSLO
10 changes: 10 additions & 0 deletions gcc_install.sh
@@ -0,0 +1,10 @@
#!/bin/bash
# Install the GCC/LLVM toolchain dependencies (CentOS 7)
yum install libaio-devel -y
yum install centos-release-scl -y
yum-config-manager --enable rhel-server-rhscl-7-rpms -y
yum install devtoolset-8 -y
yum install llvm-toolset-7 -y
sudo yum -y install llvm-toolset-7-clang-analyzer llvm-toolset-7-clang-tools-extra
sudo yum -y install pdsh
# note: `scl enable` starts a new shell session with the toolsets active
scl enable devtoolset-8 llvm-toolset-7 bash
1 change: 1 addition & 0 deletions requirements.txt
@@ -9,3 +9,4 @@ pybind11
scipy
torch >= 1.11.0
transformers
wandb
Empty file removed tests/__init__.py
Empty file.
105 changes: 105 additions & 0 deletions tests/inference.py
@@ -0,0 +1,105 @@
import os
from argparse import ArgumentParser
from functools import partial

from transformers import (
AutoModelForCausalLM,
AutoModelForMaskedLM,
AutoModelForSeq2SeqLM,
AutoModelForSequenceClassification,
AutoTokenizer,
)

import oslo
from tests.util.oslo import initialize_oslo, print_rank_0

os.environ["TOKENIZERS_PARALLELISM"] = "true"

parser = ArgumentParser()
parser.add_argument("--local-rank", default=0, type=int)
parser.add_argument("--task", required=True, type=str)
parser.add_argument("--model", required=True, type=str)
parser.add_argument("--tokenizer", default=None, type=str)
parser.add_argument("--input", default=None, type=str)
parser.add_argument("--tensor_parallel_size", default=1, type=int)
parser.add_argument("--data_parallel_size", default=1, type=int)
parser.add_argument("--pipeline_parallel_size", default=1, type=int)
parser.add_argument("--tensor_parallel_depth", default=1, type=int)
parser.add_argument("--tensor_parallel_mode", default="1D", type=str)

args = parser.parse_args()
# causal-lm and seq2seq-lm are exercised through generate(); other tasks use forward()
generation_task = args.task in ["causal-lm", "seq2seq-lm"]
args.tokenizer = args.tokenizer if args.tokenizer else args.model

# 1. Create a tokenizer
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)

if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token

# 2. Define tasks and config
TASKS = {
"masked-lm": {
"class": AutoModelForMaskedLM.from_pretrained,
"example": f"Manners maketh man. Do you {tokenizer.mask_token} what that means?",
"output": lambda output: tokenizer.decode(output.logits.argmax(-1)[0]),
},
"sequence-classification": {
"class": AutoModelForSequenceClassification.from_pretrained,
"example": "I will decide how I feel, I will be happy today.",
"output": lambda output: output.logits.argmax(-1).item(),
},
"causal-lm": {
"class": AutoModelForCausalLM.from_pretrained,
"example": "I don't want a lot for Christmas. There is just one thing",
"output": lambda output: tokenizer.decode(output[0]),
},
"seq2seq-lm": {
"class": AutoModelForSeq2SeqLM.from_pretrained,
"example": "Life was like a box of chocolates. You never know what you’re gonna get.",
"output": lambda output: tokenizer.decode(output[0]),
},
}


assert args.task in TASKS, (
    f"{args.task} is not a supported task. "
    f"Please choose one of {list(TASKS.keys())}. "
    "Other tasks may work as well, but they have not been tested; "
    "if you run into problems, please report them via a GitHub issue."
)

make_result = (
lambda input, before, after: "\n"
f"Result :\n"
f"> Input: {input}\n"
f"> Output (before OSLO): {TASKS[args.task]['output'](before)}\n"
f"> Output (after OSLO): {TASKS[args.task]['output'](after)}\n"
)

# 3. Create a model and input
model = TASKS[args.task]["class"](args.model)
input_text = args.input if args.input is not None else TASKS[args.task]["example"]
forward_fn = partial(model.generate, num_beams=3) if generation_task else model.forward

input_data = tokenizer(input_text, return_tensors="pt")
if args.task == "causal-lm":
    # the causal-LM test runs generation without an explicit attention mask
    del input_data["attention_mask"]

# 4. Get result before parallelization
output_before = forward_fn(**input_data)

# 5. Parallelize the model
model_oslo, parallel_context = initialize_oslo(args, model)
forward_fn = (
    partial(model_oslo.generate, num_beams=3) if generation_task else model_oslo.forward
)

# 6. Get result after parallelization
output_after = forward_fn(**input_data.to("cuda"))

# 7. Print the results
print_rank_0(make_result(input_text, output_before, output_after), parallel_context)
84 changes: 84 additions & 0 deletions tests/merge.py
@@ -0,0 +1,84 @@
import os

import torch.distributed as dist

import oslo
from oslo import ParallelContext, ParallelMode
from oslo.torch.nn.parallel import TensorParallel

from tests.tasks.model_task import ModelTask
from tests.util.arg_parser import get_args

# Map CLI mode strings to OSLO tensor parallel modes
tensor_parallel_mode_map = {
"1D": ParallelMode.TENSOR_1D,
"2D": ParallelMode.TENSOR_2D,
"2D_ROW": ParallelMode.TENSOR_2D_ROW,
"2D_COL": ParallelMode.TENSOR_2D_COL,
"2P5D": ParallelMode.TENSOR_2P5D,
"2P5D_ROW": ParallelMode.TENSOR_2P5D_ROW,
"2P5D_COL": ParallelMode.TENSOR_2P5D_COL,
"2P5D_DEP": ParallelMode.TENSOR_2P5D_DEP,
"2P5D_XZ": ParallelMode.TENSOR_2P5D_XZ,
"3D": ParallelMode.TENSOR_3D,
"3D_INPUT": ParallelMode.TENSOR_3D_INPUT,
"3D_WEIGHT": ParallelMode.TENSOR_3D_WEIGHT,
"3D_OUTPUT": ParallelMode.TENSOR_3D_OUTPUT,
}


def main():
    args = get_args()
    args.local_rank = int(os.environ["LOCAL_RANK"])

# 1. Create parallelized model
model_tasks = ModelTask()
model_tasks_config = model_tasks.get_model_task(args.task)
model_oslo = model_tasks_config["class"](args.model)

parallel_context = ParallelContext.from_torch(
data_parallel_size=args.data_parallel_size,
pipeline_parallel_size=args.pipeline_parallel_size,
tensor_parallel_size=args.tensor_parallel_size,
tensor_parallel_mode=tensor_parallel_mode_map[args.tensor_parallel_mode],
tensor_parallel_depth=args.tensor_parallel_depth,
)

model_oslo = TensorParallel(model_oslo, parallel_context)
oslo.ready(model_oslo, parallel_context)

# 2. Load parallelized model
model_oslo.from_parallelized(path=args.merge_dir)

# 3. Save and merge model checkpoint
saved_merge_dir = args.merge_dir + "_merge"
model_oslo.save_pretrained(save_directory=saved_merge_dir, merge_checkpoints=True)

    if dist.get_rank() == 0:
        print("Checkpoint merge complete")


if __name__ == "__main__":
main()
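A plausible launch command for this merge test, mirroring the calling pattern of `run_inference.sh` below (the flag names are inferred from the `args` fields used above; the authoritative version is `./tests/test_script/run_merge.sh`, which is not shown in this excerpt):

```console
python -m torch.distributed.launch \
    --nproc_per_node=4 \
    ./tests/merge.py \
    --task=causal-lm \
    --model=gpt2 \
    --tensor_parallel_size=4 \
    --merge_dir=tests/ckpt/<parallelized_checkpoint_dir>
```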
139 changes: 139 additions & 0 deletions tests/tasks/model_task.py
@@ -0,0 +1,139 @@
import torch
import os

from functools import partial
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoModelForSeq2SeqLM,
AutoModelForSequenceClassification,
AutoTokenizer,
)

os.environ["TOKENIZERS_PARALLELISM"] = "true"


class ModelTask:
def __init__(self):
"""
Define model task
"""
self.tasks = {
"sequence-classification": {
"class": partial(
AutoModelForSequenceClassification.from_pretrained, num_labels=3
),
"load_dataset": load_dataset(
"pietrolesci/gpt3_nli", split="train", cache_dir="tests/cache"
),
"preprocessing_map_func": self.mli_task_map_func,
},
"causal-lm": {
"class": AutoModelForCausalLM.from_pretrained,
"load_dataset": load_dataset(
"squad",
split="train",
cache_dir="tests/cache",
),
"preprocessing_map_func": self.causal_lm_task_map_func,
},
"seq2seq": {
"class": AutoModelForSeq2SeqLM.from_pretrained,
"load_dataset": load_dataset(
"squad",
split="train",
cache_dir="tests/cache",
),
"preprocessing_map_func": self.seq2seq_task_map_func,
},
}

    def get_model_task(self, task):
        assert task in self.tasks, (
            f"{task} is not a supported task. "
            f"Please choose one of {list(self.tasks.keys())}. "
            "Other tasks may work as well, but they have not been tested; "
            "if you run into problems, please report them via a GitHub issue."
        )

return self.tasks[task]

    def nli_task_map_func(self, dataset, tokenizer, args):
def preprocess(row_datas):
input_texts = []
labels = []

for text_a, text_b, label in zip(
row_datas["text_a"], row_datas["text_b"], row_datas["label"]
):
input_texts.append(f"{str(text_a)}\n{str(text_b)}")
labels.append(label)

input_text = tokenizer(
input_texts,
max_length=args.sequence_length,
return_tensors="pt",
padding="max_length",
truncation=True,
)

ret_labels = torch.tensor(labels, dtype=torch.long)

return {**input_text, "labels": ret_labels}

dataset = dataset.select(range(args.train_step))
return dataset.map(
preprocess,
batched=True,
remove_columns=["text_a", "text_b", "label"],
).with_format("torch")

def causal_lm_task_map_func(self, dataset, tokenizer, args):
def preprocess(row_datas):
input_text = tokenizer(
row_datas["context"],
max_length=args.sequence_length,
return_tensors="pt",
padding="max_length",
truncation=True,
)

return {**input_text, "labels": input_text["input_ids"]}

dataset = dataset.select(range(args.train_step))

return dataset.map(
preprocess,
batched=True,
remove_columns=["id", "title", "context", "question", "answers"],
).with_format("torch")

def seq2seq_task_map_func(self, dataset, tokenizer, args):
def preprocess(row_datas):
input_text = tokenizer(
row_datas["context"],
max_length=args.sequence_length,
return_tensors="pt",
padding="max_length",
truncation=True,
)

label_text = tokenizer(
row_datas["question"],
max_length=args.sequence_length,
return_tensors="pt",
padding="max_length",
truncation=True,
)

return {**input_text, "labels": label_text["input_ids"]}

dataset = dataset.select(range(args.train_step))

return dataset.map(
preprocess,
batched=True,
remove_columns=["id", "title", "context", "question", "answers"],
).with_format("torch")
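The task definitions above back the training test. Per the PR description there is a `./tests/test_script/run_train.sh`; assuming it follows the same calling convention as `run_inference.sh` below (an assumption, since that script is not shown in this excerpt), a training run would look like:

```console
sh ./tests/test_script/run_train.sh 4 gpt2 causal-lm
```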
Empty file removed tests/test_all.py
Empty file.
17 changes: 17 additions & 0 deletions tests/test_script/run_inference.sh
@@ -0,0 +1,17 @@
## Inference test script
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 bert-base-cased masked-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 ishan/bert-base-uncased-mnli sequence-classification ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 gpt2 causal-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 EleutherAI/gpt-neo-1.3B causal-lm ``
# EXAMPLE: ``sh ./tests/test_script/run_inference.sh 4 t5-base seq2seq-lm ``

NUM_GPUS=$1
MODEL=$2
TASK=$3

python -m torch.distributed.launch \
--nproc_per_node="$NUM_GPUS" \
./tests/inference.py \
--task=$TASK \
--model=$MODEL \
--tensor_parallel_size="$NUM_GPUS"
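On recent PyTorch releases `torch.distributed.launch` is deprecated in favor of `torchrun`. An equivalent invocation (untested here) is sketched below; note that `torchrun` passes the local rank through the `LOCAL_RANK` environment variable rather than a `--local-rank` argument, which `tests/inference.py` defaults to 0:

```console
torchrun --nproc_per_node="$NUM_GPUS" \
    ./tests/inference.py \
    --task=$TASK \
    --model=$MODEL \
    --tensor_parallel_size="$NUM_GPUS"
```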