-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbench.sh
executable file
·139 lines (126 loc) · 4.2 KB
/
bench.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/bin/bash
########################################################################################################
# Script: bench.sh
# Description: This script runs the Nvidia TensorRT-LLM benchmark.
#
# Usage: ./bench.sh [OPTIONS]
# OPTIONS:
# -p, --prompt Prompt for benchmarks (default: 'Write an essay about the transformer model architecture')
# -r, --repetitions Number of repetitions for benchmarks (default: 10)
# -m, --max_tokens Maximum number of tokens for benchmarks (default: 512)
# -d, --device Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cuda')
# -n, --model_name The name of the model to benchmark (possible values: 'llama' for using Llama2, 'mistral' for using Mistral 7B v0.1)
# -lf, --log_file Logging file name.
# -h, --help Show this help message
########################################################################################################
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
#######################################
# Print the command-line help text to stdout and terminate the script.
# Arguments: none
# Outputs:   usage text, one option per line
# Returns:   never returns; always exits with status 1
#######################################
print_usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "OPTIONS:" \
    " -p, --prompt Prompt for benchmarks (default: 'Write an essay about the transformer model architecture')" \
    " -r, --repetitions Number of repetitions for benchmarks (default: 10)" \
    " -m, --max_tokens Maximum number of tokens for benchmarks (default: 512)" \
    " -d, --device Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cuda')" \
    " -n, --model_name The name of the model to benchmark (possible values: 'llama' for using Llama2, 'mistral' for using Mistral 7B v0.1)" \
    " -lf, --log_file Logging file name." \
    " -h, --help Show this help message"
  exit 1
}
# Directory the script was invoked from (the caller's working directory).
CURRENT_DIR=$(pwd)
# Absolute path of the directory that contains this script, resolved by
# changing into it inside a command substitution.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
#######################################
# Ensure the CUDA compiler driver (nvcc) can be found.
# Outputs: a banner plus `nvcc --version` when nvcc is found, otherwise a
#          "not available" message
# Returns: status of `nvcc --version` on success; exits the script with
#          status 1 when nvcc cannot be found
#######################################
check_cuda() {
  # Bail out early when nvcc is missing.
  if ! command -v nvcc > /dev/null 2>&1; then
    echo -e "\nCUDA is not available."
    exit 1
  fi

  echo -e "\nUsing CUDA"
  nvcc --version
}
#######################################
# Report the host operating system, as given by `uname -s`.
# Outputs: "Running on Linux." or "Running on Mac OS." for the two
#          recognised kernels, "Unknown platform." otherwise
# Returns: 0 for Linux/Darwin; exits the script with status 1 for anything
#          else
#######################################
check_platform() {
  local kernel
  kernel=$(uname -s)

  case "$kernel" in
    Linux)
      echo "Running on Linux."
      ;;
    Darwin)
      echo "Running on Mac OS."
      ;;
    *)
      echo "Unknown platform."
      exit 1
      ;;
  esac
}
#######################################
# Run the setup.sh script located next to this script.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - model name forwarded to setup.sh; 'llama' when empty or
#                 omitted
# Outputs:   a banner line followed by whatever setup.sh prints
# Returns:   exit status of setup.sh
#######################################
setup() {
  local MODEL_NAME
  MODEL_NAME="${1:-llama}"
  local setup_script="$SCRIPT_DIR/setup.sh"

  echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
  bash "$setup_script" "$MODEL_NAME"
}
# Parse command-line arguments

#######################################
# Abort with the usage text when an option that needs a value is the last
# word on the command line. Without this check the script, which runs with
# `set -u`, would die on "$2: unbound variable" instead of a helpful message.
# Arguments: $1 - option name as typed by the user
#            $2 - number of words left on the command line, option included
# Returns:   0 when a value is present; otherwise exits via print_usage
#######################################
require_option_value() {
  if (( $2 < 2 )); then
    echo "Missing value for option: $1"
    print_usage
  fi
}

#######################################
# Walk the given option list and store the values that were passed.
# Globals:   PROMPT, REPETITIONS, MAX_TOKENS, DEVICE, MODEL_NAME (written,
#            only for options that are present)
# Arguments: the script's command-line words
# Returns:   0 on success; exits with status 1 (via print_usage or directly)
#            on -h/--help, an unknown option, a missing value, or a device
#            other than 'cuda'
# NOTE(review): the usage text advertises -lf/--log_file, but no such option
# is handled here, so passing it is reported as an unknown option.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -p|--prompt)
        require_option_value "$1" "$#"
        PROMPT="$2"
        shift 2
        ;;
      -r|--repetitions)
        require_option_value "$1" "$#"
        REPETITIONS="$2"
        shift 2
        ;;
      -m|--max_tokens)
        require_option_value "$1" "$#"
        MAX_TOKENS="$2"
        shift 2
        ;;
      -d|--device)
        require_option_value "$1" "$#"
        DEVICE="$2"
        # First reject values that are not device names at all...
        case "$DEVICE" in
          "cuda" | "metal" | "cpu")
            ;;
          *)
            echo "Invalid value for --device. Please use 'cuda', 'cpu' or 'metal'."
            print_usage
            ;;
        esac
        # ...then reject the known devices this benchmark cannot run on.
        if [[ "$DEVICE" == "cuda" ]]; then
          check_cuda
        else
          echo "Not supported for $DEVICE"
          exit 1
        fi
        shift 2
        ;;
      -n|--model_name)
        require_option_value "$1" "$#"
        MODEL_NAME="$2"
        shift 2
        ;;
      -h|--help)
        print_usage
        ;;
      *)
        echo "Unknown option: $1"
        print_usage
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Verify that a Python interpreter is available on the host.
# The script calls check_python but never defined it, so every run aborted
# with "command not found"; this supplies the missing check.
# Returns: 0 when `python` or `python3` is on PATH; otherwise exits the
#          script with status 1
#######################################
check_python() {
  if command -v python &> /dev/null || command -v python3 &> /dev/null; then
    return 0
  fi
  echo "Python is not installed."
  exit 1
}

# Set default values if not provided.
# This must happen before setup is called: the script runs with `set -u`, so
# referencing MODEL_NAME while it is still unset would abort the run.
# The defaults are written without inner quotes; inside a double-quoted
# expansion single quotes are kept literally, which used to produce the
# device value 'cuda' with the quote characters included.
PROMPT="${PROMPT:-Write an essay about the transformer model architecture}"
REPETITIONS="${REPETITIONS:-10}"
MAX_TOKENS="${MAX_TOKENS:-512}"
DEVICE="${DEVICE:-cuda}"
MODEL_NAME="${MODEL_NAME:-llama}"

check_platform
check_python
setup "$MODEL_NAME"

# Run the benchmark inside the TensorRT-LLM image. The caller's working
# directory is mounted at /mnt/benchmarks, and bench.py is looked up beneath
# it, so the script has to be started from the directory that contains
# bench_tensorrtllm/.
docker run \
  --gpus all \
  --ipc=host \
  --ulimit memlock=-1 \
  --ulimit stack=67108864 \
  -e PYTHONUNBUFFERED=1 \
  -v "$CURRENT_DIR:/mnt/benchmarks" \
  -it tensorrt_llm/release:latest \
  python3 -u "/mnt/benchmarks/bench_tensorrtllm/bench.py" \
  --prompt "$PROMPT" \
  --repetitions "$REPETITIONS" \
  --max_tokens "$MAX_TOKENS" \
  --model_name "$MODEL_NAME" \
  --device "$DEVICE"