Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
lizhiping
text-generation-inference
Commits
dca0fe25
Commit
dca0fe25
authored
2 years ago
by
Ubuntu
Browse files
Options
Download
Email Patches
Plain Diff
Adding GPTQ integration tests.
parent
16d0fb04
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
675 additions
and
2 deletions
+675
-2
integration-tests/conftest.py
integration-tests/conftest.py
+2
-2
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
...pshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
+103
-0
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
...st_flash_llama_gptq/test_flash_llama_gptq_all_params.json
+98
-0
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
...s__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
+414
-0
integration-tests/models/test_flash_llama_gptq.py
integration-tests/models/test_flash_llama_gptq.py
+58
-0
No files found.
integration-tests/conftest.py
View file @
dca0fe25
...
...
@@ -232,9 +232,9 @@ def launcher(event_loop):
if
num_shard
is
not
None
:
args
.
extend
([
"--num-shard"
,
str
(
num_shard
)])
if
quantize
:
if
quantize
is
not
None
:
args
.
append
(
"--quantize"
)
args
.
append
(
"bitsandbytes"
)
args
.
append
(
quantize
)
if
trust_remote_code
:
args
.
append
(
"--trust-remote-code"
)
...
...
This diff is collapsed.
Click to expand it.
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
0 → 100644
View file @
dca0fe25
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
1724
,
"logprob"
:
-9.953125
,
"text"
:
"What"
},
{
"id"
:
338
,
"logprob"
:
-1.4121094
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-9.9765625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-1.6767578
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-4.5976562
,
"text"
:
"?"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
-0.21813965
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
2772
,
"logprob"
:
-1.4130859
,
"special"
:
false
,
"text"
:
"De"
},
{
"id"
:
1022
,
"logprob"
:
-0.0028419495
,
"special"
:
false
,
"text"
:
"ep"
},
{
"id"
:
6509
,
"logprob"
:
-0.3244629
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
338
,
"logprob"
:
-0.25439453
,
"special"
:
false
,
"text"
:
" is"
},
{
"id"
:
263
,
"logprob"
:
-0.43774414
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
4933
,
"logprob"
:
-1.8105469
,
"special"
:
false
,
"text"
:
" machine"
},
{
"id"
:
6509
,
"logprob"
:
-0.07116699
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
11043
,
"logprob"
:
-0.87158203
,
"special"
:
false
,
"text"
:
" technique"
},
{
"id"
:
393
,
"logprob"
:
-0.91015625
,
"special"
:
false
,
"text"
:
" that"
}
]
},
"generated_text"
:
"
\n
Deep learning is a machine learning technique that"
}
This diff is collapsed.
Click to expand it.
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
0 → 100644
View file @
dca0fe25
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
338
,
"logprob"
:
-10.8046875
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-12.6640625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-3.3398438
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-8.3828125
,
"text"
:
"?"
}
],
"seed"
:
0
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
0.0
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
4013
,
"logprob"
:
-2.6992188
,
"special"
:
false
,
"text"
:
"This"
},
{
"id"
:
1139
,
"logprob"
:
-0.35668945
,
"special"
:
false
,
"text"
:
" question"
},
{
"id"
:
756
,
"logprob"
:
-0.08251953
,
"special"
:
false
,
"text"
:
" has"
},
{
"id"
:
1063
,
"logprob"
:
-0.39697266
,
"special"
:
false
,
"text"
:
" been"
},
{
"id"
:
4433
,
"logprob"
:
0.0
,
"special"
:
false
,
"text"
:
" asked"
},
{
"id"
:
1784
,
"logprob"
:
-0.9248047
,
"special"
:
false
,
"text"
:
" many"
},
{
"id"
:
3064
,
"logprob"
:
0.0
,
"special"
:
false
,
"text"
:
" times"
},
{
"id"
:
1434
,
"logprob"
:
-0.90625
,
"special"
:
false
,
"text"
:
" before"
},
{
"id"
:
29892
,
"logprob"
:
-0.19580078
,
"special"
:
false
,
"text"
:
","
}
]
},
"generated_text"
:
"What is deep learning ?
\n
This question has been asked many times before,"
}
This diff is collapsed.
Click to expand it.
integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
0 → 100644
View file @
dca0fe25
[
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
1724
,
"logprob"
:
-9.953125
,
"text"
:
"What"
},
{
"id"
:
338
,
"logprob"
:
-1.4121094
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-9.9765625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-1.6767578
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-4.5976562
,
"text"
:
"?"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
-0.21813965
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
2772
,
"logprob"
:
-1.4189453
,
"special"
:
false
,
"text"
:
"De"
},
{
"id"
:
1022
,
"logprob"
:
-0.0028419495
,
"special"
:
false
,
"text"
:
"ep"
},
{
"id"
:
6509
,
"logprob"
:
-0.3244629
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
338
,
"logprob"
:
-0.25439453
,
"special"
:
false
,
"text"
:
" is"
},
{
"id"
:
263
,
"logprob"
:
-0.4375
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
4933
,
"logprob"
:
-1.8105469
,
"special"
:
false
,
"text"
:
" machine"
},
{
"id"
:
6509
,
"logprob"
:
-0.07116699
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
11043
,
"logprob"
:
-0.87158203
,
"special"
:
false
,
"text"
:
" technique"
},
{
"id"
:
393
,
"logprob"
:
-0.91015625
,
"special"
:
false
,
"text"
:
" that"
}
]
},
"generated_text"
:
"
\n
Deep learning is a machine learning technique that"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
1724
,
"logprob"
:
-9.953125
,
"text"
:
"What"
},
{
"id"
:
338
,
"logprob"
:
-1.4121094
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-9.9765625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-1.6767578
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-4.5976562
,
"text"
:
"?"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
-0.21813965
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
2772
,
"logprob"
:
-1.4189453
,
"special"
:
false
,
"text"
:
"De"
},
{
"id"
:
1022
,
"logprob"
:
-0.0028419495
,
"special"
:
false
,
"text"
:
"ep"
},
{
"id"
:
6509
,
"logprob"
:
-0.3244629
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
338
,
"logprob"
:
-0.25439453
,
"special"
:
false
,
"text"
:
" is"
},
{
"id"
:
263
,
"logprob"
:
-0.4375
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
4933
,
"logprob"
:
-1.8105469
,
"special"
:
false
,
"text"
:
" machine"
},
{
"id"
:
6509
,
"logprob"
:
-0.07116699
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
11043
,
"logprob"
:
-0.87158203
,
"special"
:
false
,
"text"
:
" technique"
},
{
"id"
:
393
,
"logprob"
:
-0.91015625
,
"special"
:
false
,
"text"
:
" that"
}
]
},
"generated_text"
:
"
\n
Deep learning is a machine learning technique that"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
1724
,
"logprob"
:
-9.953125
,
"text"
:
"What"
},
{
"id"
:
338
,
"logprob"
:
-1.4121094
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-9.9765625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-1.6767578
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-4.5976562
,
"text"
:
"?"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
-0.21813965
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
2772
,
"logprob"
:
-1.4189453
,
"special"
:
false
,
"text"
:
"De"
},
{
"id"
:
1022
,
"logprob"
:
-0.0028419495
,
"special"
:
false
,
"text"
:
"ep"
},
{
"id"
:
6509
,
"logprob"
:
-0.3244629
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
338
,
"logprob"
:
-0.25439453
,
"special"
:
false
,
"text"
:
" is"
},
{
"id"
:
263
,
"logprob"
:
-0.4375
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
4933
,
"logprob"
:
-1.8105469
,
"special"
:
false
,
"text"
:
" machine"
},
{
"id"
:
6509
,
"logprob"
:
-0.07116699
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
11043
,
"logprob"
:
-0.87158203
,
"special"
:
false
,
"text"
:
" technique"
},
{
"id"
:
393
,
"logprob"
:
-0.91015625
,
"special"
:
false
,
"text"
:
" that"
}
]
},
"generated_text"
:
"
\n
Deep learning is a machine learning technique that"
},
{
"details"
:
{
"best_of_sequences"
:
null
,
"finish_reason"
:
"length"
,
"generated_tokens"
:
10
,
"prefill"
:
[
{
"id"
:
1
,
"logprob"
:
null
,
"text"
:
"<s>"
},
{
"id"
:
1724
,
"logprob"
:
-9.953125
,
"text"
:
"What"
},
{
"id"
:
338
,
"logprob"
:
-1.4121094
,
"text"
:
"is"
},
{
"id"
:
6483
,
"logprob"
:
-9.9765625
,
"text"
:
"deep"
},
{
"id"
:
6509
,
"logprob"
:
-1.6767578
,
"text"
:
"learning"
},
{
"id"
:
1577
,
"logprob"
:
-4.5976562
,
"text"
:
"?"
}
],
"seed"
:
null
,
"tokens"
:
[
{
"id"
:
13
,
"logprob"
:
-0.21813965
,
"special"
:
false
,
"text"
:
"
\n
"
},
{
"id"
:
2772
,
"logprob"
:
-1.4189453
,
"special"
:
false
,
"text"
:
"De"
},
{
"id"
:
1022
,
"logprob"
:
-0.0028419495
,
"special"
:
false
,
"text"
:
"ep"
},
{
"id"
:
6509
,
"logprob"
:
-0.3244629
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
338
,
"logprob"
:
-0.25439453
,
"special"
:
false
,
"text"
:
" is"
},
{
"id"
:
263
,
"logprob"
:
-0.4375
,
"special"
:
false
,
"text"
:
" a"
},
{
"id"
:
4933
,
"logprob"
:
-1.8105469
,
"special"
:
false
,
"text"
:
" machine"
},
{
"id"
:
6509
,
"logprob"
:
-0.07116699
,
"special"
:
false
,
"text"
:
" learning"
},
{
"id"
:
11043
,
"logprob"
:
-0.87158203
,
"special"
:
false
,
"text"
:
" technique"
},
{
"id"
:
393
,
"logprob"
:
-0.91015625
,
"special"
:
false
,
"text"
:
" that"
}
]
},
"generated_text"
:
"
\n
Deep learning is a machine learning technique that"
}
]
This diff is collapsed.
Click to expand it.
integration-tests/models/test_flash_llama_gptq.py
0 → 100644
View file @
dca0fe25
import
pytest
@pytest.fixture(scope="module")
def flash_llama_gptq_handle(launcher):
    """Yield a launcher handle for the GPTQ-quantized Llama 7B model.

    The model ``huggingface/llama-7b-gptq`` is started through the
    ``launcher`` fixture with ``num_shard=2`` and ``quantize="gptq"``.
    The fixture is module-scoped, so every test in this module shares the
    same handle; leaving the ``with`` block hands cleanup back to the
    launcher's context manager.
    """
    server_context = launcher(
        "huggingface/llama-7b-gptq",
        num_shard=2,
        quantize="gptq",
    )
    with server_context as running_server:
        yield running_server
@pytest.fixture(scope="module")
async def flash_llama_gptq(flash_llama_gptq_handle):
    """Return the client of the launched GPTQ Llama server once it is healthy.

    Awaits ``health(300)`` on the handle before exposing ``handle.client``.
    NOTE(review): 300 is presumably a timeout in seconds -- confirm against
    the handle implementation in conftest.
    """
    server = flash_llama_gptq_handle
    await server.health(300)
    client = server.client
    return client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
    """Generate 10 tokens with default sampling and compare to the snapshot.

    ``decoder_input_details=True`` is requested so the response also carries
    the prefill details that are part of the stored snapshot.
    """
    prompt = "What is deep learning ?"
    response = await flash_llama_gptq.generate(
        prompt,
        max_new_tokens=10,
        decoder_input_details=True,
    )

    produced = response.details.generated_tokens
    assert produced == 10
    assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
    """Generate 10 tokens with many generation options set at once.

    The request fixes ``seed=0`` and is compared against the stored
    snapshot, in addition to checking the generated token count.
    """
    prompt = "What is deep learning ?"
    # Keyword order matches the original call; values are unchanged.
    generation_options = {
        "max_new_tokens": 10,
        "repetition_penalty": 1.2,
        "return_full_text": True,
        "stop_sequences": ["test"],
        "temperature": 0.5,
        "top_p": 0.9,
        "top_k": 10,
        "truncate": 5,
        "typical_p": 0.9,
        "watermark": True,
        "decoder_input_details": True,
        "seed": 0,
    }
    response = await flash_llama_gptq.generate(prompt, **generation_options)

    produced = response.details.generated_tokens
    assert produced == 10
    assert response == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_load(flash_llama_gptq, generate_load, response_snapshot):
    """Send the same prompt four times and check the answers are identical.

    ``generate_load`` is called with ``n=4`` and ``max_new_tokens=10``. The
    test asserts that four responses come back, that every response has the
    same ``generated_text`` as the first one, and that the whole list matches
    the stored snapshot.
    """
    responses = await generate_load(
        flash_llama_gptq,
        "What is deep learning ?",
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    # Generator expression instead of a temporary list: all() can stop at the
    # first response whose text differs from the first one.
    assert all(r.generated_text == responses[0].generated_text for r in responses)

    assert responses == response_snapshot
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment