Skip to content

Commit

Permalink
server: tests: fix concurrent OAI streaming requests
Browse files Browse the repository at this point in the history
  • Loading branch information
phymbert committed Feb 23, 2024
1 parent 77b8589 commit 7183149
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
2 changes: 1 addition & 1 deletion examples/server/tests/features/parallel.feature
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Feature: Parallel
Examples:
| streaming | n_predict |
| disabled | 128 |
#| enabled | 64 | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
| enabled | 64 |

Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
Given a prompt:
Expand Down
33 changes: 19 additions & 14 deletions examples/server/tests/features/steps/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,20 +485,25 @@ async def oai_chat_completions(user_prompt,
assert response.status == 200
assert response.headers['Access-Control-Allow-Origin'] == origin
assert response.headers['Content-Type'] == "text/event-stream"

async for line_in_bytes in response.content:
line = line_in_bytes.decode('utf8')
event_data = line.split(': ', 1)
assert event_data[0] == 'data', f'{event_data}'
chunk_raw = event_data[1]

chunk = json.loads(chunk_raw)
assert len(chunk['choices']) == 1
delta = chunk['choices'][0]['delta']
if 'content' in delta:
completion_response['content'] += delta['content']
completion_response['timings']['predicted_n'] += 1
print(f"DEBUG completion_response: {completion_response}")
event_received = True
while event_received:
event_received = False
async for line_in_bytes in response.content:
line = line_in_bytes.decode('utf8')
line = line.rstrip('\n').rstrip('\r')
if line == '':
continue
event_data = line.split(': ', 1)
assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
chunk_raw = event_data[1]

chunk = json.loads(chunk_raw)
assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"
delta = chunk['choices'][0]['delta']
if 'content' in delta:
completion_response['content'] += delta['content']
completion_response['timings']['predicted_n'] += 1
print(f"DEBUG completion_response: {completion_response}")
else:
if expect_api_error is None or not expect_api_error:
assert response.status == 200
Expand Down

0 comments on commit 7183149

Please sign in to comment.