Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions autotest/config_h.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ config:
internlm/Intern-S1-Pro-FP8:
dp: 16
ep: 16
Qwen/Qwen3.5-397B-A17B:
dp: 4
ep: 8
Qwen/Qwen3.5-397B-A17B-FP8:
dp: 4
ep: 8

cp_tp:
Qwen/Qwen3-235B-A22B-Thinking-2507:
Expand Down Expand Up @@ -122,6 +128,8 @@ pytorch_chat_model:
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-35B-A3B-FP8
- Qwen/Qwen3.5-122B-A10B
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8
- THUDM/cogvlm-chat-hf
- THUDM/cogvlm2-llama3-chinese-chat-19B
- THUDM/glm-4v-9b
Expand Down Expand Up @@ -159,6 +167,8 @@ pytorch_vl_model:
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-35B-A3B-FP8
- Qwen/Qwen3.5-122B-A10B
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8
- THUDM/cogvlm-chat-hf
- THUDM/cogvlm2-llama3-chinese-chat-19B
- THUDM/glm-4v-9b
Expand Down Expand Up @@ -283,6 +293,8 @@ pytorch_quantization:
- internlm/Intern-S1
- internlm/Intern-S1-mini
- internlm/Intern-S1-Pro-FP8
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8
no_kvint8:
- zai-org/GLM-4.7-Flash
- zai-org/GLM-5-FP8
Expand All @@ -293,6 +305,10 @@ pytorch_quantization:
- Qwen/Qwen3.5-122B-A10B
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- internlm/Intern-S1-Pro-FP8
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8
fp8:
- Qwen/Qwen3.5-397B-A17B

longtext_benchmark_model:
- Qwen/Qwen3-30B-A3B
Expand Down Expand Up @@ -335,13 +351,19 @@ evaluate_model:
- deepseek-ai/DeepSeek-V3.1
- zai-org/GLM-5-FP8
- internlm/Intern-S1-Pro-FP8
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8

longtext_evaluate_model:
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8

mtp_evaluate_model:
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-35B-A3B-FP8
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8

mllm_evaluate_model:
- OpenGVLab/InternVL3_5-38B
Expand All @@ -352,3 +374,5 @@ mllm_evaluate_model:
- Qwen/Qwen3.5-122B-A10B
- internlm/Intern-S1
- internlm/Intern-S1-mini
- Qwen/Qwen3.5-397B-A17B
- Qwen/Qwen3.5-397B-A17B-FP8
91 changes: 80 additions & 11 deletions autotest/evaluate/test_api_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,21 @@ def _run_proxy_distributed_test(config,
worker_id,
test_type='infer',
manager=None,
eval_config_name='default'):
eval_config_name='default',
eval_subpath=None):
assert manager is not None, 'Manager instance must be provided'

if 'gpt' in run_config.get('model', '').lower():
eval_config_name = 'gpt'
elif 'intern-s1-pro' in run_config.get('model', '').lower():
eval_config_name = 'intern-s1-pro'
elif 'qwen3.5' in run_config.get('model', '').lower():
eval_config_name = 'qwen3.5'
if eval_subpath is None:
if eval_config_name == 'default':
if 'gpt' in run_config.get('model', '').lower():
eval_config_name = 'gpt'
elif 'intern-s1-pro' in run_config.get('model', '').lower():
eval_config_name = 'intern-s1-pro'
elif 'qwen3.5' in run_config.get('model', '').lower():
eval_config_name = 'qwen3.5'

if str(config.get('env_tag')) == 'ascend':
eval_config_name = f'{eval_config_name}-2batch'
if str(config.get('env_tag')) == 'ascend':
eval_config_name = f'{eval_config_name}-2batch'

preset_config = constant.EVAL_CONFIGS.get(eval_config_name, {})
model_name = run_config['model']
Expand All @@ -88,6 +91,9 @@ def _run_proxy_distributed_test(config,
api_server.wait_until_ready()
print(f'🧪 Master node executing {test_type} test ({eval_config_name})...')
eval_path = config.get('eval_path')
if eval_subpath:
eval_path = os.path.join(eval_path, eval_subpath)
os.makedirs(eval_path, exist_ok=True)
case_name = get_case_str_by_config(run_config)

extra_config = {'max-num-workers': 16}
Expand All @@ -98,6 +104,7 @@ def _run_proxy_distributed_test(config,
port=constant.PROXY_PORT,
test_type=test_type,
extra_config=extra_config,
eval_config_name=eval_config_name,
**preset_config)
assert result, f'❌ {test_type} test failed: {msg}'
print(f'✅ {test_type} test passed')
Expand Down Expand Up @@ -282,7 +289,30 @@ def test_pytorch_restful_tp2(config, run_config, worker_id):
),
)
def test_pytorch_restful_tp2_longtext(config, run_config, worker_id):
run_eval_test(config, run_config, worker_id, 'infer', eval_config_name='longtext-256k')
run_eval_test(config, run_config, worker_id, 'infer', eval_subpath='longtext', eval_config_name='longtext-256k')


@pytest.mark.infer
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_func_config_list('pytorch', {'dp': 4, 'ep': 8},
                         func_type='longtext_evaluate',
                         extra={'session_len': 400000}),
)
def test_pytorch_restful_distributed_dp4ep8_longtext(shared_proxy_manager, config, run_config, worker_id):
    """Run the long-text 'infer' stage for dp=4 / ep=8 PyTorch configs via the shared proxy manager."""
    # Gather the arguments once so the long-text specific settings
    # ('longtext-256k' preset, 'longtext' sub-path) are easy to spot.
    proxy_test_kwargs = {
        'config': config,
        'run_config': run_config,
        'worker_id': worker_id,
        'test_type': 'infer',
        'manager': shared_proxy_manager,
        'eval_config_name': 'longtext-256k',
        'eval_subpath': 'longtext',
    }
    _run_proxy_distributed_test(**proxy_test_kwargs)


@pytest.mark.infer
Expand Down Expand Up @@ -398,6 +428,19 @@ def test_pytorch_restful_distributed_dpep8(shared_proxy_manager, config, run_con
manager=shared_proxy_manager)


@pytest.mark.infer
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_func_config_list('pytorch', {'dp': 4, 'ep': 8}, func_type='evaluate'),
)
def test_pytorch_restful_distributed_dp4ep8(shared_proxy_manager, config, run_config, worker_id):
    """Run the 'infer' stage for dp=4 / ep=8 PyTorch configs via the shared proxy manager."""
    proxy_test_kwargs = {
        'config': config,
        'run_config': run_config,
        'worker_id': worker_id,
        'test_type': 'infer',
        'manager': shared_proxy_manager,
    }
    _run_proxy_distributed_test(**proxy_test_kwargs)


@pytest.mark.infer
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dpep16
Expand Down Expand Up @@ -515,6 +558,15 @@ def test_pytorch_eval_distributed_dpep8(config, run_config, worker_id):
run_eval_test(config, run_config, worker_id, 'eval')


@pytest.mark.eval
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_func_config_list('pytorch', {'dp': 4, 'ep': 8}, func_type='evaluate'),
)
def test_pytorch_eval_distributed_dp4ep8(config, run_config, worker_id):
    """Run the 'eval' stage for every dp=4 / ep=8 PyTorch config listed for evaluation."""
    stage = 'eval'
    run_eval_test(config, run_config, worker_id, stage)


@pytest.mark.eval
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dpep16
Expand All @@ -538,7 +590,24 @@ def test_pytorch_eval_distributed_dpep16(config, run_config, worker_id):
),
)
def test_pytorch_eval_tp2_longtext(config, run_config, worker_id):
run_eval_test(config, run_config, worker_id, 'eval', eval_config_name='longtext-256k')
run_eval_test(config, run_config, worker_id, 'eval', eval_subpath='longtext', eval_config_name='longtext-256k')


@pytest.mark.eval
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_func_config_list('pytorch', {'dp': 4, 'ep': 8},
                         func_type='longtext_evaluate',
                         extra={'session_len': 400000}),
)
def test_pytorch_eval_distributed_dp4ep8_longtext(config, run_config, worker_id):
    """Run the long-text 'eval' stage for dp=4 / ep=8 PyTorch configs."""
    # Uses the 'longtext-256k' preset together with the 'longtext' sub-path.
    run_eval_test(
        config,
        run_config,
        worker_id,
        'eval',
        eval_config_name='longtext-256k',
        eval_subpath='longtext',
    )


@pytest.mark.eval
Expand Down
83 changes: 82 additions & 1 deletion autotest/evaluate/test_mllm_api_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
import os
import time

import pytest
import utils.constant as constant
from utils.config_utils import get_case_str_by_config, get_func_config_list, get_workerid
from utils.evaluate_utils import mllm_eval_test
from utils.proxy_distributed_utils import ApiServerPerTest, proxy_worker_node_wait
from utils.run_restful_chat import start_openai_service, start_proxy_server, stop_restful_api, terminate_restful_api


def run_eval_test(config, run_config, worker_id, test_type='infer', eval_config_name='default'):
def run_eval_test(config, run_config, worker_id, test_type='infer', eval_config_name='default', eval_subpath=None):
if eval_config_name == 'default':
if 'qwen3.5' in run_config.get('model', '').lower():
eval_config_name = 'qwen3.5'
extra_config = constant.MLLM_EVAL_CONFIGS.get(eval_config_name, {})
eval_path = config.get('mllm_eval_path')
if eval_subpath:
eval_path = os.path.join(eval_path, eval_subpath)
os.makedirs(eval_path, exist_ok=True)
case_name = get_case_str_by_config(run_config)
if test_type == 'infer':
proxy_pid, proxy_process = start_proxy_server(config.get('server_log_path'), constant.PROXY_PORT,
Expand Down Expand Up @@ -66,6 +74,57 @@ def run_openai_service_start(i):
stop_restful_api(proxy_pid, proxy_process)


def _run_proxy_distributed_mllm_test(
        config,
        run_config,
        worker_id,
        test_type='infer',
        manager=None,
        eval_config_name='default',
        eval_subpath=None):
    """Run one MLLM evaluation stage against a proxy-managed distributed API server.

    Every node starts an ``ApiServerPerTest``. The master node waits until the
    server is ready and then calls ``mllm_eval_test``; every other node blocks
    in ``proxy_worker_node_wait``. The server is cleaned up on all nodes
    whether or not the test succeeded.

    Args:
        config: Test configuration mapping; ``env_tag``, ``model_path`` and
            ``mllm_eval_path`` are read from it.
        run_config: Per-case configuration; ``model`` is read from it.
        worker_id: Accepted but not used by this helper.
        test_type: Stage name forwarded to ``mllm_eval_test``.
        manager: Proxy manager instance; must not be ``None``.
        eval_config_name: Key into ``constant.MLLM_EVAL_CONFIGS``. The value
            ``'default'`` is replaced by ``'qwen3.5'`` when the model name
            contains that substring, and ``-2batch`` is appended when
            ``env_tag`` is ``'ascend'``.
        eval_subpath: Optional sub-directory of ``mllm_eval_path`` to write
            into, created if missing. ``None`` keeps ``mllm_eval_path`` as-is.

    Raises:
        AssertionError: If ``manager`` is ``None`` or ``mllm_eval_test``
            reports a failure.
    """
    assert manager is not None, 'Manager instance must be provided'

    if eval_config_name == 'default':
        if 'qwen3.5' in run_config.get('model', '').lower():
            eval_config_name = 'qwen3.5'

    if str(config.get('env_tag')) == 'ascend':
        eval_config_name = f'{eval_config_name}-2batch'

    preset_config = constant.MLLM_EVAL_CONFIGS.get(eval_config_name, {})
    model_name = run_config['model']
    model_path = os.path.join(config['model_path'], model_name)

    api_server = ApiServerPerTest(proxy_manager=manager, config=config, run_config=run_config)
    api_server.start()

    try:
        if manager.is_master:
            api_server.wait_until_ready()
            print(f'🧪 Master node executing mllm {test_type} test ({eval_config_name})...')
            eval_path = config.get('mllm_eval_path')
            if eval_subpath:
                # Same sub-directory handling as run_eval_test in this file.
                eval_path = os.path.join(eval_path, eval_subpath)
                os.makedirs(eval_path, exist_ok=True)
            case_name = get_case_str_by_config(run_config)
            # Preset values override the default 'api-nproc' setting.
            extra_config = {'api-nproc': 16}
            extra_config.update(preset_config)

            result, msg = mllm_eval_test(model_path,
                                         eval_path,
                                         case_name,
                                         port=constant.PROXY_PORT,
                                         test_type=test_type,
                                         extra_config=extra_config)
            assert result, f'❌ mllm {test_type} test failed: {msg}'
            print(f'✅ mllm {test_type} test passed')

        else:
            print(f'⏸️ Worker node {manager.node_rank} waiting for master to complete mllm test...')
            proxy_worker_node_wait(manager, timeout_minutes=4880)

    finally:
        api_server.cleanup()
        if manager.is_master:
            time.sleep(1)


def get_models(backend, parallel_config):
return get_func_config_list(backend,
parallel_config,
Expand Down Expand Up @@ -247,3 +306,25 @@ def test_pytorch_eval_tp8(config, run_config, worker_id):
@pytest.mark.parametrize('run_config', get_models('pytorch', {'tp': 16}))
def test_pytorch_eval_tp16(config, run_config, worker_id):
run_eval_test(config, run_config, worker_id, 'eval')


@pytest.mark.infer
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_models('pytorch', {'dp': 4, 'ep': 8}),
)
def test_pytorch_vl_restful_distributed_dp4ep8(shared_proxy_manager, config, run_config, worker_id):
    """Run the MLLM 'infer' stage for dp=4 / ep=8 PyTorch configs via the shared proxy manager."""
    proxy_test_kwargs = {
        'config': config,
        'run_config': run_config,
        'worker_id': worker_id,
        'test_type': 'infer',
        'manager': shared_proxy_manager,
    }
    _run_proxy_distributed_mllm_test(**proxy_test_kwargs)


@pytest.mark.eval
@pytest.mark.pytorch
@pytest.mark.gpu_num_distributed_dp4ep8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize(
    'run_config',
    get_models('pytorch', {'dp': 4, 'ep': 8}),
)
def test_pytorch_vl_eval_distributed_dp4ep8(config, run_config, worker_id):
    """Run the MLLM 'eval' stage for dp=4 / ep=8 PyTorch configs."""
    run_eval_test(config, run_config, worker_id, test_type='eval')
Loading
Loading