66import tqdm
77from logger import logger
88
9- from gllm .async_worker import AsyncWorker , run_worker_async
109from gllm .comm import IPCPackage , zmqComm
1110from gllm .id_allocator import IDAllocator
12- from gllm .model_runner import ModelRunner
11+ from gllm .model_runner import ModelRunner , AsyncModelRunner
1312from gllm .sequence import Sequence
1413from gllm .utils import get_model_load_pbar , init_logger , random_uuid
15- from gllm .worker import Worker , run_worker
14+ from gllm .worker import Worker , AsyncWorker , run_worker
1615
1716
1817class LLM :
@@ -40,6 +39,7 @@ def __init__(
4039 assigned_layers = None ,
4140 schedule_method = "chunked_prefill" ,
4241 use_async_worker = False ,
42+ async_scheduling = False ,
4343 use_thinking = True ,
4444 disable_cuda_graph = False ,
4545 max_cuda_graph_bs = 32 ,
@@ -50,7 +50,8 @@ def __init__(
5050 init_logger ()
5151 self .model_path = model_path
5252 self .load_format = load_format
53- self .model_runner = ModelRunner (
53+ model_runner_cls = AsyncModelRunner if async_scheduling else ModelRunner
54+ self .model_runner = model_runner_cls (
5455 load_format = load_format ,
5556 model_path = model_path ,
5657 gpu_memory_util = gpu_memory_util ,
@@ -90,8 +91,11 @@ def __init__(
9091 self .assigned_layers = assigned_layers
9192 self .schedule_method = schedule_method
9293 self .use_async_worker = use_async_worker
94+ self .async_scheduling = async_scheduling
9395
9496 logger .info (f"Schedule method: { schedule_method } " )
97+ if async_scheduling :
98+ logger .info ("Async scheduling enabled" )
9599
96100 # Interact with workers
97101 self .wait_lists : List [Sequence ] = []
@@ -168,7 +172,7 @@ def init_workers(self):
168172 self .load_progress ()
169173
170174 def start_worker (self , local_rank , pp_rank , tp_rank ):
171- worker_cls = Worker if not self .use_async_worker else AsyncWorker
175+ worker_cls = Worker if not self .async_scheduling else AsyncWorker
172176 comm = zmqComm (
173177 self .host ,
174178 self .zmq_port_base ,
@@ -195,7 +199,7 @@ def start_worker(self, local_rank, pp_rank, tp_rank):
195199 self .schedule_method ,
196200 )
197201 process = self .ctx .Process (
198- target = run_worker if not self . use_async_worker else run_worker_async ,
202+ target = run_worker ,
199203 args = (worker ,),
200204 daemon = True ,
201205 )
0 commit comments