@@ -73,7 +73,6 @@ def get_gen_param(cap,
7373def main (model_path ,
7474 session_id : int = 1 ,
7575 cap : str = 'chat' ,
76- sys_instruct : str = None ,
7776 tp = 1 ,
7877 stream_output = True ,
7978 ** kwargs ):
@@ -85,8 +84,6 @@ def main(model_path,
8584 session_id (int): the identifier of a session
8685 cap (str): the capability of a model. For example, codellama has
8786 the ability among ['completion', 'infilling', 'chat', 'python']
88- sys_instruct (str): the content of 'system' role, which is used by
89- conversational model
9087 tp (int): GPU number used in tensor parallelism
9188 stream_output (bool): indicator for streaming output or not
9289 **kwargs (dict): other arguments for initializing model's chat template
@@ -100,9 +97,7 @@ def main(model_path,
10097 step = 0
10198 seed = random .getrandbits (64 )
10299 model_name = tm_model .model_name
103- model = MODELS .get (model_name )(capability = cap , ** kwargs ) \
104- if sys_instruct is None else MODELS .get (model_name )(
105- capability = cap , system = sys_instruct , ** kwargs )
100+ model = MODELS .get (model_name )(capability = cap , ** kwargs )
106101
107102 print (f'session { session_id } ' )
108103 while True :
0 commit comments