Skip to content

Commit e2618be

Browse files
authored
ENH: Refine request log and add optional request_id (#2173)
1 parent c60e8fd commit e2618be

File tree

6 files changed

+156
-43
lines changed

6 files changed

+156
-43
lines changed

xinference/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def get_xinference_home() -> str:
6363
XINFERENCE_DEFAULT_LOG_FILE_NAME = "xinference.log"
6464
XINFERENCE_LOG_MAX_BYTES = 100 * 1024 * 1024
6565
XINFERENCE_LOG_BACKUP_COUNT = 30
66+
XINFERENCE_LOG_ARG_MAX_LENGTH = 100
6667
XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD = int(
6768
os.environ.get(XINFERENCE_ENV_HEALTH_CHECK_FAILURE_THRESHOLD, 5)
6869
)

xinference/core/model.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020
import time
2121
import types
22+
import uuid
2223
import weakref
2324
from asyncio.queues import Queue
2425
from asyncio.tasks import wait_for
@@ -444,18 +445,30 @@ async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
444445
@log_async(logger=logger)
445446
async def generate(self, prompt: str, *args, **kwargs):
446447
if self.allow_batching():
448+
# not support request_id
449+
kwargs.pop("request_id", None)
447450
return await self.handle_batching_request(
448451
prompt, "generate", *args, **kwargs
449452
)
450453
else:
451454
kwargs.pop("raw_params", None)
452455
if hasattr(self._model, "generate"):
456+
# not support request_id
457+
kwargs.pop("request_id", None)
453458
return await self._call_wrapper_json(
454459
self._model.generate, prompt, *args, **kwargs
455460
)
456461
if hasattr(self._model, "async_generate"):
462+
if "request_id" not in kwargs:
463+
kwargs["request_id"] = str(uuid.uuid1())
464+
else:
465+
# model only accept string
466+
kwargs["request_id"] = str(kwargs["request_id"])
457467
return await self._call_wrapper_json(
458-
self._model.async_generate, prompt, *args, **kwargs
468+
self._model.async_generate,
469+
prompt,
470+
*args,
471+
**kwargs,
459472
)
460473
raise AttributeError(f"Model {self._model.model_spec} is not for generate.")
461474

@@ -534,17 +547,26 @@ async def chat(self, messages: List[Dict], *args, **kwargs):
534547
response = None
535548
try:
536549
if self.allow_batching():
550+
# not support request_id
551+
kwargs.pop("request_id", None)
537552
return await self.handle_batching_request(
538553
messages, "chat", *args, **kwargs
539554
)
540555
else:
541556
kwargs.pop("raw_params", None)
542557
if hasattr(self._model, "chat"):
558+
# not support request_id
559+
kwargs.pop("request_id", None)
543560
response = await self._call_wrapper_json(
544561
self._model.chat, messages, *args, **kwargs
545562
)
546563
return response
547564
if hasattr(self._model, "async_chat"):
565+
if "request_id" not in kwargs:
566+
kwargs["request_id"] = str(uuid.uuid1())
567+
else:
568+
# model only accept string
569+
kwargs["request_id"] = str(kwargs["request_id"])
548570
response = await self._call_wrapper_json(
549571
self._model.async_chat, messages, *args, **kwargs
550572
)
@@ -577,9 +599,10 @@ async def abort_request(self, request_id: str) -> str:
577599
return await self._scheduler_ref.abort_request(request_id)
578600
return AbortRequestMessage.NO_OP.name
579601

580-
@log_async(logger=logger)
581602
@request_limit
603+
@log_async(logger=logger)
582604
async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
605+
kwargs.pop("request_id", None)
583606
if hasattr(self._model, "create_embedding"):
584607
return await self._call_wrapper_json(
585608
self._model.create_embedding, input, *args, **kwargs
@@ -589,8 +612,8 @@ async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
589612
f"Model {self._model.model_spec} is not for creating embedding."
590613
)
591614

592-
@log_async(logger=logger)
593615
@request_limit
616+
@log_async(logger=logger)
594617
async def rerank(
595618
self,
596619
documents: List[str],
@@ -602,6 +625,7 @@ async def rerank(
602625
*args,
603626
**kwargs,
604627
):
628+
kwargs.pop("request_id", None)
605629
if hasattr(self._model, "rerank"):
606630
return await self._call_wrapper_json(
607631
self._model.rerank,
@@ -616,8 +640,8 @@ async def rerank(
616640
)
617641
raise AttributeError(f"Model {self._model.model_spec} is not for reranking.")
618642

619-
@log_async(logger=logger, args_formatter=lambda _, kwargs: kwargs.pop("audio"))
620643
@request_limit
644+
@log_async(logger=logger, ignore_kwargs=["audio"])
621645
async def transcriptions(
622646
self,
623647
audio: bytes,
@@ -626,7 +650,9 @@ async def transcriptions(
626650
response_format: str = "json",
627651
temperature: float = 0,
628652
timestamp_granularities: Optional[List[str]] = None,
653+
**kwargs,
629654
):
655+
kwargs.pop("request_id", None)
630656
if hasattr(self._model, "transcriptions"):
631657
return await self._call_wrapper_json(
632658
self._model.transcriptions,
@@ -641,8 +667,8 @@ async def transcriptions(
641667
f"Model {self._model.model_spec} is not for creating transcriptions."
642668
)
643669

644-
@log_async(logger=logger, args_formatter=lambda _, kwargs: kwargs.pop("audio"))
645670
@request_limit
671+
@log_async(logger=logger, ignore_kwargs=["audio"])
646672
async def translations(
647673
self,
648674
audio: bytes,
@@ -651,7 +677,9 @@ async def translations(
651677
response_format: str = "json",
652678
temperature: float = 0,
653679
timestamp_granularities: Optional[List[str]] = None,
680+
**kwargs,
654681
):
682+
kwargs.pop("request_id", None)
655683
if hasattr(self._model, "translations"):
656684
return await self._call_wrapper_json(
657685
self._model.translations,
@@ -668,10 +696,7 @@ async def translations(
668696

669697
@request_limit
670698
@xo.generator
671-
@log_async(
672-
logger=logger,
673-
args_formatter=lambda _, kwargs: kwargs.pop("prompt_speech", None),
674-
)
699+
@log_async(logger=logger, ignore_kwargs=["prompt_speech"])
675700
async def speech(
676701
self,
677702
input: str,
@@ -681,6 +706,7 @@ async def speech(
681706
stream: bool = False,
682707
**kwargs,
683708
):
709+
kwargs.pop("request_id", None)
684710
if hasattr(self._model, "speech"):
685711
return await self._call_wrapper_binary(
686712
self._model.speech,
@@ -695,8 +721,8 @@ async def speech(
695721
f"Model {self._model.model_spec} is not for creating speech."
696722
)
697723

698-
@log_async(logger=logger)
699724
@request_limit
725+
@log_async(logger=logger)
700726
async def text_to_image(
701727
self,
702728
prompt: str,
@@ -706,6 +732,7 @@ async def text_to_image(
706732
*args,
707733
**kwargs,
708734
):
735+
kwargs.pop("request_id", None)
709736
if hasattr(self._model, "text_to_image"):
710737
return await self._call_wrapper_json(
711738
self._model.text_to_image,
@@ -720,6 +747,10 @@ async def text_to_image(
720747
f"Model {self._model.model_spec} is not for creating image."
721748
)
722749

750+
@log_async(
751+
logger=logger,
752+
ignore_kwargs=["image"],
753+
)
723754
async def image_to_image(
724755
self,
725756
image: "PIL.Image",
@@ -731,6 +762,7 @@ async def image_to_image(
731762
*args,
732763
**kwargs,
733764
):
765+
kwargs.pop("request_id", None)
734766
if hasattr(self._model, "image_to_image"):
735767
return await self._call_wrapper_json(
736768
self._model.image_to_image,
@@ -747,6 +779,10 @@ async def image_to_image(
747779
f"Model {self._model.model_spec} is not for creating image."
748780
)
749781

782+
@log_async(
783+
logger=logger,
784+
ignore_kwargs=["image"],
785+
)
750786
async def inpainting(
751787
self,
752788
image: "PIL.Image",
@@ -759,6 +795,7 @@ async def inpainting(
759795
*args,
760796
**kwargs,
761797
):
798+
kwargs.pop("request_id", None)
762799
if hasattr(self._model, "inpainting"):
763800
return await self._call_wrapper_json(
764801
self._model.inpainting,
@@ -776,12 +813,13 @@ async def inpainting(
776813
f"Model {self._model.model_spec} is not for creating image."
777814
)
778815

779-
@log_async(logger=logger)
780816
@request_limit
817+
@log_async(logger=logger, ignore_kwargs=["image"])
781818
async def infer(
782819
self,
783820
**kwargs,
784821
):
822+
kwargs.pop("request_id", None)
785823
if hasattr(self._model, "infer"):
786824
return await self._call_wrapper_json(
787825
self._model.infer,
@@ -791,15 +829,16 @@ async def infer(
791829
f"Model {self._model.model_spec} is not for flexible infer."
792830
)
793831

794-
@log_async(logger=logger)
795832
@request_limit
833+
@log_async(logger=logger)
796834
async def text_to_video(
797835
self,
798836
prompt: str,
799837
n: int = 1,
800838
*args,
801839
**kwargs,
802840
):
841+
kwargs.pop("request_id", None)
803842
if hasattr(self._model, "text_to_video"):
804843
return await self._call_wrapper_json(
805844
self._model.text_to_video,

xinference/core/utils.py

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import random
import string
import uuid
from typing import Dict, Generator, List, Optional, Tuple, Union

import orjson
from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown

from .._compat import BaseModel
from ..constants import XINFERENCE_LOG_ARG_MAX_LENGTH

logger = logging.getLogger(__name__)


def truncate_log_arg(arg) -> str:
    """Render *arg* as a string for logging, truncating long values.

    Values longer than ``XINFERENCE_LOG_ARG_MAX_LENGTH`` characters are cut
    and suffixed with ``...`` so huge payloads (prompts, binary blobs) do not
    flood the log file.
    """
    text = str(arg)
    if len(text) > XINFERENCE_LOG_ARG_MAX_LENGTH:
        return text[:XINFERENCE_LOG_ARG_MAX_LENGTH] + "..."
    return text


def log_async(
    logger,
    level=logging.DEBUG,
    ignore_kwargs: Optional[List[str]] = None,
    log_exception=True,
):
    """Decorator for coroutine functions that logs entry, exit and failures.

    Each call is prefixed with ``[request <id>]`` where the id is taken from
    the ``request_id`` kwarg when present, otherwise freshly generated.
    Arguments are rendered via :func:`truncate_log_arg`; kwargs named in
    *ignore_kwargs* (e.g. raw audio/image payloads) are omitted from the log
    line. Exceptions are logged — with traceback when *log_exception* is
    true — and always re-raised.

    Args:
        logger: destination :class:`logging.Logger`.
        level: level used for enter/leave messages (default ``DEBUG``).
        ignore_kwargs: kwarg names to exclude from the logged argument list.
        log_exception: when true, failures are logged at ERROR with
            ``exc_info``; otherwise at *level* without a traceback.
    """
    import time
    from functools import wraps

    def decorator(func):
        func_name = func.__name__

        @wraps(func)
        async def wrapped(*args, **kwargs):
            request_id_str = kwargs.get("request_id", "")
            if not request_id_str:
                request_id_str = uuid.uuid1()
            request_id_str = f"[request {request_id_str}]"
            # Only pay for stringifying (possibly huge) arguments when the
            # target level is actually enabled on this logger.
            if logger.isEnabledFor(level):
                formatted_args = ",".join(map(truncate_log_arg, args))
                formatted_kwargs = ",".join(
                    "%s=%s" % (k, truncate_log_arg(v))
                    for k, v in kwargs.items()
                    if ignore_kwargs is None or k not in ignore_kwargs
                )
                logger.log(
                    level,
                    f"{request_id_str} Enter {func_name}, args: {formatted_args}, kwargs: {formatted_kwargs}",
                )
            start = time.time()
            try:
                ret = await func(*args, **kwargs)
                logger.log(
                    level,
                    f"{request_id_str} Leave {func_name}, elapsed time: {int(time.time() - start)} s",
                )
                return ret
            except Exception as e:
                if log_exception:
                    logger.error(
                        f"{request_id_str} Leave {func_name}, error: {e}, elapsed time: {int(time.time() - start)} s",
                        exc_info=True,
                    )
                else:
                    logger.log(
                        level,
                        f"{request_id_str} Leave {func_name}, error: {e}, elapsed time: {int(time.time() - start)} s",
                    )
                raise

        return wrapped

    return decorator


def log_sync(logger, level=logging.DEBUG, log_exception=True):
    """Synchronous counterpart of :func:`log_async` (no request-id prefix).

    Logs entry with truncated arguments, exit with elapsed seconds, and
    failures (with traceback when *log_exception* is true); exceptions are
    always re-raised.
    """
    import time
    from functools import wraps

    def decorator(func):
        func_name = func.__name__

        @wraps(func)
        def wrapped(*args, **kwargs):
            # Skip argument stringification entirely when the level is
            # disabled — truncate_log_arg may str() very large objects.
            if logger.isEnabledFor(level):
                formatted_args = ",".join(map(truncate_log_arg, args))
                formatted_kwargs = ",".join(
                    "%s=%s" % (k, truncate_log_arg(v)) for k, v in kwargs.items()
                )
                logger.log(
                    level,
                    f"Enter {func_name}, args: {formatted_args}, kwargs: {formatted_kwargs}",
                )
            start = time.time()
            try:
                ret = func(*args, **kwargs)
                logger.log(
                    level,
                    f"Leave {func_name}, elapsed time: {int(time.time() - start)} s",
                )
                return ret
            except Exception as e:
                if log_exception:
                    logger.error(
                        f"Leave {func_name}, error: {e}, elapsed time: {int(time.time() - start)} s",
                        exc_info=True,
                    )
                else:
                    logger.log(
                        level,
                        f"Leave {func_name}, error: {e}, elapsed time: {int(time.time() - start)} s",
                    )
                raise

        return wrapped

    return decorator
0 commit comments

Comments
 (0)