Skip to content

Commit 12eeeb1

Browse files
committed
lock: add helpers to work with completion variables.
Tasks stuck on completion variables, for example ones executing mlx commands, will be in D state but will not appear in any of the currently supported lock scans. So add a helper to check and inform about tasks stuck on completion variables. Orabug: 38803445 Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
1 parent ad09667 commit 12eeeb1

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

drgn_tools/lock.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from drgn import StackFrame
3838
from drgn.helpers.common import identify_address
3939
from drgn.helpers.linux.cpumask import for_each_present_cpu
40+
from drgn.helpers.linux.list import list_empty
4041
from drgn.helpers.linux.percpu import per_cpu
4142
from drgn.helpers.linux.pid import find_task
4243

@@ -46,6 +47,7 @@
4647
from drgn_tools.deadlock import DependencyGraph
4748
from drgn_tools.locking import _RWSEM_READER_MASK
4849
from drgn_tools.locking import _RWSEM_READER_SHIFT
50+
from drgn_tools.locking import completion_for_each_task
4951
from drgn_tools.locking import for_each_mutex_waiter
5052
from drgn_tools.locking import for_each_rwsem_waiter
5153
from drgn_tools.locking import get_lock_from_frame
@@ -384,6 +386,61 @@ def show_rwsem_lock(
384386
print("")
385387

386388

389+
def show_completion(
    prog: Program,
    frame_list: List[Tuple[Object, StackFrame]],
    stack: bool,
    time: Optional[int] = None,
    pid: Optional[int] = None,
) -> None:
    """
    Show details of the completion variables found in the given frames.

    Every distinct completion is printed once, followed by its waiters.
    A summary with the number of waiting tasks, the number of completions
    and the number of completions without waiters is printed at the end.

    :param prog: drgn program
    :param frame_list: ``(task, frame)`` pairs to search for a completion
    :param stack: forwarded to ``show_lock_waiter()`` as ``stacktrace``
    :param time: when given and non-zero, only waiters whose
        ``task_lastrun2now()`` value exceeds this many seconds are shown
    :param pid: when given, the task with this PID is reported for each
        completion instead of walking the completion's waiter list
    """
    wtask = None
    if pid is not None:
        wtask = find_task(prog, pid)

    # A missing or zero threshold means "show every waiter". The value is
    # loop-invariant, so convert it to nanoseconds once.
    threshold_ns = (time or 0) * 1000000000

    seen_completions: Set[int] = set()
    completion_waiters: Set[int] = set()
    completions_done = 0

    for task, frame in frame_list:
        completion = get_lock_from_frame(prog, task, frame, "completion", "x")
        if not completion:
            continue
        completion_addr = completion.value_()
        if completion_addr in seen_completions:
            # Several tasks may wait on the same completion; report it once.
            continue
        seen_completions.add(completion_addr)

        addr_info = _addr_info(prog, completion_addr)
        print(f"Completion: 0x{completion_addr:x}{addr_info}")

        # The list head inside completion->wait is called ``task_list`` for
        # simple wait queues (v5.7 and later) and for regular wait queues
        # before v4.13, but ``head`` for regular wait queues in between.
        wait = completion.wait
        try:
            waiter_list = wait.task_list
        except AttributeError:
            waiter_list = wait.head
        if list_empty(waiter_list.address_of_()):
            completions_done += 1
            continue

        if pid is None:
            waiters = completion_for_each_task(completion)
            for index, waiter in enumerate(waiters, start=1):
                completion_waiters.add(waiter.value_())
                if (
                    threshold_ns == 0
                    or task_lastrun2now(waiter) > threshold_ns
                ):
                    show_lock_waiter(prog, waiter, index, stacktrace=stack)
        else:
            # Only the requested task is reported; it keeps index 0.
            show_lock_waiter(prog, wtask, 0, stacktrace=stack)

        print("")

    print(f"Number of tasks waiting on completions: {len(completion_waiters)}")
    print(f"Number of completions: {len(seen_completions)}")
    print(f"Number of completions with no waiters: {completions_done}")
    print("")
442+
443+
387444
def scan_sem_lock(
388445
prog: Program,
389446
stack: bool,
@@ -433,6 +490,25 @@ def scan_rwsem_lock(
433490
show_rwsem_lock(prog, frame_list, stack, time, pid)
434491

435492

493+
def scan_completion(
    prog: Program,
    stack: bool,
    time: Optional[int] = None,
    pid: Optional[int] = None,
) -> None:
    """
    Find tasks blocked on completion variables and print their details.

    :param prog: drgn program
    :param stack: forwarded to ``show_completion()``
    :param time: forwarded to ``show_completion()``
    :param pid: when given, only the task with this PID is searched
    """
    wtask = find_task(prog, pid) if pid is not None else None

    # Stack frames of these functions are searched for the completion.
    wait_functions = ["__wait_for_common"]
    frame_list = bt_has_any(prog, wait_functions, wtask, one_per_task=True)
    if not frame_list:
        return
    show_completion(prog, frame_list, stack, time, pid)
510+
511+
436512
def scan_lock(
437513
prog: Program,
438514
stack: bool,

drgn_tools/locking.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from drgn.helpers.linux.sched import cpu_curr
2626
from drgn.helpers.linux.sched import task_cpu
2727
from drgn.helpers.linux.sched import task_state_to_char
28+
from drgn.helpers.linux.wait import waitqueue_for_each_task
2829

2930
from drgn_tools.bt import bt
3031
from drgn_tools.mm import AddrKind
@@ -670,3 +671,35 @@ def get_lock_from_frame(
670671
if is_task_blocked_on_lock(pid, kind, lock):
671672
return lock
672673
return None
674+
675+
676+
# Once next drgn version has been released and we have moved to that
677+
# we will have helpers to traverse completion waiters and swaitq
678+
# Until then we use following helpers (taken from my upstream commit)
679+
def swait_for_each_task(wq: Object) -> Iterable[Object]:
    """
    Iterate over the ``task`` member of every ``struct swait_queue`` entry
    linked on the ``task_list`` of the given queue head.

    :param wq: object with a ``task_list`` list head member
    :return: Iterator of the ``task`` member of each entry
    """
    waiters_head = wq.task_list.address_of_()
    for swait_entry in list_for_each_entry(
        "struct swait_queue", waiters_head, "task_list"
    ):
        yield swait_entry.task
684+
685+
686+
def completion_for_each_task(completion: Object) -> Iterable[Object]:
    """
    Walk the tasks that are waiting on a completion variable.

    :param completion: ``struct completion *``
    :return: Iterator of ``struct task_struct *`` objects.
    """
    wait_head = completion.wait.address_of_()
    # Two kernel changes make the member name alone ambiguous:
    #  - 2055da97389a ("sched/wait: Disambiguate wq_entry->task_list and
    #    wq_head->task_list naming") (in v4.13) renamed the regular wait
    #    queue head's task_list member to head.
    #  - a5c6234e1028 ("completion: Use simple wait queues") (in v5.7)
    #    switched completion->wait to a simple wait queue.
    # As a result completion->wait has a task_list member both before v4.13
    # and from v5.7 onwards, but with different types. The backend is
    # therefore picked from the type of completion->wait.
    uses_swait = wait_head.type_.type_name() == "struct swait_queue_head *"
    walk_waiters = (
        swait_for_each_task if uses_swait else waitqueue_for_each_task
    )
    return walk_waiters(wait_head)

0 commit comments

Comments
 (0)