Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions encord/common/integer_range_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import bisect
import sys
from typing import List, Optional, Tuple


class IntegerRangeSet:
    """Sorted collection of disjoint, inclusive integer ranges.

    Ranges are kept as ``(start, end)`` tuples in ascending order in
    ``self._ranges``. ``add`` merges ranges that overlap or are directly
    adjacent (e.g. ``(1, 3)`` and ``(4, 6)``), so lookups can use ``bisect``
    on the stored list.
    """

    def __init__(self) -> None:
        """Create an empty range set."""
        self._ranges: List[Tuple[int, int]] = []

    def add(self, start: int, end: int) -> None:
        """Insert the inclusive range ``[start, end]``, merging where needed.

        Any stored range that overlaps the new range, or touches it with no
        gap in between, is folded into a single stored range.

        Args:
            start: First integer of the range (inclusive).
            end: Last integer of the range (inclusive).

        Raises:
            ValueError: If ``start`` is greater than ``end``.
        """
        # An inverted range is a caller bug; fail loudly rather than
        # silently ignoring the request.
        if start > end:
            raise ValueError(f"Invalid range: start ({start}) must not be greater than end ({end}).")

        # 1. Find the merge window [lo, hi) of stored ranges to be replaced.
        lo = bisect.bisect_left(self._ranges, (start, end))
        # The range just before the insertion point may still overlap or be
        # adjacent (its end reaches at least start - 1); if so, absorb it.
        if lo > 0 and self._ranges[lo - 1][1] >= start - 1:
            lo -= 1
            start = self._ranges[lo][0]

        # Searching for (end + 1, maxsize) also captures a range that begins
        # exactly at end + 1, i.e. one that is adjacent on the right.
        hi = bisect.bisect_right(self._ranges, (end + 1, sys.maxsize))
        if hi > lo:
            # Only the last range in the window can extend past `end`.
            end = max(end, self._ranges[hi - 1][1])

        # 2. Replace the whole window with the single merged range.
        self._ranges[lo:hi] = [(start, end)]

    def intersection(self, start: int, end: int) -> Optional[List[Tuple[int, int]]]:
        """Return the parts of ``[start, end]`` that are covered by this set.

        Complexity: O(log N + K) where K is the number of overlapping fragments.

        Args:
            start: First integer of the query range (inclusive).
            end: Last integer of the query range (inclusive).

        Returns:
            ``None`` if ``start > end`` or the set is empty; otherwise a list
            of ``(start, end)`` overlap tuples in ascending order, which is
            empty when nothing overlaps.
        """
        if start > end or not self._ranges:
            return None

        # 1. Define the search window.
        # lo: the last stored range that starts at or before `start`
        #     (clamped to 0 when `start` precedes every stored range).
        # hi: the first stored range that starts strictly after `end`.
        lo = max(0, bisect.bisect_right(self._ranges, (start, sys.maxsize)) - 1)
        hi = bisect.bisect_right(self._ranges, (end, sys.maxsize))

        overlaps: List[Tuple[int, int]] = []

        # 2. Clip each candidate range to the query range.
        for r_start, r_end in self._ranges[lo:hi]:
            o_start = max(start, r_start)
            o_end = min(end, r_end)

            # The candidate at `lo` may end before `start`; in that case the
            # clipped range is inverted and must be skipped.
            if o_start <= o_end:
                overlaps.append((o_start, o_end))

        return overlaps

    def remove(self, start: int, end: int) -> None:
        """Remove ``[start, end]`` from the set, splitting or deleting ranges.

        Does nothing when ``start > end``.

        Args:
            start: First integer to remove (inclusive).
            end: Last integer to remove (inclusive).
        """
        if start > end:
            return

        # 1. Find the search window.
        # lo: the last stored range that starts at or before `start`
        #     (clamped to 0), i.e. the first range that could overlap.
        # hi: the first stored range that starts strictly after `end`.
        lo = max(0, bisect.bisect_right(self._ranges, (start, sys.maxsize)) - 1)
        hi = bisect.bisect_right(self._ranges, (end, sys.maxsize))

        survivors: List[Tuple[int, int]] = []

        # 2. Visit only the affected slice.
        for r_start, r_end in self._ranges[lo:hi]:
            # The `lo` search is slightly loose: the range there may lie
            # entirely to the left of the removal, so keep it untouched.
            if r_end < start:
                survivors.append((r_start, r_end))
                continue

            # Left survivor: the part of the range before the removal starts.
            if r_start < start:
                survivors.append((r_start, start - 1))

            # Right survivor: the part of the range after the removal ends.
            if r_end > end:
                survivors.append((end + 1, r_end))

        # 3. Replace the affected slice with whatever survived.
        self._ranges[lo:hi] = survivors

    def clear(self) -> None:
        """Remove every range from the set."""
        # Rebinds to a fresh list rather than clearing in place, so a
        # previously obtained reference to the old list is left unchanged.
        self._ranges = []

    def __repr__(self) -> str:
        """Return a debug representation showing the class name and ranges."""
        return f"{type(self).__name__}({self._ranges})"
113 changes: 42 additions & 71 deletions encord/common/range_manager.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,53 @@
from typing import Iterable, List, Optional, Set, Union, cast
from typing import List, Optional, Set, Tuple, cast

from encord.common.integer_range_set import IntegerRangeSet
from encord.objects.frames import Frames, Range, Ranges


class RangeManager:
"""Range Manager class to hold a list of frame ranges, and operate on them."""
"""Range Manager implemented using IntegerRangeSet for optimal performance.

This is a wrapper around IntegerRangeSet that provides the same API as RangeManager
but with improved performance characteristics.
"""

def __init__(self, frame_class: Optional[Frames] = None):
self.ranges: Ranges = []
self._range_set = IntegerRangeSet()

if isinstance(frame_class, int):
self.add_range(Range(start=frame_class, end=frame_class))
self._range_set.add(frame_class, frame_class)
elif isinstance(frame_class, Range):
self.add_range(frame_class)
self._range_set.add(frame_class.start, frame_class.end)
elif isinstance(frame_class, (list, set)):
if all(isinstance(x, int) for x in frame_class):
# Add individual frames
for frame in frame_class:
self.add_range(Range(start=cast(int, frame), end=cast(int, frame)))
self._range_set.add(cast(int, frame), cast(int, frame))
elif all(isinstance(x, Range) for x in frame_class):
self.add_ranges(cast(Ranges, frame_class))
# Add ranges
for r in cast(Ranges, frame_class):
self._range_set.add(r.start, r.end)
elif frame_class is None:
self.ranges = []
pass # Empty range set
else:
raise RuntimeError(f"Unexpected type for frames {type(frame_class)}.")

@property
def ranges(self) -> List[Tuple[int, int]]:
return self._range_set._ranges

def add_range(self, new_range: Range) -> None:
"""Add a range, merging any overlapping ranges."""
if not self.ranges:
self.ranges.append(new_range)
return

merged_ranges = []

# Sort ranges based on the start of each range
for existing_range in sorted(self.ranges, key=lambda r: r.start):
if existing_range.overlaps(new_range):
new_range.merge(existing_range)
else:
merged_ranges.append(existing_range)

merged_ranges.append(new_range) # Add the new (merged) range
self.ranges = sorted(merged_ranges, key=lambda r: r.start)
self._range_set.add(new_range.start, new_range.end)

def add_ranges(self, new_ranges: Ranges) -> None:
"""Add multiple ranges."""
for new_range in new_ranges:
self.add_range(new_range)
self._range_set.add(new_range.start, new_range.end)

def remove_range(self, range_to_remove: Range) -> None:
"""Remove a specific range."""
new_ranges = []

for r in self.ranges:
if not r.overlaps(range_to_remove):
# No overlap
new_ranges.append(r)
else:
# Partial overlap: split if needed
if r.start < range_to_remove.start:
new_ranges.append(Range(r.start, range_to_remove.start - 1))
if r.end > range_to_remove.end:
new_ranges.append(Range(range_to_remove.end + 1, r.end))

self.ranges = new_ranges
"""Remove a specific range using IntegerRangeSet's native remove method."""
self._range_set.remove(range_to_remove.start, range_to_remove.end)

def remove_ranges(self, ranges_to_remove: Ranges) -> None:
"""Remove multiple ranges."""
Expand All @@ -70,49 +56,34 @@ def remove_ranges(self, ranges_to_remove: Ranges) -> None:

def clear_ranges(self) -> None:
"""Clear all ranges."""
self.ranges = []
self._range_set.clear()

def get_ranges(self) -> Ranges:
"""Return the sorted list of merged ranges."""
copied_ranges = [range.copy() for range in self.ranges]

return sorted(copied_ranges, key=lambda r: r.start)
return [Range(start, end) for start, end in self._range_set._ranges]

def get_ranges_as_frames(self) -> Set[int]:
"""Returns set of intersecting frames"""
res = set()
for r in self.ranges:
res.update(list(range(r.start, r.end + 1)))

"""Returns set of intersecting frames."""
res: Set[int] = set()
for start, end in self._range_set._ranges:
res.update(range(start, end + 1))
return res

def intersection(self, other_frame_class: Frames) -> Ranges:
"""Returns list of intersecting ranges"""
intersection_ranges: Ranges = []
other_range_manager = RangeManager(other_frame_class)
other_ranges = other_range_manager.get_ranges()
current_ranges = self.get_ranges()
"""Returns list of intersecting ranges."""
# Convert other_frame_class to ranges
other_manager = RangeManager(other_frame_class)

# If either list of ranges is empty, there is no intersection
if len(other_ranges) == 0 or len(current_ranges) == 0:
if not self._range_set._ranges or not other_manager._range_set._ranges:
return []

# Since ranges are sorted, we can use 2-pointer method to find intersections
current_index, other_index = 0, 0

while current_index < len(current_ranges) and other_index < len(other_ranges):
current_range = current_ranges[current_index]
other_range = other_ranges[other_index]

if current_range.overlaps(other_range):
intersect_start = max(current_range.start, other_range.start)
intersect_end = min(current_range.end, other_range.end)
intersection_ranges.append(Range(intersect_start, intersect_end))
intersection_ranges: Ranges = []

# Move pointer for the range that ends first
if current_range.end < other_range.end:
current_index += 1
else:
other_index += 1
# For each range in self, check intersection with other
for self_start, self_end in self._range_set._ranges:
overlaps = other_manager._range_set.intersection(self_start, self_end)
if overlaps:
for o_start, o_end in overlaps:
intersection_ranges.append(Range(o_start, o_end))

return intersection_ranges
26 changes: 14 additions & 12 deletions encord/objects/classification_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,21 @@ def _set_for_frames(self, frames: Frames, overwrite: bool):
new_range_manager = RangeManager(frame_class=frames)
ranges_to_add = new_range_manager.get_ranges()

is_present, conflicting_ranges = self._is_classification_present_on_frames(ranges_to_add)
if is_present and not overwrite:
location_msg = "globally" if self.is_global() else f"on the ranges {conflicting_ranges}"
raise LabelRowError(
f"The classification '{self.classification_hash}' already exists "
f"{location_msg}."
f"Set 'overwrite' parameter to True to override."
)
else:
self._range_manager.add_ranges(ranges_to_add)
if not overwrite:
# if we're overwriting, we don't really care if there's a conflict
is_present, conflicting_ranges = self._is_classification_present_on_frames(ranges_to_add)
if is_present:
location_msg = "globally" if self.is_global() else f"on the ranges {conflicting_ranges}"
raise LabelRowError(
f"The classification '{self.classification_hash}' already exists "
f"{location_msg}."
f"Set 'overwrite' parameter to True to override."
)

if self._parent:
self._parent._add_frames_to_classification(self, ranges_to_add)
self._range_manager.add_ranges(ranges_to_add)

if self._parent:
self._parent._add_frames_to_classification(self, ranges_to_add)

def set_for_frames(
self,
Expand Down
2 changes: 2 additions & 0 deletions encord/objects/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class Range:
end (int): The ending value of the range.
"""

__slots__ = ["start", "end"] # Ranges are used heavily and are simple, so save memory

start: int
end: int

Expand Down
Loading
Loading