Skip to content

Commit

Permalink
Collect level runs during explicit::compute().
Browse files Browse the repository at this point in the history
We can easily accumulate the level runs as part of the initial explicit::compute()
pass over the text; this avoids the need for a separate pass over the levels array
at the beginning of prepare::isolating_run_sequences to collect them.
  • Loading branch information
jfkthame committed Mar 3, 2024
1 parent 893c4fe commit 77ca01f
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 9 deletions.
24 changes: 24 additions & 0 deletions src/explicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,24 @@ use super::char_data::{
BidiClass::{self, *},
};
use super::level::Level;
use super::prepare::removed_by_x9;
use super::LevelRunVec;
use super::TextSource;

/// Compute explicit embedding levels for one paragraph of text (X1-X8).
///
/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
/// for each char in `text`.
///
/// The level runs (BD7) of the text are appended to `runs` as they are identified.
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn compute<'a, T: TextSource<'a> + ?Sized>(
text: &'a T,
para_level: Level,
original_classes: &[BidiClass],
levels: &mut [Level],
processing_classes: &mut [BidiClass],
runs: &mut LevelRunVec,
) {
assert_eq!(text.len(), original_classes.len());

Expand All @@ -51,6 +56,9 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
let mut overflow_embedding_count = 0u32;
let mut valid_isolate_count = 0u32;

let mut current_run_level = Level::ltr();
let mut current_run_start = 0;

for (i, len) in text.indices_lengths() {
let last = stack.last().unwrap();

Expand Down Expand Up @@ -182,6 +190,22 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
levels[i + j] = levels[i];
processing_classes[i + j] = processing_classes[i];
}

// Check if we need to start a new level run.
if i == 0 {
current_run_level = levels[i];
} else {
if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
// End the last run and start a new one.
runs.push(current_run_start..i);
current_run_level = levels[i];
current_run_start = i;
}
}
}

if levels.len() > current_run_start {
runs.push(current_run_start..levels.len());
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ mod prepare;
pub use crate::char_data::{BidiClass, UNICODE_VERSION};
pub use crate::data_source::BidiDataSource;
pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
pub use crate::prepare::LevelRun;
pub use crate::prepare::{LevelRun, LevelRunVec};

#[cfg(feature = "hardcoded-data")]
pub use crate::char_data::{bidi_class, HardcodedBidiData};
Expand Down Expand Up @@ -1099,20 +1099,23 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>

let processing_classes = &mut processing_classes[para.range.clone()];
let levels = &mut levels[para.range.clone()];
let mut level_runs = LevelRunVec::new();

explicit::compute(
text,
para.level,
original_classes,
levels,
processing_classes,
&mut level_runs,
);

let mut sequences = prepare::IsolatingRunSequenceVec::new();
prepare::isolating_run_sequences(
para.level,
original_classes,
levels,
level_runs,
has_isolate_controls,
&mut sequences,
);
Expand Down
53 changes: 45 additions & 8 deletions src/prepare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ use super::BidiClass::{self, *};
/// Represented as a range of byte indices.
pub type LevelRun = Range<usize>;

/// Container used to hold a list of [`LevelRun`]s.
///
/// With the `smallvec` feature enabled, this is a `SmallVec` backed by an
/// 8-element `LevelRun` array.
#[cfg(feature = "smallvec")]
pub type LevelRunVec = SmallVec<[LevelRun; 8]>;
/// Container used to hold a list of [`LevelRun`]s.
///
/// Without the `smallvec` feature, this is a plain `Vec`.
#[cfg(not(feature = "smallvec"))]
pub type LevelRunVec = Vec<LevelRun>;

/// Output of `isolating_run_sequences` (steps X9-X10)
#[derive(Debug, PartialEq)]
pub struct IsolatingRunSequence {
Expand All @@ -50,11 +55,10 @@ pub fn isolating_run_sequences(
para_level: Level,
original_classes: &[BidiClass],
levels: &[Level],
runs: LevelRunVec,
has_isolate_controls: bool,
isolating_run_sequences: &mut IsolatingRunSequenceVec,
) {
let runs = level_runs(levels, original_classes);

// Per http://www.unicode.org/reports/tr9/#BD13:
// "In the absence of isolate initiators, each isolating run sequence in a paragraph
// consists of exactly one level run, and each level run constitutes a separate
Expand Down Expand Up @@ -97,7 +101,7 @@ pub fn isolating_run_sequences(
};

isolating_run_sequences.push(IsolatingRunSequence {
runs: vec![run],
runs: vec![run.clone()],
sos: max(seq_level, pred_level).bidi_class(),
eos: max(end_level, succ_level).bidi_class(),
});
Expand Down Expand Up @@ -272,6 +276,9 @@ impl IsolatingRunSequence {
/// Finds the level runs in a paragraph.
///
/// <http://www.unicode.org/reports/tr9/#BD7>
///
/// Only used for tests; otherwise level runs are collected during `explicit::compute`.
#[cfg(test)]
fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
assert_eq!(levels.len(), original_classes.len());

Expand Down Expand Up @@ -332,7 +339,13 @@ mod tests {
let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
let mut sequences = IsolatingRunSequenceVec::new();
isolating_run_sequences(para_level, classes, &Level::vec(levels), false, &mut sequences);
isolating_run_sequences(
para_level,
classes,
&Level::vec(levels),
level_runs(&Level::vec(levels), classes).into(),
false,
&mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
Expand All @@ -346,7 +359,13 @@ mod tests {
let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
let mut sequences = IsolatingRunSequenceVec::new();
isolating_run_sequences(para_level, classes, &Level::vec(levels), true, &mut sequences);
isolating_run_sequences(
para_level,
classes,
&Level::vec(levels),
level_runs(&Level::vec(levels), classes).into(),
true,
&mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
Expand All @@ -360,7 +379,13 @@ mod tests {
let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
let para_level = Level::ltr();
let mut sequences = IsolatingRunSequenceVec::new();
isolating_run_sequences(para_level, classes, &Level::vec(levels), true, &mut sequences);
isolating_run_sequences(
para_level,
classes,
&Level::vec(levels),
level_runs(&Level::vec(levels), classes).into(),
true,
&mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
Expand All @@ -380,7 +405,13 @@ mod tests {
let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
let mut sequences = IsolatingRunSequenceVec::new();
isolating_run_sequences(para_level, classes, &Level::vec(levels), false, &mut sequences);
isolating_run_sequences(
para_level,
classes,
&Level::vec(levels),
level_runs(&Level::vec(levels), classes).into(),
false,
&mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));

// text1
Expand Down Expand Up @@ -440,7 +471,13 @@ mod tests {
let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
let mut sequences = IsolatingRunSequenceVec::new();
isolating_run_sequences(para_level, classes, &Level::vec(levels), true, &mut sequences);
isolating_run_sequences(
para_level,
classes,
&Level::vec(levels),
level_runs(&Level::vec(levels), classes).into(),
true,
&mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));

// text1·RLI·PDI·RLI·PDI·text6
Expand Down

0 comments on commit 77ca01f

Please sign in to comment.