Skip to content

Commit cbad406

Browse files
committed
add benchmarks; add explicit #[inline] annotations
This diff adds benchmarks to get more information regarding Issue #1. It appears that the remaining difference between the "simple" case and the "cargo" case is the result of a difference in performance between using `match` and `if` for tight loops. I suspect it's because of the way that match arms get reordered: if I manually reorder the `if` statement, I can reproduce the `match` performance. Also added a couple of `#[inline]` annotations in tables.rs, though the difference in performance in my measurements is negligible. Bumped version number to 0.1.1.
1 parent 1d27b56 commit cbad406

File tree

8 files changed

+169
-67
lines changed

8 files changed

+169
-67
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
target
22
Cargo.lock
3+
scripts/tmp

.travis.yml

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ script:
66
- cargo clean
77
- cargo build --verbose --features default
88
- cargo test --verbose --features default
9+
- cargo bench --verbose --features default
910
- rustdoc --test README.md -L target/debug -L target/debug/deps
1011
- cargo doc
1112
after_success: |

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "unicode-width"
4-
version = "0.1.0"
4+
version = "0.1.1"
55
authors = ["kwantam <[email protected]>"]
66

77
homepage = "https://github.com/unicode-rs/unicode-width"

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,5 @@ to your `Cargo.toml`:
3333

3434
```toml
3535
[dependencies]
36-
unicode-width = "0.1.0"
36+
unicode-width = "0.1.1"
3737
```

scripts/unicode.py

+2
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ def emit_charwidth_module(f, width_table):
206206
#[cfg(feature = "no_std")]
207207
use core::result::Result::{Ok, Err};
208208
209+
#[inline]
209210
fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
210211
#[cfg(feature = "no_std")]
211212
use core::cmp::Ordering::{Equal, Less, Greater};
@@ -226,6 +227,7 @@ def emit_charwidth_module(f, width_table):
226227
""")
227228

228229
f.write("""
230+
#[inline]
229231
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
230232
match c as usize {
231233
_c @ 0 => Some(0), // null is zero width

src/lib.rs

+13-65
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
//!
4040
//! ```toml
4141
//! [dependencies]
42-
//! unicode-width = "0.1.0"
42+
//! unicode-width = "0.1.1"
4343
//! ```
4444
4545
#![deny(missing_docs, unsafe_code)]
@@ -49,6 +49,8 @@
4949
#![cfg_attr(feature = "no_std", no_std)]
5050
#![cfg_attr(feature = "no_std", feature(no_std, core))]
5151

52+
#![cfg_attr(test, feature(test, unicode))]
53+
5254
#[cfg(feature = "no_std")]
5355
#[macro_use]
5456
extern crate core;
@@ -57,6 +59,9 @@ extern crate core;
5759
#[macro_use]
5860
extern crate std;
5961

62+
#[cfg(test)]
63+
extern crate test;
64+
6065
#[cfg(feature = "no_std")]
6166
use core::prelude::*;
6267

@@ -70,6 +75,9 @@ use std::ops::Add;
7075

7176
mod tables;
7277

78+
#[cfg(test)]
79+
mod tests;
80+
7381
/// Methods for determining displayed width of Unicode characters.
7482
pub trait UnicodeWidthChar {
7583
/// Returns the character's displayed width in columns, or `None` if the
@@ -92,8 +100,10 @@ pub trait UnicodeWidthChar {
92100
}
93101

94102
impl UnicodeWidthChar for char {
103+
#[inline]
95104
fn width(self) -> Option<usize> { cw::width(self, false) }
96105

106+
#[inline]
97107
fn width_cjk(self) -> Option<usize> { cw::width(self, true) }
98108
}
99109

@@ -121,75 +131,13 @@ pub trait UnicodeWidthStr {
121131
}
122132

123133
impl UnicodeWidthStr for str {
134+
#[inline]
124135
fn width(&self) -> usize {
125136
self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add)
126137
}
127138

139+
#[inline]
128140
fn width_cjk(&self) -> usize {
129141
self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add)
130142
}
131143
}
132-
133-
#[cfg(test)]
134-
mod tests {
135-
#[test]
136-
fn test_str() {
137-
use super::UnicodeWidthStr;
138-
139-
assert_eq!(UnicodeWidthStr::width("ｈｅｌｌｏ"), 10);
140-
assert_eq!("ｈｅｌｌｏ".width_cjk(), 10);
141-
assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
142-
assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);
143-
assert_eq!(UnicodeWidthStr::width(""), 0);
144-
assert_eq!("".width_cjk(), 0);
145-
assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
146-
assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
147-
}
148-
149-
#[test]
150-
fn test_char() {
151-
use super::UnicodeWidthChar;
152-
#[cfg(feature = "no_std")]
153-
use core::option::Option::{Some, None};
154-
155-
assert_eq!(UnicodeWidthChar::width('ｈ'), Some(2));
156-
assert_eq!('ｈ'.width_cjk(), Some(2));
157-
assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
158-
assert_eq!('\x00'.width_cjk(), Some(0));
159-
assert_eq!(UnicodeWidthChar::width('\x01'), None);
160-
assert_eq!('\x01'.width_cjk(), None);
161-
assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
162-
assert_eq!('\u{2081}'.width_cjk(), Some(2));
163-
}
164-
165-
#[test]
166-
fn test_char2() {
167-
use super::UnicodeWidthChar;
168-
#[cfg(feature = "no_std")]
169-
use core::option::Option::{Some, None};
170-
171-
assert_eq!(UnicodeWidthChar::width('\x00'),Some(0));
172-
assert_eq!('\x00'.width_cjk(),Some(0));
173-
174-
assert_eq!(UnicodeWidthChar::width('\x0A'),None);
175-
assert_eq!('\x0A'.width_cjk(),None);
176-
177-
assert_eq!(UnicodeWidthChar::width('w'),Some(1));
178-
assert_eq!('w'.width_cjk(),Some(1));
179-
180-
assert_eq!(UnicodeWidthChar::width('ｈ'),Some(2));
181-
assert_eq!('ｈ'.width_cjk(),Some(2));
182-
183-
assert_eq!(UnicodeWidthChar::width('\u{AD}'),Some(1));
184-
assert_eq!('\u{AD}'.width_cjk(),Some(1));
185-
186-
assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0));
187-
assert_eq!('\u{1160}'.width_cjk(),Some(0));
188-
189-
assert_eq!(UnicodeWidthChar::width('\u{a1}'),Some(1));
190-
assert_eq!('\u{a1}'.width_cjk(),Some(2));
191-
192-
assert_eq!(UnicodeWidthChar::width('\u{300}'),Some(0));
193-
assert_eq!('\u{300}'.width_cjk(),Some(0));
194-
}
195-
}

src/tables.rs

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub mod charwidth {
2424
#[cfg(feature = "no_std")]
2525
use core::result::Result::{Ok, Err};
2626

27+
#[inline]
2728
fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
2829
#[cfg(feature = "no_std")]
2930
use core::cmp::Ordering::{Equal, Less, Greater};
@@ -42,6 +43,7 @@ pub mod charwidth {
4243
}
4344
}
4445

46+
#[inline]
4547
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
4648
match c as usize {
4749
_c @ 0 => Some(0), // null is zero width

src/tests.rs

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use std::iter;
12+
use test::{self, Bencher};
13+
14+
use super::UnicodeWidthChar;
15+
16+
#[cfg(feature = "no_std")]
17+
use std::prelude::v1::*;
18+
19+
#[bench]
20+
fn cargo(b: &mut Bencher) {
21+
let string = iter::repeat('a').take(4096).collect::<String>();
22+
23+
b.iter(|| {
24+
for c in string.chars() {
25+
test::black_box(UnicodeWidthChar::width(c));
26+
}
27+
});
28+
}
29+
30+
#[bench]
31+
fn stdlib(b: &mut Bencher) {
32+
let string = iter::repeat('a').take(4096).collect::<String>();
33+
34+
b.iter(|| {
35+
for c in string.chars() {
36+
test::black_box(c.width(false));
37+
}
38+
});
39+
}
40+
41+
#[bench]
42+
fn simple_if(b: &mut Bencher) {
43+
let string = iter::repeat('a').take(4096).collect::<String>();
44+
45+
b.iter(|| {
46+
for c in string.chars() {
47+
test::black_box(simple_width_if(c));
48+
}
49+
});
50+
}
51+
52+
#[bench]
53+
fn simple_match(b: &mut Bencher) {
54+
let string = iter::repeat('a').take(4096).collect::<String>();
55+
56+
b.iter(|| {
57+
for c in string.chars() {
58+
test::black_box(simple_width_match(c));
59+
}
60+
});
61+
}
62+
63+
#[inline]
64+
fn simple_width_if(c: char) -> Option<usize> {
65+
let cu = c as u32;
66+
if cu < 127 {
67+
if cu > 31 {
68+
Some(1)
69+
} else if cu == 0 {
70+
Some(0)
71+
} else {
72+
None
73+
}
74+
} else {
75+
UnicodeWidthChar::width(c)
76+
}
77+
}
78+
79+
#[inline]
80+
fn simple_width_match(c: char) -> Option<usize> {
81+
match c as u32 {
82+
cu if cu == 0 => Some(0),
83+
cu if cu < 0x20 => None,
84+
cu if cu < 0x7f => Some(1),
85+
_ => UnicodeWidthChar::width(c)
86+
}
87+
}
88+
89+
#[test]
90+
fn test_str() {
91+
use super::UnicodeWidthStr;
92+
93+
assert_eq!(UnicodeWidthStr::width("ｈｅｌｌｏ"), 10);
94+
assert_eq!("ｈｅｌｌｏ".width_cjk(), 10);
95+
assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
96+
assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);
97+
assert_eq!(UnicodeWidthStr::width(""), 0);
98+
assert_eq!("".width_cjk(), 0);
99+
assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
100+
assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
101+
}
102+
103+
#[test]
104+
fn test_char() {
105+
use super::UnicodeWidthChar;
106+
#[cfg(feature = "no_std")]
107+
use core::option::Option::{Some, None};
108+
109+
assert_eq!(UnicodeWidthChar::width('ｈ'), Some(2));
110+
assert_eq!('ｈ'.width_cjk(), Some(2));
111+
assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
112+
assert_eq!('\x00'.width_cjk(), Some(0));
113+
assert_eq!(UnicodeWidthChar::width('\x01'), None);
114+
assert_eq!('\x01'.width_cjk(), None);
115+
assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
116+
assert_eq!('\u{2081}'.width_cjk(), Some(2));
117+
}
118+
119+
#[test]
120+
fn test_char2() {
121+
use super::UnicodeWidthChar;
122+
#[cfg(feature = "no_std")]
123+
use core::option::Option::{Some, None};
124+
125+
assert_eq!(UnicodeWidthChar::width('\x00'),Some(0));
126+
assert_eq!('\x00'.width_cjk(),Some(0));
127+
128+
assert_eq!(UnicodeWidthChar::width('\x0A'),None);
129+
assert_eq!('\x0A'.width_cjk(),None);
130+
131+
assert_eq!(UnicodeWidthChar::width('w'),Some(1));
132+
assert_eq!('w'.width_cjk(),Some(1));
133+
134+
assert_eq!(UnicodeWidthChar::width('ｈ'),Some(2));
135+
assert_eq!('ｈ'.width_cjk(),Some(2));
136+
137+
assert_eq!(UnicodeWidthChar::width('\u{AD}'),Some(1));
138+
assert_eq!('\u{AD}'.width_cjk(),Some(1));
139+
140+
assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0));
141+
assert_eq!('\u{1160}'.width_cjk(),Some(0));
142+
143+
assert_eq!(UnicodeWidthChar::width('\u{a1}'),Some(1));
144+
assert_eq!('\u{a1}'.width_cjk(),Some(2));
145+
146+
assert_eq!(UnicodeWidthChar::width('\u{300}'),Some(0));
147+
assert_eq!('\u{300}'.width_cjk(),Some(0));
148+
}

0 commit comments

Comments
 (0)