Skip to content

Commit eace8bf

Browse files
committed
feat: add str::to_precomposed_unicode(), effective on apple devices only.
1 parent 4138902 commit eace8bf

File tree

6 files changed

+104
-0
lines changed

6 files changed

+104
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-utils/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ doctest = false
1414

1515
[dependencies]
1616
fastrand = "2.0.0"
17+
unicode-normalization = { version = "0.1.19", default-features = false }
18+

gix-utils/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ pub mod backoff;
1010
///
1111
pub mod buffers;
1212

13+
///
14+
pub mod str;
15+
1316
/// A utility to do buffer-swapping with.
1417
///
1518
/// Use `src` to read from and `dest` to write to, and after actually changing data, call [Buffers::swap()].

gix-utils/src/str.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use std::borrow::Cow;
2+
use std::ffi::OsStr;
3+
use std::path::Path;
4+
5+
/// Assure that `s` is precomposed, i.e. `ä` is a single code-point, and not two i.e. `a` and `<umlaut>`.
6+
///
7+
/// At the expense of extra-compute, it does nothing if there is no work to be done, returning the original input without allocating.
8+
pub fn precompose(s: Cow<'_, str>) -> Cow<'_, str> {
9+
use unicode_normalization::UnicodeNormalization;
10+
if s.as_ref().nfc().cmp(s.as_ref().chars()).is_eq() {
11+
s
12+
} else {
13+
Cow::Owned(s.as_ref().nfc().collect())
14+
}
15+
}
16+
17+
/// Assure that `s` is decomposed, i.e. `ä` turns into `a` and `<umlaut>`.
18+
///
19+
/// At the expense of extra-compute, it does nothing if there is no work to be done, returning the original input without allocating.
20+
pub fn decompose(s: Cow<'_, str>) -> Cow<'_, str> {
21+
use unicode_normalization::UnicodeNormalization;
22+
if s.as_ref().nfd().cmp(s.as_ref().chars()).is_eq() {
23+
s
24+
} else {
25+
Cow::Owned(s.as_ref().nfd().collect())
26+
}
27+
}
28+
29+
/// Return the precomposed version of `path`, or `path` itself if it contained illformed unicode,
30+
/// or if the unicode version didn't contains decomposed unicode.
31+
/// Otherwise, similar to [`precompose()`]
32+
pub fn precompose_path(path: Cow<'_, Path>) -> Cow<'_, Path> {
33+
match path.to_str() {
34+
None => path,
35+
Some(maybe_decomposed) => match precompose(maybe_decomposed.into()) {
36+
Cow::Borrowed(_) => path,
37+
Cow::Owned(precomposed) => Cow::Owned(precomposed.into()),
38+
},
39+
}
40+
}
41+
42+
/// Return the precomposed version of `name`, or `name` itself if it contained illformed unicode,
43+
/// or if the unicode version didn't contains decomposed unicode.
44+
/// Otherwise, similar to [`precompose()`]
45+
pub fn precompose_os_string(path: Cow<'_, OsStr>) -> Cow<'_, OsStr> {
46+
match path.to_str() {
47+
None => path,
48+
Some(maybe_decomposed) => match precompose(maybe_decomposed.into()) {
49+
Cow::Borrowed(_) => path,
50+
Cow::Owned(precomposed) => Cow::Owned(precomposed.into()),
51+
},
52+
}
53+
}

gix-utils/tests/str/mod.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Tests for `gix_utils::str::decompose()`.
mod decompose {
    use std::borrow::Cow;

    // A single precomposed code-point must be split up, which requires new data.
    #[test]
    fn precomposed_unicode_is_decomposed() {
        let input = "ä";
        let output = gix_utils::str::decompose(Cow::Borrowed(input));
        assert!(matches!(output, Cow::Owned(_)), "new data is produced");
        assert_eq!(output, "a\u{308}");
    }

    // Input that is decomposed already must be handed back as the very same borrow.
    #[test]
    fn already_decomposed_does_not_copy() {
        let input = "a\u{308}";
        let output = gix_utils::str::decompose(Cow::Borrowed(input));
        assert!(
            matches!(output, Cow::Borrowed(_)),
            "pass-through as nothing needs to be done"
        );
        assert_eq!(output, input);
    }
}
23+
24+
// Tests for `gix_utils::str::precompose()`.
mod precompose {
    use std::borrow::Cow;

    // A base character followed by a combining mark must collapse into one code-point,
    // which requires new data.
    #[test]
    fn decomposed_unicode_is_precomposed() {
        let input = "a\u{308}";
        let output = gix_utils::str::precompose(Cow::Borrowed(input));
        assert!(matches!(output, Cow::Owned(_)), "new data is produced");
        let code_points: Vec<_> = output.chars().collect();
        assert_eq!(code_points, ['ä']);
    }

    // Input that is precomposed already must be handed back as the very same borrow.
    #[test]
    fn already_precomposed_does_not_copy() {
        let output = gix_utils::str::precompose(Cow::Borrowed("ä"));
        assert!(
            matches!(output, Cow::Borrowed(_)),
            "pass-through as nothing needs to be done"
        );
        let code_points: Vec<_> = output.chars().collect();
        assert_eq!(code_points, ['ä']);
    }
}

gix-utils/tests/utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
mod backoff;
22
mod buffers;
3+
mod str;

0 commit comments

Comments
 (0)