Skip to content

Commit 3bedd87

Browse files
committed
Add test code and remove unnecessary code
1 parent 8108bc2 commit 3bedd87

File tree

2 files changed

+53
-19
lines changed

2 files changed

+53
-19
lines changed

ext/mbstring/mbstring.c

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3244,31 +3244,17 @@ PHP_FUNCTION(mb_levenshtein)
32443244
}
32453245
if (tmp_wchar_len_2 == 0) {
32463246
/* Insertion process when there is a surplus of 128 code points. */
3247-
for (i2 = 0; i2 < tmp_wchar_len_1 && len_2 != 0; i2++) {
3248-
/* for overflow */
3249-
if (len_2 < tmp_wchar_len_1) {
3250-
c0 = p1[i2] + cost_rep;
3251-
c1 = p1[i2 + 1] + cost_del;
3252-
} else {
3253-
c0 = p1[i2 + (len_2 - tmp_wchar_len_1)] + cost_rep;
3254-
c1 = p1[i2 + (len_2 - tmp_wchar_len_1) + 1] + cost_del;
3255-
}
3247+
for (i2 = 0; i2 < tmp_wchar_len_1; i2++) {
3248+
c0 = p1[i2 + (len_2 - tmp_wchar_len_1)] + cost_rep;
3249+
c1 = p1[i2 + (len_2 - tmp_wchar_len_1) + 1] + cost_del;
32563250
if (c1 < c0) {
32573251
c0 = c1;
32583252
}
3259-
if (len_2 < tmp_wchar_len_1) {
3260-
c2 = p2[i2] + cost_ins;
3261-
} else {
3262-
c2 = p2[i2] + cost_ins;
3263-
}
3253+
c2 = p2[i2] + cost_ins;
32643254
if (c2 < c0) {
32653255
c0 = c2;
32663256
}
3267-
if (len_2 < tmp_wchar_len_1) {
3268-
p2[i2 + 1] = c0;
3269-
} else {
3270-
p2[i2 + (len_2 - tmp_wchar_len_1) + 1] = c0;
3271-
}
3257+
p2[i2 + (len_2 - tmp_wchar_len_1) + 1] = c0;
32723258
}
32733259
} else {
32743260
for (i2 = 0; i2 < tmp_wchar_len_2; i2++) {

ext/mbstring/tests/mb_levenshtein.phpt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,45 @@ echo '--- 128 codepoints over Hiragana in Shift_JIS ---' . \PHP_EOL;
4747
$hiragana_a = mb_convert_encoding("あ", "SJIS", "UTF-8");
4848
$hiragana_aiu = mb_convert_encoding("あいう", "SJIS", "UTF-8");
4949
var_dump(mb_levenshtein(str_repeat($hiragana_a, 128 + 3), str_repeat($hiragana_a, 128) . $hiragana_aiu, encoding: "SJIS"));
50+
51+
echo '--- Usecase of userland code ---' . \PHP_EOL;
52+
/* from: https://qiita.com/mpyw/items/2b636827730e06c71e3d */
53+
$query = 'ほあようごぁいまーしゅ';
54+
$comps = [
55+
'こんにちはー',
56+
'おはようございまーす',
57+
'こんばんはー',
58+
'おやすみなさーい',
59+
'いただきまーす',
60+
'おつかれさまー',
61+
'ぬぁあああんつかれたもぉぉぉぉぉぉん',
62+
];
63+
$min = 99999;
64+
$min_key = 0;
65+
foreach ($comps as $key => $comp) {
66+
$sim = mb_levenshtein($query, $comp);
67+
if ($min >= $sim) {
68+
$min = $sim;
69+
$min_key = $key;
70+
}
71+
}
72+
var_dump($comps[$min_key]);
73+
74+
$base = 'やんほぬ';
75+
$comps = [
76+
'かんのみほ',
77+
'かんのみほう',
78+
'かんぺみろ',
79+
'ああいいふろ',
80+
'ちゃんとみろ',
81+
'ターミナルさん',
82+
];
83+
foreach ($comps as $comp) {
84+
var_dump(mb_levenshtein($base, $comp));
85+
}
86+
87+
/* from: https://qiita.com/suin/items/a0a8227addad11ff2ea7 */
88+
var_dump(mb_levenshtein('あとうかい', 'かとうあい')); // int(2)
5089
?>
5190
--EXPECT--
5291
--- Equal ---
@@ -84,3 +123,12 @@ int(130)
84123
int(2)
85124
--- 128 codepoints over Hiragana in Shift_JIS ---
86125
int(2)
126+
--- Usecase of userland code ---
127+
string(30) "おはようございまーす"
128+
int(4)
129+
int(4)
130+
int(4)
131+
int(6)
132+
int(5)
133+
int(7)
134+
int(2)

0 commit comments

Comments
 (0)