Skip to content

Commit e429c11

Browse files
authored
Ensure that the detected charset order is set in chardet test (#12574)
TestToUTF8WithFallback is the cause of recurrent spurious test failures, despite the code that sets the detected charset order. This happens because the preferred detected charset order is not being initialised for these tests. This PR simply ensures that the order is set at the start of each test, which also allows other tests to be written that use differing orders. Replaces #12571 Close #12571 Signed-off-by: Andrew Thornton <[email protected]>
1 parent 2026d88 commit e429c11

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

modules/charset/charset_test.go

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,30 @@
55
package charset
66

77
import (
8+
"strings"
89
"testing"
910

1011
"code.gitea.io/gitea/modules/setting"
1112

1213
"github.com/stretchr/testify/assert"
1314
)
1415

16+
func resetDefaultCharsetsOrder() {
17+
defaultDetectedCharsetsOrder := make([]string, 0, len(setting.Repository.DetectedCharsetsOrder))
18+
for _, charset := range setting.Repository.DetectedCharsetsOrder {
19+
defaultDetectedCharsetsOrder = append(defaultDetectedCharsetsOrder, strings.ToLower(strings.TrimSpace(charset)))
20+
}
21+
setting.Repository.DetectedCharsetScore = map[string]int{}
22+
i := 0
23+
for _, charset := range defaultDetectedCharsetsOrder {
24+
canonicalCharset := strings.ToLower(strings.TrimSpace(charset))
25+
if _, has := setting.Repository.DetectedCharsetScore[canonicalCharset]; !has {
26+
setting.Repository.DetectedCharsetScore[canonicalCharset] = i
27+
i++
28+
}
29+
}
30+
}
31+
1532
func TestRemoveBOMIfPresent(t *testing.T) {
1633
res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
1734
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
@@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) {
2138
}
2239

2340
func TestToUTF8WithErr(t *testing.T) {
41+
resetDefaultCharsetsOrder()
2442
var res string
2543
var err error
2644

@@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) {
7694
}
7795

7896
func TestToUTF8WithFallback(t *testing.T) {
97+
resetDefaultCharsetsOrder()
7998
// "ABC"
8099
res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
81100
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) {
116135
}
117136

118137
func TestToUTF8(t *testing.T) {
119-
138+
resetDefaultCharsetsOrder()
120139
// Note: golang compiler seems to behave differently depending on the current
121140
// locale, so some conversions might behave differently. For that reason, we don't
122141
// depend on particular conversions but in expected behaviors.
@@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) {
165184
}
166185

167186
func TestToUTF8DropErrors(t *testing.T) {
187+
resetDefaultCharsetsOrder()
168188
// "ABC"
169189
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
170190
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) {
204224
}
205225

206226
func TestDetectEncoding(t *testing.T) {
227+
resetDefaultCharsetsOrder()
207228
testSuccess := func(b []byte, expected string) {
208229
encoding, err := DetectEncoding(b)
209230
assert.NoError(t, err)
@@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) {
225246
b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
226247
encoding, err := DetectEncoding(b)
227248
assert.NoError(t, err)
228-
// due to a race condition in `chardet` library, it could either detect
229-
// "ISO-8859-1" or "IS0-8859-2" here. Technically either is correct, so
230-
// we accept either.
231-
assert.Contains(t, encoding, "ISO-8859")
249+
assert.Contains(t, encoding, "ISO-8859-1")
232250

233251
old := setting.Repository.AnsiCharset
234252
setting.Repository.AnsiCharset = "placeholder"

0 commit comments

Comments
 (0)