@@ -123,15 +123,42 @@ fn parse_desc(desc: str) -> option<str> {
123
123
fn first_sentence ( s : str ) -> option < str > {
124
124
let paras = paragraphs ( s) ;
125
125
if vec:: is_not_empty ( paras) {
126
- let first = vec:: head ( sentences ( vec :: head ( paras) ) ) ;
127
- some ( str:: replace ( first , "\n " , " " ) )
126
+ let first_para = vec:: head ( paras) ;
127
+ some ( str:: replace ( first_sentence_ ( first_para ) , "\n " , " " ) )
128
128
} else {
129
129
none
130
130
}
131
131
}
132
132
133
- fn sentences ( s : str ) -> [ str ] {
134
- str:: split_char ( s, '.' )
133
+ fn first_sentence_ ( s : str ) -> str {
134
+ let dotcount = 0 ;
135
+ // The index of the character following a single dot. This allows
136
+ // Things like [0..1) to appear in the brief description
137
+ let idx = str:: find ( s) { |ch|
138
+ if ch == '.' {
139
+ dotcount += 1 ;
140
+ false
141
+ } else {
142
+ if dotcount == 1 {
143
+ true
144
+ } else {
145
+ dotcount = 0 ;
146
+ false
147
+ }
148
+ }
149
+ } ;
150
+ alt idx {
151
+ some( idx) if idx > 2 u {
152
+ str:: slice ( s, 0 u, idx - 1 u)
153
+ }
154
+ _ {
155
+ if str:: ends_with ( s, "." ) {
156
+ str:: slice ( s, 0 u, str:: len ( s) )
157
+ } else {
158
+ s
159
+ }
160
+ }
161
+ }
135
162
}
136
163
137
164
fn paragraphs ( s : str ) -> [ str ] {
@@ -216,4 +243,34 @@ counties.");
216
243
let brief = extract ( desc) ;
217
244
assert brief == some (
218
245
"Warkworth Castle is a ruined medieval building in the town" ) ;
219
- }
246
+ }
247
+
248
// Checks that a doubled period ("..") is not taken as the end of the first
// sentence: the description starts with "Warkworth..Castle" and the expected
// brief runs on until the single period after "town".
// NOTE(review): `extract` is defined outside this view; presumably it applies
// the brief-extraction logic to `desc` -- confirm against its definition.
+ #[ test]
249
+ fn should_not_consider_double_period_to_end_sentence ( ) {
250
+ let desc = some ( "Warkworth..Castle is a ruined medieval building
251
+ in the town. of the same name in the English county of Northumberland,
252
+ and the town and castle occupy a loop of the River Coquet, less than a mile
253
+ from England's north-east coast. When the castle was founded is uncertain,
254
+ but traditionally its construction has been ascribed to Prince Henry of
255
+ Scotland in the mid 12th century, although it may have been built by
256
+ King Henry II of England when he took control of England'snorthern
257
+ counties." ) ;
258
+ let brief = extract ( desc) ;
259
// The brief must keep the ".." and stop before the first single period.
+ assert brief == some (
260
+ "Warkworth..Castle is a ruined medieval building in the town" ) ;
261
+ }
262
+
263
// Checks that a tripled period ("...") is not taken as the end of the first
// sentence: the description starts with "Warkworth... Castle" and the
// expected brief runs on until the single period after "town".
// NOTE(review): `extract` is defined outside this view; presumably it applies
// the brief-extraction logic to `desc` -- confirm against its definition.
+ #[ test]
264
+ fn should_not_consider_triple_period_to_end_sentence ( ) {
265
+ let desc = some ( "Warkworth... Castle is a ruined medieval building
266
+ in the town. of the same name in the English county of Northumberland,
267
+ and the town and castle occupy a loop of the River Coquet, less than a mile
268
+ from England's north-east coast. When the castle was founded is uncertain,
269
+ but traditionally its construction has been ascribed to Prince Henry of
270
+ Scotland in the mid 12th century, although it may have been built by
271
+ King Henry II of England when he took control of England'snorthern
272
+ counties." ) ;
273
+ let brief = extract ( desc) ;
274
// The brief must keep the "..." and stop before the first single period.
+ assert brief == some (
275
+ "Warkworth... Castle is a ruined medieval building in the town" ) ;
276
+ }
0 commit comments