Skip to content

Commit 70fdcf0

Browse files
committed
Fixed #3 (semicolon in attributes)
Also some refactorings.
1 parent 5daa2e1 commit 70fdcf0

File tree

4 files changed

+152
-42
lines changed

4 files changed

+152
-42
lines changed

src/common.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::fmt;
55
/// XML parsing error.
66
///
77
/// Consists of a row and column reference and a message.
8-
#[deriving(Clone, PartialEq)]
8+
#[deriving(Clone, PartialEq, Eq)]
99
pub struct Error {
1010
row: uint,
1111
col: uint,
@@ -61,7 +61,7 @@ impl Error {
6161
///
6262
/// Consists of optional prefix, optional namespace and mandatory
6363
/// local name.
64-
#[deriving(Clone, PartialEq)]
64+
#[deriving(Clone, PartialEq, Eq)]
6565
pub struct Name {
6666
/// An XML namespace prefix.
6767
///
@@ -97,6 +97,16 @@ impl fmt::Show for Name {
9797
}
9898

9999
impl Name {
100+
/// Returns a `Name` instance representing plain local name.
101+
#[inline]
102+
pub fn new_local(name: &str) -> Name {
103+
Name {
104+
local_name: name.to_string(),
105+
prefix: None,
106+
namespace: None
107+
}
108+
}
109+
100110
/// Returns a slice with namespace prefix of this name, if it is present.
101111
pub fn prefix_ref<'a>(&'a self) -> Option<&'a str> {
102112
match self.prefix {
@@ -124,7 +134,7 @@ impl Name {
124134
/// XML element attribute.
125135
///
126136
/// Consists of a qualified name and a value.
127-
#[deriving(Clone, PartialEq)]
137+
#[deriving(Clone, PartialEq, Eq)]
128138
pub struct Attribute {
129139
/// Qualified name of the attribute.
130140
pub name: Name,
@@ -133,8 +143,17 @@ pub struct Attribute {
133143
pub value: String
134144
}
135145

146+
impl Attribute {
147+
pub fn new_local(name: &str, value: &str) -> Attribute {
148+
Attribute {
149+
name: Name::new_local(name),
150+
value: value.to_string()
151+
}
152+
}
153+
}
154+
136155
/// XML version enumeration.
137-
#[deriving(Clone, PartialEq)]
156+
#[deriving(Clone, PartialEq, Eq)]
138157
pub enum XmlVersion {
139158
/// XML version 1.0.
140159
Version10,

src/namespace.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,31 @@ pub struct Namespace(pub HashMap<Option<String>, String>);
2525

2626
impl Namespace {
2727
/// Returns an empty namespace.
28+
#[inline]
2829
pub fn empty() -> Namespace { Namespace(HashMap::with_capacity(2)) }
2930

31+
/// Checks whether this namespace is empty.
32+
#[inline]
33+
pub fn is_empty(&self) -> bool {
34+
let Namespace(ref hm) = *self;
35+
hm.is_empty()
36+
}
37+
38+
/// Checks whether this namespace is essentially empty, that is, it does not contain
39+
/// anything but default mappings.
40+
pub fn is_essentially_empty(&self) -> bool {
41+
let Namespace(ref hm) = *self;
42+
for (k, v) in hm.iter() {
43+
match (k.as_ref().map(|k| k.as_slice()), v.as_slice()) {
44+
(None, NS_EMPTY_URI) |
45+
(Some(NS_XMLNS_PREFIX), NS_XMLNS_URI) |
46+
(Some(NS_XML_PREFIX), NS_XML_URI) => {},
47+
_ => return false
48+
}
49+
}
50+
true
51+
}
52+
3053
/// Puts a mapping into this namespace.
3154
///
3255
/// This method does not override already existing mapping.

src/reader/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl<B: Buffer> EventReader<B> {
3232
/// Creates a new parser with the provided configuration, consuming given `Buffer`.
3333
#[inline]
3434
pub fn new_with_config(source: B, config: ParserConfig) -> EventReader<B> {
35-
EventReader { source: source, parser: parser::new(config) }
35+
EventReader { source: source, parser: PullParser::new(config) }
3636
}
3737

3838
/// Pulls and returns next XML event from the stream.

src/reader/parser.rs

Lines changed: 105 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,35 +61,37 @@ pub struct PullParser {
6161
pop_namespace: bool
6262
}
6363

64-
/// Returns a new parser using the given config.
65-
pub fn new(config: ParserConfig) -> PullParser {
66-
PullParser {
67-
config: config,
68-
lexer: lexer::new(),
69-
st: OutsideTag,
70-
buf: String::new(),
71-
nst: NamespaceStack::default(),
72-
73-
data: MarkupData {
74-
name: String::new(),
75-
version: None,
76-
encoding: None,
77-
standalone: None,
78-
ref_data: String::new(),
79-
element_name: None,
80-
quote: None,
81-
attr_name: None,
82-
attributes: vec!()
83-
},
84-
finish_event: None,
85-
next_event: None,
86-
est: Vec::new(),
87-
88-
encountered_element: false,
89-
parsed_declaration: false,
90-
inside_whitespace: true,
91-
read_prefix_separator: false,
92-
pop_namespace: false
64+
impl PullParser {
65+
/// Returns a new parser using the given config.
66+
pub fn new(config: ParserConfig) -> PullParser {
67+
PullParser {
68+
config: config,
69+
lexer: lexer::new(),
70+
st: OutsideTag,
71+
buf: String::new(),
72+
nst: NamespaceStack::default(),
73+
74+
data: MarkupData {
75+
name: String::new(),
76+
version: None,
77+
encoding: None,
78+
standalone: None,
79+
ref_data: String::new(),
80+
element_name: None,
81+
quote: None,
82+
attr_name: None,
83+
attributes: vec!()
84+
},
85+
finish_event: None,
86+
next_event: None,
87+
est: Vec::new(),
88+
89+
encountered_element: false,
90+
parsed_declaration: false,
91+
inside_whitespace: true,
92+
read_prefix_separator: false,
93+
pop_namespace: false
94+
}
9395
}
9496
}
9597

@@ -159,6 +161,29 @@ enum QualifiedNameTarget {
159161
ClosingTagNameTarget
160162
}
161163

164+
#[deriving(PartialEq, Eq)]
165+
enum QuoteToken {
166+
SingleQuoteToken,
167+
DoubleQuoteToken
168+
}
169+
170+
impl QuoteToken {
171+
fn from_token(t: &Token) -> QuoteToken {
172+
match *t {
173+
SingleQuote => SingleQuoteToken,
174+
DoubleQuote => DoubleQuoteToken,
175+
_ => fail!("Unexpected token: {}", t)
176+
}
177+
}
178+
179+
fn as_token(self) -> Token {
180+
match self {
181+
SingleQuoteToken => SingleQuote,
182+
DoubleQuoteToken => DoubleQuote
183+
}
184+
}
185+
}
186+
162187
struct AttributeData {
163188
name: Name,
164189
value: String
@@ -184,7 +209,7 @@ struct MarkupData {
184209

185210
element_name: Option<Name>, // used for element name
186211

187-
quote: Option<Token>, // used to hold opening quote for attribute value
212+
quote: Option<QuoteToken>, // used to hold opening quote for attribute value
188213
attr_name: Option<Name>, // used to hold attribute name
189214
attributes: Vec<AttributeData> // used to hold all accumulated attributes
190215
}
@@ -397,12 +422,12 @@ impl PullParser {
397422
match t {
398423
Whitespace(_) if self.data.quote.is_none() => None, // skip leading whitespace
399424

400-
DoubleQuote | SingleQuote => match self.data.quote.clone() {
425+
DoubleQuote | SingleQuote => match self.data.quote {
401426
None => { // Entered attribute value
402-
self.data.quote = Some(t);
427+
self.data.quote = Some(QuoteToken::from_token(&t));
403428
None
404429
}
405-
Some(ref q) if *q == t => {
430+
Some(q) if q.as_token() == t => {
406431
self.data.quote = None;
407432
let value = self.take_buf();
408433
on_value(self, value)
@@ -415,10 +440,8 @@ impl PullParser {
415440
self.into_state_continue(InsideReference(st))
416441
}
417442

418-
// Everything characters except " and '
419-
_ if t.contains_char_data() => self.append_str_continue(t.to_string().as_slice()),
420-
421-
_ => Some(self_error!(self; "Unexpected token inside attribute value: {}", t))
443+
// Every character except " and ' is okay
444+
_ => self.append_str_continue(t.to_string().as_slice()),
422445
}
423446
}
424447

@@ -1026,4 +1049,49 @@ impl PullParser {
10261049

10271050
#[cfg(test)]
10281051
mod tests {
1052+
use std::io::BufReader;
1053+
1054+
use common::{Name, Attribute};
1055+
use reader::parser::PullParser;
1056+
use reader::ParserConfig;
1057+
use reader::events;
1058+
1059+
fn new_parser() -> PullParser {
1060+
PullParser::new(ParserConfig::new())
1061+
}
1062+
1063+
macro_rules! expect_event(
1064+
($r:expr, $p:expr, $t:pat) => (
1065+
match $p.next(&mut $r) {
1066+
$t => {}
1067+
e => fail!("Unexpected event: {}", e)
1068+
}
1069+
);
1070+
($r:expr, $p:expr, $t:pat if $c:expr) => (
1071+
match $p.next(&mut $r) {
1072+
$t if $c => {}
1073+
e => fail!("Unexpected event: {}", e)
1074+
}
1075+
)
1076+
)
1077+
1078+
#[test]
1079+
fn semicolon_in_attribute_issue_3() {
1080+
static DATA: &'static str = r#"
1081+
<a attr="zzz;zzz" />
1082+
"#;
1083+
let mut r = BufReader::new(DATA.as_bytes());
1084+
1085+
let mut p = new_parser();
1086+
1087+
expect_event!(r, p, events::StartDocument { .. });
1088+
expect_event!(r, p, events::StartElement { ref name, ref attributes, ref namespace }
1089+
if *name == Name::new_local("a") &&
1090+
attributes.len() == 1 &&
1091+
attributes[0] == Attribute::new_local("attr", "zzz;zzz") &&
1092+
namespace.is_essentially_empty()
1093+
);
1094+
expect_event!(r, p, events::EndElement { ref name } if *name == Name::new_local("a"));
1095+
expect_event!(r, p, events::EndDocument);
1096+
}
10291097
}

0 commit comments

Comments
 (0)