Skip to content

Commit 8629038

Browse files
committed
Initial commit.
0 parents  commit 8629038

File tree

4 files changed

+365
-0
lines changed

4 files changed

+365
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/target
2+
**/*.rs.bk
3+
Cargo.lock

Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[package]
2+
name = "css-typed-om-syntax"
3+
version = "0.1.0"
4+
authors = ["Emilio Cobos Álvarez <[email protected]>"]
5+
edition = "2018"
6+
description = "Parser https://drafts.css-houdini.org/css-properties-values-api-1/#parsing-syntax"
7+
8+
[dependencies]

src/ascii.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
3+
/// Strips leading and trailing ASCII whitespace from `input`.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. The returned
/// slice is a sub-slice of `input`, except when `input` is non-empty but
/// consists solely of whitespace, in which case a static empty slice is
/// returned.
pub fn trim_ascii_whitespace(input: &[u8]) -> &[u8] {
    /// True for any byte that must be kept (i.e. is not ASCII whitespace).
    fn is_content(byte: &u8) -> bool {
        !byte.is_ascii_whitespace()
    }

    if input.is_empty() {
        return input;
    }

    // Index of the first byte to keep; none at all means all-whitespace input.
    let start = match input.iter().position(is_content) {
        Some(index) => index,
        None => return &[],
    };

    // One past the last byte to keep. The search above already found a
    // non-whitespace byte, so the reverse search must find one too.
    let end = match input.iter().rposition(is_content) {
        Some(index) => index + 1,
        None => {
            debug_assert!(false, "We should have caught this in the loop above!");
            return &[];
        }
    };

    &input[start..end]
}
48+
49+
/// Checks `trim_ascii_whitespace` against a table of (input, expected) pairs.
#[test]
fn trim_ascii_whitespace_test() {
    let cases: [(&str, &str); 4] = [
        ("", ""),
        (" ", ""),
        (" a b c ", "a b c"),
        (" \t \t \ta b c \t \t \t \t", "a b c"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(
            trim_ascii_whitespace(input.as_bytes()),
            expected.as_bytes(),
        );
    }
}

src/lib.rs

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
mod ascii;
2+
3+
/// https://drafts.css-houdini.org/css-properties-values-api-1/#parsing-syntax
4+
#[derive(Debug, PartialEq)]
5+
pub struct Descriptor(Box<[Component]>);
6+
impl Descriptor {
7+
fn universal() -> Self {
8+
Descriptor(Box::new([]))
9+
}
10+
}
11+
12+
/// The reasons parsing a syntax descriptor can fail.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// The input was empty once surrounding ASCII whitespace was removed.
    EmptyInput,
    /// The input ended before a component could be produced.
    UnexpectedEOF,
    /// A `|` was found before any component had been parsed.
    UnexpectedPipe,
    /// The identifier is one of the reserved keywords and cannot be used as a
    /// custom identifier.
    InvalidCustomIdent,
    /// A component started with a byte that cannot begin a name.
    InvalidNameStart,
    /// A component name turned out to be empty.
    EmptyName,
}
21+
22+
/// A multiplier suffix attached to a syntax component.
///
/// https://drafts.css-houdini.org/css-properties-values-api-1/#multipliers
#[derive(Debug, PartialEq)]
pub enum Multiplier {
    /// Written as `+` in the syntax string.
    Space,
    /// Written as `#` in the syntax string.
    Comma,
}
28+
29+
#[derive(Debug, PartialEq)]
30+
pub struct Component {
31+
pub name: ComponentName,
32+
pub multiplier: Option<Multiplier>,
33+
}
34+
35+
#[derive(Debug, PartialEq)]
36+
pub struct CustomIdent(Box<[u8]>);
37+
38+
impl CustomIdent {
39+
fn from_bytes(ident: &[u8]) -> Result<Self, ParseError> {
40+
if ident.eq_ignore_ascii_case(b"inherit") ||
41+
ident.eq_ignore_ascii_case(b"reset") ||
42+
ident.eq_ignore_ascii_case(b"revert") ||
43+
ident.eq_ignore_ascii_case(b"unset") ||
44+
ident.eq_ignore_ascii_case(b"default") {
45+
return Err(ParseError::InvalidCustomIdent);
46+
}
47+
Ok(CustomIdent(ident.to_vec().into_boxed_slice()))
48+
}
49+
}
50+
51+
52+
#[derive(Debug, PartialEq)]
53+
pub enum ComponentName {
54+
DataType(DataType),
55+
Ident(CustomIdent),
56+
}
57+
58+
impl DataType {
59+
fn is_pre_multiplied(&self) -> bool {
60+
false
61+
}
62+
}
63+
64+
impl ComponentName {
65+
/// https://drafts.css-houdini.org/css-properties-values-api-1/#pre-multiplied-data-type-name
66+
fn is_pre_multiplied(&self) -> bool {
67+
match *self {
68+
ComponentName::DataType(ref t) => t.is_pre_multiplied(),
69+
ComponentName::Ident(..) => false,
70+
}
71+
}
72+
}
73+
74+
/// A data type name appearing in a syntax component.
///
/// No variants are defined yet, so no value of this type can exist.
#[derive(Debug, PartialEq)]
pub enum DataType {}
76+
77+
/// Parse a syntax descriptor or universal syntax descriptor.
78+
pub fn parse_descriptor(input: &str) -> Result<Descriptor, ParseError> {
79+
let input = input.as_bytes();
80+
// 1. Strip leading and trailing ASCII whitespace from string.
81+
let input = ascii::trim_ascii_whitespace(input);
82+
83+
// 2. If string's length is 0, return failure.
84+
if input.is_empty() {
85+
return Err(ParseError::EmptyInput);
86+
}
87+
88+
// 3. If string's length is 1, and the only code point in string is U+002A
89+
// ASTERISK (*), return the universal syntax descriptor.
90+
if input.len() == 1 && input[0] == b'*' {
91+
return Ok(Descriptor::universal());
92+
}
93+
94+
// 4. Let stream be an input stream created from the code points of string,
95+
// preprocessed as specified in [css-syntax-3]. Let descriptor be an
96+
// initially empty list of syntax components.
97+
//
98+
// NOTE(emilio): Instead of preprocessing we cheat and treat new-lines and
99+
// nulls in the parser specially.
100+
let mut components = vec![];
101+
{
102+
let mut parser = Parser::new(input, &mut components);
103+
// 5. Repeatedly consume the next input code point from stream.
104+
parser.parse()?;
105+
}
106+
Ok(Descriptor(components.into_boxed_slice()))
107+
}
108+
109+
struct Parser<'a, 'b> {
110+
input: &'a [u8],
111+
position: usize,
112+
output: &'b mut Vec<Component>,
113+
}
114+
115+
/// Returns whether `byte` is CSS whitespace: a newline, U+0009 CHARACTER
/// TABULATION, or U+0020 SPACE.
///
/// Carriage return and form feed are accepted as well, because the input is
/// not preprocessed and those would otherwise have been turned into newlines.
///
/// https://drafts.csswg.org/css-syntax-3/#whitespace
fn is_whitespace(byte: u8) -> bool {
    match byte {
        b' ' | b'\t' | b'\n' | b'\r' | b'\x0c' => true,
        _ => false,
    }
}
122+
123+
/// Returns whether `byte` is an ASCII letter: `A`-`Z` or `a`-`z`.
///
/// https://drafts.csswg.org/css-syntax-3/#letter
fn is_letter(byte: u8) -> bool {
    // The lowercase range used to be `b'a'...b'a'`, which only matched `a`.
    byte.is_ascii_alphabetic()
}
131+
132+
/// Returns whether `byte` lies outside the ASCII range (0x80 and above).
///
/// https://drafts.csswg.org/css-syntax-3/#non-ascii-code-point
fn is_non_ascii(byte: u8) -> bool {
    !byte.is_ascii()
}
136+
137+
/// Returns whether `byte` is an ASCII decimal digit, `0` through `9`.
///
/// https://drafts.csswg.org/css-syntax-3/#digit
fn is_digit(byte: u8) -> bool {
    byte.is_ascii_digit()
}
144+
145+
/// https://drafts.csswg.org/css-syntax-3/#name-start-code-point
146+
fn is_name_start(byte: u8) -> bool {
147+
is_letter(byte) || is_non_ascii(byte) || byte == b'_'
148+
}
149+
150+
/// https://drafts.csswg.org/css-syntax-3/#name-code-point
151+
fn is_name(byte: u8) -> bool {
152+
is_name_start(byte) || is_digit(byte) || byte == b'-'
153+
}
154+
155+
impl<'a, 'b> Parser<'a, 'b> {
156+
fn new(input: &'a [u8], output: &'b mut Vec<Component>) -> Self {
157+
Self {
158+
input,
159+
position: 0,
160+
output,
161+
}
162+
}
163+
164+
fn peek(&self) -> Option<u8> {
165+
self.input.get(self.position).cloned()
166+
}
167+
168+
fn parse(&mut self) -> Result<(), ParseError> {
169+
// 5. Repeatedly consume the next input code point from stream:
170+
loop {
171+
let byte = match self.peek() {
172+
None => {
173+
// EOF: If descriptor's size is greater than zero, return
174+
// descriptor; otherwise, return failure.
175+
if self.output.is_empty() {
176+
return Err(ParseError::UnexpectedEOF);
177+
}
178+
return Ok(());
179+
}
180+
Some(b) => b,
181+
};
182+
183+
// whitespace: Do nothing.
184+
if is_whitespace(byte) {
185+
self.position += 1;
186+
continue;
187+
}
188+
189+
// U+007C VERTICAL LINE (|):
190+
// * If descriptor's size is greater than zero, consume a syntax
191+
// component from stream. If failure was returned, return failure;
192+
// otherwise, append the returned value to descriptor.
193+
// * If descriptor's size is zero, return failure.
194+
if byte == b'|' {
195+
if self.output.is_empty() {
196+
return Err(ParseError::UnexpectedPipe);
197+
}
198+
self.position += 1;
199+
}
200+
201+
let component = self.parse_component()?;
202+
self.output.push(component)
203+
}
204+
}
205+
206+
fn skip_whitespace(&mut self) {
207+
loop {
208+
match self.peek() {
209+
Some(c) if is_whitespace(c) => self.position += 1,
210+
_ => return,
211+
}
212+
}
213+
}
214+
215+
/// https://drafts.css-houdini.org/css-properties-values-api-1/#consume-data-type-name
216+
fn parse_data_type_name(&mut self) -> Result<DataType, ParseError> {
217+
unimplemented!()
218+
}
219+
220+
/// https://drafts.csswg.org/css-syntax-3/#consume-a-name
221+
/// FIXME(emilio): This should actually use cssparser's consume_name
222+
/// to handle correctly escaping and nulls.
223+
fn consume_name(&mut self) -> &'a [u8] {
224+
let start = self.position;
225+
226+
loop {
227+
let byte = match self.peek() {
228+
None => return &self.input[start..],
229+
Some(b) => b,
230+
};
231+
232+
if !is_name(byte) {
233+
break;
234+
}
235+
self.position += 1;
236+
}
237+
238+
&self.input[start..self.position]
239+
}
240+
241+
fn parse_name(&mut self) -> Result<ComponentName, ParseError> {
242+
let b = match self.peek() {
243+
Some(b) => b,
244+
None => return Err(ParseError::UnexpectedEOF),
245+
};
246+
247+
if b == b'<' {
248+
self.position += 1;
249+
return Ok(ComponentName::DataType(self.parse_data_type_name()?));
250+
}
251+
252+
if b != b'\\' && !is_name_start(b) {
253+
return Err(ParseError::InvalidNameStart);
254+
}
255+
256+
let name = self.consume_name();
257+
if name.is_empty() {
258+
return Err(ParseError::EmptyName);
259+
}
260+
return Ok(ComponentName::Ident(CustomIdent::from_bytes(name)?))
261+
}
262+
263+
fn parse_multiplier(&mut self) -> Option<Multiplier> {
264+
let multiplier = match self.peek()? {
265+
b'+' => Multiplier::Space,
266+
b'#' => Multiplier::Comma,
267+
_ => return None,
268+
};
269+
self.position += 1;
270+
Some(multiplier)
271+
}
272+
273+
/// https://drafts.css-houdini.org/css-properties-values-api-1/#consume-a-syntax-component
274+
fn parse_component(&mut self) -> Result<Component, ParseError> {
275+
// Consume as much whitespace as possible from stream.
276+
self.skip_whitespace();
277+
let name = self.parse_name()?;
278+
let multiplier = if name.is_pre_multiplied() {
279+
None
280+
} else {
281+
self.parse_multiplier()
282+
};
283+
Ok(Component { name, multiplier })
284+
}
285+
}
286+
287+
/// `*`, with or without surrounding ASCII whitespace, parses to the universal
/// syntax descriptor.
#[test]
fn universal() {
    let inputs = ["*", " * ", "* ", "\t*\t"];
    for syntax in inputs.iter() {
        assert_eq!(parse_descriptor(syntax), Ok(Descriptor::universal()));
    }
}

0 commit comments

Comments
 (0)