Skip to content

Extract RcDom into its own crate with no support guarantees #386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
members = [
"markup5ever",
"html5ever",
"rcdom",
"xml5ever"
]
1 change: 1 addition & 0 deletions html5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ serde_json = "1.0"
rustc-test = "0.3"
typed-arena = "1.3.0"
criterion = "0.3"
markup5ever_rcdom = { version = "0.1", path = "../rcdom" }

[build-dependencies]
quote = "1"
Expand Down
6 changes: 4 additions & 2 deletions html5ever/examples/html2html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@
//! where htmlparser-1.4.jar comes from http://about.validator.nu/htmlparser/

extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;
use std::io::{self, Write};

use html5ever::driver::ParseOpts;
use html5ever::rcdom::RcDom;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::{parse_document, serialize};
use rcdom::{RcDom, SerializableHandle};

fn main() {
let opts = ParseOpts {
Expand All @@ -45,7 +46,8 @@ fn main() {
.write_all(b"<!DOCTYPE html>\n")
.ok()
.expect("writing DOCTYPE failed");
serialize(&mut io::stdout(), &dom.document, Default::default())
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut io::stdout(), &document, Default::default())
.ok()
.expect("serialization failed");
}
3 changes: 2 additions & 1 deletion html5ever/examples/print-rcdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@

#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;
use std::io;
use std::iter::repeat;
use std::string::String;

use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::tendril::TendrilSink;
use rcdom::{Handle, NodeData, RcDom};

// This is not proper HTML serialization, of course.

Expand Down
8 changes: 5 additions & 3 deletions html5ever/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,20 @@ impl<Sink: TreeSink> Parser<Sink> {

#[cfg(test)]
mod tests {
extern crate markup5ever_rcdom;
use super::*;
use crate::rcdom::RcDom;
use self::markup5ever_rcdom::{RcDom, SerializableHandle};
use crate::serialize::serialize;
use crate::tendril::TendrilSink;
use tendril::TendrilSink;

#[test]
fn from_utf8() {
let dom = parse_document(RcDom::default(), ParseOpts::default())
.from_utf8()
.one("<title>Test".as_bytes());
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();
assert_eq!(
String::from_utf8(serialized).unwrap().replace(" ", ""),
"<html><head><title>Test</title></head><body></body></html>"
Expand Down
13 changes: 7 additions & 6 deletions html5ever/src/tree_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,9 +1683,10 @@ where
#[cfg(test)]
#[allow(non_snake_case)]
mod test {
use crate::interface::{AppendNode, AppendText, NodeOrText};
use crate::interface::{ElementFlags, Tracer, TreeSink};
use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
extern crate markup5ever_rcdom as rcdom;
use markup5ever::interface::{AppendNode, AppendText, NodeOrText};
use markup5ever::interface::{ElementFlags, Tracer, TreeSink};
use markup5ever::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};

use super::types::*;

Expand All @@ -1698,6 +1699,7 @@ mod test {
use crate::tokenizer::states as tok_state;
use crate::tokenizer::{Doctype, StartTag, Tag, TokenSink};
use crate::tokenizer::{Tokenizer, TokenizerOpts};
use crate::driver::*;

use crate::util::str::is_ascii_whitespace;

Expand All @@ -1708,9 +1710,8 @@ mod test {
use std::mem::replace;

use super::{TreeBuilder, TreeBuilderOpts};
use crate::driver::*;
use crate::{Attribute, local_name, namespace_url, ns};
use crate::rcdom::{Handle, Node, NodeData, RcDom};
use markup5ever::{Attribute, local_name, namespace_url, ns};
use self::rcdom::{Handle, Node, NodeData, RcDom};

pub struct LineCountingDOM {
pub line_vec: Vec<(QualName, u64)>,
Expand Down
13 changes: 8 additions & 5 deletions html5ever/tests/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@

#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;

use html5ever::driver::ParseOpts;
use html5ever::rcdom::RcDom;
use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope};
use html5ever::tendril::{SliceExt, StrTendril, TendrilSink};
use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer};
use html5ever::{parse_document, parse_fragment, serialize, QualName};
use rcdom::{RcDom, SerializableHandle};

use std::io;

Expand Down Expand Up @@ -98,10 +99,10 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
vec![],
)
.one(input);
let inner = &dom.document.children.borrow()[0];
let inner: SerializableHandle = dom.document.children.borrow()[0].clone().into();

let mut result = vec![];
serialize(&mut result, inner, Default::default()).unwrap();
serialize(&mut result, &inner, Default::default()).unwrap();
StrTendril::try_from_byte_slice(&result).unwrap()
}

Expand Down Expand Up @@ -242,7 +243,8 @@ fn doctype() {
let dom = parse_document(RcDom::default(), ParseOpts::default()).one("<!doctype html>");
dom.document.children.borrow_mut().truncate(1); // Remove <html>
let mut result = vec![];
serialize(&mut result, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut result, &document, Default::default()).unwrap();
assert_eq!(String::from_utf8(result).unwrap(), "<!DOCTYPE html>");
}

Expand All @@ -259,6 +261,7 @@ fn deep_tree() {
let document = &dom.document;
let opts = SerializeOpts::default();
let mut ret_val = Vec::new();
serialize(&mut ret_val, document, opts)
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut ret_val, &document, opts)
.expect("Writing to a string shouldn't fail (expect on OOM)");
}
3 changes: 2 additions & 1 deletion html5ever/tests/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

extern crate markup5ever_rcdom as rcdom;
extern crate rustc_test as test;
#[macro_use]
extern crate html5ever;
Expand All @@ -24,10 +25,10 @@ use std::path::Path;
use std::{env, fs, io};
use test::{DynTestName, TestDesc, TestDescAndFn, TestFn};

use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, parse_fragment, ParseOpts};
use html5ever::{LocalName, QualName};
use rcdom::{Handle, NodeData, RcDom};

fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
let mut tests = vec![];
Expand Down
2 changes: 2 additions & 0 deletions markup5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"

[dev-dependencies]
markup5ever_rcdom = { version = "0.1", path = "../rcdom" }
3 changes: 2 additions & 1 deletion markup5ever/interface/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,10 @@ pub struct ElementFlags {
///
/// ```
/// # #[macro_use] extern crate markup5ever;
/// # extern crate markup5ever_rcdom as rcdom;
///
/// # fn main() {
/// use markup5ever::{rcdom, QualName, Attribute};
/// use markup5ever::{QualName, Attribute};
/// use markup5ever::interface::create_element;
///
/// let mut dom = rcdom::RcDom::default();
Expand Down
1 change: 0 additions & 1 deletion markup5ever/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ include!(concat!(env!("OUT_DIR"), "/generated.rs"));
pub mod data;
#[macro_use]
pub mod interface;
pub mod rcdom;
pub mod serialize;
mod util {
pub mod buffer_queue;
Expand Down
17 changes: 17 additions & 0 deletions rcdom/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "markup5ever_rcdom"
version = "0.1.0"
authors = [ "The html5ever Project Developers" ]
# SPDX license expression; the "/" separator is deprecated by Cargo in favour of "OR".
license = "MIT OR Apache-2.0"
repository = "https://github.com/servo/html5ever"
description = "Basic, unsupported DOM structure for use by tests in html5ever/xml5ever"
readme = "README.md"
documentation = "https://docs.rs/markup5ever_rcdom"
categories = [ "parser-implementations", "web-programming" ]

[lib]
path = "lib.rs"

[dependencies]
tendril = "0.4"
markup5ever = { version = "0.10", path = "../markup5ever" }
7 changes: 7 additions & 0 deletions rcdom/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# markup5ever_rcdom

This crate is built for the express purpose of writing automated tests for the `html5ever`
and `xml5ever` crates. It is not intended to be a production-quality DOM implementation,
and has not been fuzzed or tested against arbitrary, malicious, or nontrivial inputs. No maintenance
or support for any such issues will be provided. If you use this DOM implementation in a production,
user-facing system, you do so at your own risk.
33 changes: 22 additions & 11 deletions markup5ever/rcdom.rs → rcdom/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model

extern crate markup5ever;
extern crate tendril;

use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
Expand All @@ -47,14 +50,14 @@ use std::rc::{Rc, Weak};

use tendril::StrTendril;

use crate::interface::tree_builder;
use crate::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use crate::serialize::TraversalScope;
use crate::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use crate::serialize::{Serialize, Serializer};
use crate::Attribute;
use crate::ExpandedName;
use crate::QualName;
use markup5ever::interface::tree_builder;
use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use markup5ever::serialize::TraversalScope;
use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use markup5ever::serialize::{Serialize, Serializer};
use markup5ever::Attribute;
use markup5ever::ExpandedName;
use markup5ever::QualName;

/// The different kinds of nodes in the DOM.
#[derive(Debug)]
Expand Down Expand Up @@ -433,14 +436,22 @@ enum SerializeOp {
Close(QualName)
}

impl Serialize for Handle {
/// Newtype wrapper around a DOM `Handle` that carries the `Serialize`
/// implementation for this crate's nodes.
///
/// Build one from a `Handle` with `From`/`Into` (for example
/// `let doc: SerializableHandle = dom.document.clone().into();`) and pass a
/// reference to it to the serializer.
pub struct SerializableHandle(Handle);

impl From<Handle> for SerializableHandle {
fn from(h: Handle) -> SerializableHandle {
SerializableHandle(h)
}
}

impl Serialize for SerializableHandle {
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
where
S: Serializer,
{
let mut ops = match traversal_scope {
IncludeNode => vec![SerializeOp::Open(self.clone())],
ChildrenOnly(_) => self
IncludeNode => vec![SerializeOp::Open(self.0.clone())],
ChildrenOnly(_) => self.0
.children
.borrow()
.iter()
Expand Down
1 change: 1 addition & 0 deletions xml5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ markup5ever = {version = "0.10", path = "../markup5ever" }
serde_json = "1.0"
rustc-test = "0.3"
criterion = "0.2"
markup5ever_rcdom = {version = "0.1", path = "../rcdom" }

[[bench]]
name = "xml5ever"
Expand Down
3 changes: 2 additions & 1 deletion xml5ever/examples/hello_xml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;

use std::default::Default;

use rcdom::{NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::rcdom::{NodeData, RcDom};
use xml5ever::tendril::TendrilSink;
use xml5ever::tree_builder::TreeSink;

Expand Down
3 changes: 2 additions & 1 deletion xml5ever/examples/xml_tree_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;

use std::default::Default;
use std::io;
use std::string::String;

use rcdom::{Handle, NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::rcdom::{Handle, NodeData, RcDom};
use xml5ever::tendril::TendrilSink;

fn walk(prefix: &str, handle: &Handle) {
Expand Down
14 changes: 9 additions & 5 deletions xml5ever/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ impl<Sink: TreeSink> XmlParser<Sink> {

#[cfg(test)]
mod tests {
extern crate markup5ever_rcdom;
use super::*;
use crate::rcdom::RcDom;
use self::markup5ever_rcdom::{RcDom, SerializableHandle};
use crate::serialize::serialize;
use crate::tendril::TendrilSink;
use tendril::TendrilSink;

#[test]
fn el_ns_serialize() {
Expand Down Expand Up @@ -170,16 +171,18 @@ mod tests {

fn assert_eq_serialization(text: &'static str, dom: RcDom) {
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();

let dom_from_text = parse_document(RcDom::default(), XmlParseOpts::default())
.from_utf8()
.one(text.as_bytes());

let mut reserialized = Vec::new();
let document: SerializableHandle = dom_from_text.document.clone().into();
serialize(
&mut reserialized,
&dom_from_text.document,
&document,
Default::default(),
)
.unwrap();
Expand All @@ -192,7 +195,8 @@ mod tests {

fn assert_serialization(text: &'static str, dom: RcDom) {
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();
assert_eq!(String::from_utf8(serialized).unwrap(), text);
}
}
Loading