
[WIP] Config parsing #40


Closed · wants to merge 10 commits

Changes from all commits
6 changes: 6 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions git-config/Cargo.toml
@@ -14,4 +14,5 @@ doctest = false

 [dependencies]
 bstr = { version = "0.2.13", default-features = false, features = ["std"] }
+dangerous = { version = "0.7.0", default-features = false, features = ["full-context", "retry"], git = "https://github.com/avitex/rust-dangerous" }
 quick-error = "2.0.0"
7 changes: 1 addition & 6 deletions git-config/src/file/mod.rs
@@ -32,12 +32,7 @@ impl Token {
 pub struct File {
     buf: Vec<u8>,
     /// A config file as parsed into tokens, where each [`Token`] is one of the three relevant items in git config files.
-    tokens: Vec<Token>, // but how do we get fast lookups and proper value lookup based on decoded values?
-    // On the fly is easier, otherwise we have to deal with a lookup cache of sorts and
-    // many more allocations up front (which might be worth it only once we have measurements).
-    // Cow<'a, _> would bind to our buffer so the cache can't be in this type.
-    // Probably it could be the 'Config' type which handles multiple files and treats them as one,
-    // and only if there is any need.
+    tokens: Vec<Token>,
 }

impl File {
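The removed comment above weighs resolving token bytes on the fly against an up-front lookup cache of decoded values. A minimal standalone sketch of the on-the-fly approach — with hypothetical, simplified shapes for `Span` and `File`; the PR's real `Token` type is richer:

```rust
// Hypothetical, simplified shapes mirroring the PR's design: `File` owns the
// raw bytes, tokens store only offsets, and bytes are resolved on demand.
struct Span {
    start: usize,
    end_inclusive: usize,
}

struct File {
    buf: Vec<u8>,
    tokens: Vec<Span>,
}

impl File {
    // Resolve a token's bytes against the owned buffer on the fly,
    // avoiding any up-front allocation or lookup cache.
    fn token_bytes(&self, index: usize) -> &[u8] {
        let span = &self.tokens[index];
        &self.buf[span.start..=span.end_inclusive]
    }
}

fn main() {
    let file = File {
        buf: b"[core]\n\tbare = false\n".to_vec(),
        tokens: vec![Span { start: 0, end_inclusive: 5 }], // spans "[core]"
    };
    assert_eq!(file.token_bytes(0), b"[core]");
}
```

Because the borrow in `token_bytes` is tied to `&self`, no `Cow<'a, _>` binds the buffer's lifetime into the struct itself — which is the constraint the removed comment was circling around.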
11 changes: 11 additions & 0 deletions git-config/src/lib.rs
@@ -30,6 +30,15 @@ impl From<Span> for Range<usize> {
}
}

impl From<Range<usize>> for Span {
    fn from(Range { start, end }: Range<usize>) -> Self {
        Span {
            start,
            end_inclusive: end - 1,
        }
    }
}

Member Author commented on this impl: Since dangerous uses a Range<usize>, I might just use this instead of my own version of ..=. Probably I had a reason to use an inclusive range in the first place, but it's not entirely clear to me right now what that was 🤦‍♂️.
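The off-by-one between the standard library's exclusive `Range` end and the span's inclusive end is easy to get wrong. A self-contained sketch of the round trip, with the types assumed to mirror the PR's `Span`:

```rust
use std::ops::Range;

// Assumed mirror of the PR's `Span` type: an inclusive end offset.
#[derive(Debug, PartialEq)]
struct Span {
    start: usize,
    end_inclusive: usize,
}

impl From<Range<usize>> for Span {
    fn from(Range { start, end }: Range<usize>) -> Self {
        // Pitfall of storing an inclusive end: an empty range ending at 0
        // would underflow here (panics in debug builds).
        Span {
            start,
            end_inclusive: end - 1,
        }
    }
}

impl Span {
    // Convert back to the exclusive-end standard library range.
    fn to_range(&self) -> Range<usize> {
        self.start..self.end_inclusive + 1
    }
}

fn main() {
    let span: Span = (3..10).into();
    assert_eq!(span, Span { start: 3, end_inclusive: 9 });
    assert_eq!(span.to_range(), 3..10); // round trip is lossless for non-empty ranges
}
```

The underflow on empty ranges is one concrete argument for the question raised in the comment above: storing the exclusive-end `Range` directly avoids the `end - 1` edge case entirely.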

impl Span {
    /// Convert a span into the standard library range type.
    fn to_range(&self) -> Range<usize> {

@@ -41,6 +50,8 @@ impl Span {
pub mod file;
pub use file::File;

pub(crate) mod parse;

/// A module with specialized value types as they exist within git config files.
pub mod value;

142 changes: 142 additions & 0 deletions git-config/src/parse.rs
@@ -0,0 +1,142 @@
use crate::{file, spanned};
use dangerous::{Bytes, BytesReader, Error, Expected, Input};

fn config(bytes: &[u8]) -> Result<Vec<file::Token>, Expected<'_>> {
    fn config<'i, E>(input: Bytes<'i>, r: &mut BytesReader<'i, E>) -> Result<Vec<file::Token>, E>
    where
        E: Error<'i>,
    {
        let mut tokens = Vec::new();
        if let Some(section) = skip_whitespace_or_comment(r, ConsumeTo::NextToken) {
            tokens.push(spanned::Comment(
                dangerous::input(section)
                    .span_of(&input)
                    .expect("range contained")
                    .into(),
            ))
        };
        unimplemented!("sections and values");
    }
    let input = dangerous::input(bytes);
    input.read_all(|r| config(dangerous::input(bytes), r))
}

Member Author commented on `dangerous::input(section)`: In theory this conversion would have to be done for each token we parse here. Certainly this can be sugared up a bit more to feel less cluttered, but it might also be a case for just using zc. After all, the File type really is a zc type, owning data and keeping offsets into the data. The data will always be read into memory, I think, so that would match too (as opposed to memory-mapping them).

enum ConsumeTo {
NextToken,
EndOfLine,
}

#[must_use]
fn skip_whitespace_or_comment<'a, E>(r: &mut BytesReader<'a, E>, to_where: ConsumeTo) -> Option<&'a [u8]> {
    fn skip_whitespace_or_comment<E>(r: &mut BytesReader<'_, E>, to_where: ConsumeTo) {
        fn skip_comment<E>(r: &mut BytesReader<'_, E>) -> usize {
            if r.peek_eq(b'#') {
                r.take_until_opt(b'\n').len()
            } else {
                0
            }
        }

        let (mut last, mut current) = (0, 0);
        loop {
            current += skip_comment(r);
            current += r
                .take_while(|c: u8| {
                    let iwb = c.is_ascii_whitespace();
                    iwb && match to_where {
                        ConsumeTo::NextToken => true,
                        ConsumeTo::EndOfLine => c != b'\n',
                    }
                })
                .len();
            if last == current {
                if let ConsumeTo::EndOfLine = to_where {
                    r.consume_opt(b'\n');
                }
                break;
            }
            last = current;
        }
    }

    let parsed = r
        .take_consumed(|r| skip_whitespace_or_comment(r, to_where))
        .as_dangerous();
    if parsed.is_empty() {
        None
    } else {
        Some(parsed)
    }
}

Member Author commented on the outer function: Yes, this was a little lazy, and it should really be refactored to not need the inner function. One would probably end up with something cloning the input, but… I don't actually know how to do this properly.

Member Author commented on `.as_dangerous()`: Here I thought there could be an as_dangerous_opt(), as I specifically avoided doing as_dangerous_non_empty().ok(), probably due to a very premature optimization. Generally I like the _opt() versions of methods a lot.
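For readers without the dangerous crate at hand, the fixed-point loop above can be sketched over a plain byte slice. This is a hypothetical helper, not part of the PR: it alternates between skipping a comment and skipping whitespace until a full pass makes no progress, then optionally consumes the newline it stopped at, mirroring the `ConsumeTo::EndOfLine` behavior.

```rust
// Hypothetical standalone version of the skipping logic, without the
// `dangerous` reader: returns how many leading bytes are whitespace/comments.
fn skip_ws_or_comment(input: &[u8], stop_at_newline: bool) -> usize {
    let mut pos = 0;
    loop {
        let before = pos;
        // Skip a comment: from '#' up to (not including) the next newline.
        if input.get(pos) == Some(&b'#') {
            while pos < input.len() && input[pos] != b'\n' {
                pos += 1;
            }
        }
        // Skip whitespace, optionally stopping before '\n'.
        while let Some(&c) = input.get(pos) {
            if !c.is_ascii_whitespace() || (stop_at_newline && c == b'\n') {
                break;
            }
            pos += 1;
        }
        if pos == before {
            // Fixed point reached: optionally consume the newline we stopped at.
            if stop_at_newline && input.get(pos) == Some(&b'\n') {
                pos += 1;
            }
            return pos;
        }
    }
}

fn main() {
    // Whitespace, a comment, then more whitespace before the token "a=1".
    assert_eq!(skip_ws_or_comment(b"  # hi \n  a=1", false), 10);
    // End-of-line mode stops right after consuming the comment's newline.
    assert_eq!(skip_ws_or_comment(b"# c\n x", true), 4);
}
```

The loop-until-no-progress shape is what lets interleaved runs of comments and whitespace (`"  # a\n  # b\n  x"`) be consumed in one call, the same property the reader-based version relies on.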

#[cfg(test)]
mod tests {
    mod comments {
        use crate::parse::{skip_whitespace_or_comment, ConsumeTo};
        use dangerous::Input;

        macro_rules! decode_span {
            ($name:ident, $input:literal, $option:path, $range:expr, $explain:literal) => {
                #[test]
                fn $name() {
                    let bytes = $input;
                    let (res, _remaining) =
                        dangerous::input(bytes).read_infallible(|r| skip_whitespace_or_comment(r, $option));
                    assert_eq!(
                        res.map(dangerous::input)
                            .and_then(|s| s.span_of(&dangerous::input(bytes))),
                        Some($range),
                        $explain
                    );
                }
            };
        }

        decode_span!(
            no_comment_till_next_token,
            b" \n \t\n",
            ConsumeTo::NextToken,
            0..13,
            "it consumes newlines as well, taking everything"
        );

        decode_span!(
            no_comment_to_end_of_line,
            b" \n \t ",
            ConsumeTo::EndOfLine,
            0..6,
            "it consumes only a single line, including the EOF marker"
        );

        decode_span!(
            comment_to_next_token,
            b" #ho \n \t ",
            ConsumeTo::NextToken,
            0..13,
            "comments are the same as whitespace"
        );

        decode_span!(
            comment_to_end_of_line,
            b"# hi \n \t ",
            ConsumeTo::EndOfLine,
            0..6,
            "comments are the same as whitespace"
        );

        decode_span!(
            whitespace_to_token,
            b" a=2 \n \t ",
            ConsumeTo::NextToken,
            0..3,
            "it does not consume tokens"
        );

        decode_span!(
            whitespace_to_token_on_next_line,
            b" \n b=2\t ",
            ConsumeTo::NextToken,
            0..7,
            "it does not consume tokens while skipping lines"
        );
    }
}
2 changes: 1 addition & 1 deletion git-config/src/spanned.rs
@@ -4,7 +4,7 @@ use crate::Span
 // This means we auto-trim whitespace otherwise, which we consider a feature.
 // All whitespace is automatically an empty comment.
 #[derive(Clone, PartialOrd, PartialEq, Ord, Eq)]
-pub(crate) struct Comment(Span);
+pub(crate) struct Comment(pub(crate) Span);

/// A section or sub-section (in case `sub_name` is `Some()`), i.e.
///