From e37938a3940d5931cac5880853479a8cc5444023 Mon Sep 17 00:00:00 2001 From: eater <=@eater.me> Date: Thu, 8 Feb 2024 20:21:29 +0100 Subject: [PATCH] Initial commit --- .gitignore | 1 + Cargo.lock | 431 ++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 ++ README.md | 24 +++ src/lexer.rs | 386 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + src/parser.rs | 425 +++++++++++++++++++++++++++++++++++++++++++++++++ src/span.rs | 122 ++++++++++++++ 8 files changed, 1407 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/lexer.rs create mode 100644 src/lib.rs create mode 100644 src/parser.rs create mode 100644 src/span.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..3e1260c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,431 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "backtrace-ext" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +dependencies = [ + "backtrace", +] + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "miette" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98a72adfa0c7ae88ba0abcbd00047a476616c66b831d628b8ac7f1e9de0cfd67" +dependencies = [ + "backtrace", + "backtrace-ext", + "miette-derive", + "owo-colors", + "supports-color", + "supports-hyperlinks", + "supports-unicode", + "terminal_size", + "textwrap", + "thiserror", + "unicode-width", +] + +[[package]] +name = "miette-derive" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "279def6bf114a34b3cf887489eb440d4dfcf709ab3ce9955e4a6f957ce5cce77" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", 
+] + +[[package]] +name = "owo-colors" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caff54706df99d2a78a5a4e3455ff45448d81ef1bb63c22cd14052ca0e993a3f" + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ringels" +version = "0.1.0" +dependencies = [ + "miette", + "thiserror", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "supports-color" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9829b314621dfc575df4e409e79f9d6a66a3bd707ab73f23cb4aa3a854ac854f" +dependencies = [ + "is_ci", +] + +[[package]] +name = "supports-hyperlinks" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0a1e5168041f5f3ff68ff7d95dcb9c8749df29f6e7e89ada40dd4c9de404ee" + +[[package]] +name = "supports-unicode" +version = "3.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2" + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a4bf0ff --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "ringels" +description = "Simple S-Expression parser" +version = "0.1.0" +edition = "2021" + +[features] +miette = ["dep:miette"] + +[dependencies] +thiserror = "1" +miette = { version = "7", optional = true } + +[dev-dependencies] +miette = { version = "7", features = ["fancy"] } \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2a83c8e --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# ß (Ringel-S) + +A simple S-Expression parser + +## Features + +- [miette](https://crates.io/crates/miette) error reporting with source annotation + > use the `miette` feature and either `OffsetTracker` or `LineTracker` as tracker +- Location tracking of tokens and nodes +- UTF-8/String first +- Top level can contain multiple nodes + +## Example + +```rust +fn example() { + let mut parser = ParserOptions::new() + .include_comments() + .build_with_tracker::<OffsetTracker>("(hello #| world |#)"); + + let Some(Ok(node)) = parser.next() else { panic!(":(") }; + +} +``` \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..9106481 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,386 @@ +use std::borrow::Cow; +use std::fmt::Debug; +use std::iter::Peekable; +use std::str::CharIndices; +use crate::span::{LineOffset, LineSpan, Span}; + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum Token<'a> { +
/// ( + LParen, + /// ) + RParen, + /// atom + Atom(&'a str), + /// "string" + String(Cow<'a, str>), + /// `#|` + LComment, + /// `|#` + RComment, +} + +/// Errors produced by [`Lexer::next`]; both variants arise while reading a string literal. +#[derive(Debug, Eq, PartialEq)] +pub enum Error<'a, O> { + /// A backslash in a string was followed by a character other than `n`, `\` or `"`. + UnsupportedEscape { + input: &'a str, + offset: O, + character: char, + span: Span, O>, + }, + /// The input ended before the closing `"` of a string literal. + MissingEndQuote { + input: &'a str, + offset: O, + span: Span, O>, + }, +} + +/// Tokenizer over a borrowed input string; the tracker `T` decides what position data each span carries. +#[derive(Debug, Clone)] +pub struct Lexer<'a, T: Tracker = NoTracker> { + pub(crate) input: &'a str, + reader: Peekable>, + tracker: T, +} + +impl<'a> Lexer<'a> { + /// Creates a lexer over `input`. + #[inline] + pub fn new(input: &'a str) -> Self { + Lexer::with_tracker::(input) + } + + /// Creates a lexer over `input` with a default-constructed tracker. + pub fn with_tracker(input: &'a str) -> Lexer<'a, T> { + Lexer { + input, + reader: input.char_indices().peekable(), + tracker: T::default(), + } + } +} + +impl<'a, T: Tracker> Lexer<'a, T> { + /// Returns the next token wrapped in a span, skipping whitespace; yields `None` once the input is exhausted. + pub fn next(&mut self) -> Option, T::Offset>, Error<'a, T::Offset>>> { + while let Some((start_offset, c)) = self.reader.next() { + self.tracker.start(start_offset); + // a run of whitespace produces no token; its newlines are reported to the tracker + if c.is_whitespace() { + if c == '\n' { + self.tracker.process_newline(start_offset); + } + + let mut last_offset = start_offset; + let mut last_char = c; + while let Some((offset, c)) = self.reader.next_if(|(_, c)| c.is_whitespace()) { + if c == '\n' { + self.tracker.process_newline(offset); + } + last_offset = offset; + last_char = c; + } + self.tracker.end(last_offset + last_char.len_utf8(), ()); + continue; + } + + + match c { + '(' => return Some(Ok(self.tracker.end(start_offset + 1, Token::LParen))), + ')' => return Some(Ok(self.tracker.end(start_offset + 1, Token::RParen))), + '"' => { + let mut backup = String::with_capacity(256); + // " is always 1 byte + let mut last_offset = start_offset + 1; + // used_backup: an escape was seen, so the token must be owned; filled_backup: the text before the first escape is already copied into `backup` + let mut used_backup = false; + let mut filled_backup = false; + let mut escape = false; + + while let Some((offset, c)) = self.reader.next() { + if escape { + let prev_offset = last_offset; + // only supports escapes 1 byte wide for now + last_offset = offset + 1; + +
/// Copies the text before the first escape into `backup` (only once); `offset - 1` is the backslash's byte offset. + #[inline(always)] + fn fill_backup(filled_backup: &mut bool, backup: &mut String, prev_offset: usize, offset: usize, input: &str) { + if !*filled_backup { + backup.push_str(&input[prev_offset..offset - 1]); + *filled_backup = true; + } + } + + match c { + 'n' => { + fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input); + backup.push('\n'); + } + '\\' => { + fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input); + backup.push('\\'); + } + '"' => { + fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input); + backup.push('"'); + } + _ => { + let span = self.tracker.end(offset + 1, if filled_backup { + Cow::Owned(backup) + } else { + Cow::Borrowed(&self.input[start_offset + 1..offset - 1]) + }); + + return Some(Err(Error::UnsupportedEscape { + input: self.input, + offset: self.tracker.get_offset(offset - 1), + character: c, + span, + })); + } + } + + escape = false; + continue; + } + + match c { + '\\' => { + escape = true; + used_backup = true; + if filled_backup { + backup.push_str(&self.input[last_offset..offset]); + } + } + '"' => { + return Some(Ok(self.tracker.end(offset + 1, if used_backup { + backup.push_str(&self.input[last_offset..offset]); + Token::String(Cow::Owned(backup)) + } else { + Token::String(Cow::Borrowed(&self.input[last_offset..offset])) + }))); + } + // a raw newline inside the string still starts a new line for the tracker + '\n' => self.tracker.process_newline(offset), + + _ => continue, + } + } + + let span = self.tracker.end(self.input.len(), if used_backup { + backup.push_str(&self.input[last_offset..]); + Cow::Owned(backup) + } else { + Cow::Borrowed(&self.input[last_offset..]) + }); + + return Some(Err(Error::MissingEndQuote { + input: self.input, + offset: span.end, + span, + })); + } + '#' => { + if let Some((_, '|')) = self.reader.peek() { + // consume the pipe + self.reader.next(); + return Some(Ok(self.tracker.end(start_offset + 2, Token::LComment))); + } + + // fall through to atom + } + + '|' => { + if let Some((_, '#')) = self.reader.peek() { + // consume the hash +
self.reader.next(); + return Some(Ok(self.tracker.end(start_offset + 2, Token::RComment))); + } + + // fall through to atom + } + + _ => {} + } + + // `|` and `#` are one byte wide, so `end_offset + 1` is the next byte; `get` keeps a trailing `|` or `#` from indexing past the end of the input + while let Some((end_offset, c)) = self.reader.peek().copied() { + if c == ')' || c == '(' || c.is_whitespace() || c == '"' || (c == '|' && self.input.as_bytes().get(end_offset + 1) == Some(&b'#')) || (c == '#' && self.input.as_bytes().get(end_offset + 1) == Some(&b'|')) { + return Some(Ok(self.tracker.end(end_offset, Token::Atom(&self.input[start_offset..end_offset])))); + } + + self.reader.next(); + } + + return Some(Ok(self.tracker.end(self.input.len(), Token::Atom(&self.input[start_offset..])))); + } + + None + } +} + +/// Position bookkeeping the lexer uses to build spans. +pub trait Tracker { + /// Position type stored in the spans this tracker produces. + type Offset: Copy + Debug; + + /// Called with the byte offset of a `\n` the lexer has consumed. + fn process_newline(&mut self, _offset: usize) {} + + /// Marks `_offset` as the start of the item being read. + fn start(&mut self, _offset: usize) {} + + /// Wraps `value` in a span that ends at `offset`. + fn end(&mut self, offset: usize, value: T) -> Span; + + /// Converts a byte offset into this tracker's position type. + fn get_offset(&self, offset: usize) -> Self::Offset; +} + +/// Tracker that records nothing; every position is `()`. +#[derive(Default, Debug, Copy, Clone)] +pub struct NoTracker; + +impl Tracker for NoTracker { + type Offset = (); + + fn end(&mut self, _offset: usize, value: T) -> Span { + Span::new((), (), value) + } + + fn get_offset(&self, _offset: usize) -> Self::Offset { + () + } +} + +/// Tracker whose positions are the byte offsets handed to it by the lexer. +pub struct OffsetTracker { + start: usize, +} + +impl Default for OffsetTracker { + fn default() -> Self { + OffsetTracker { + start: 0, + } + } +} + +impl Tracker for OffsetTracker { + type Offset = usize; + + fn start(&mut self, offset: usize) { + self.start = offset; + } + + fn end(&mut self, offset: usize, value: T) -> Span { + Span { + start: self.start, + end: offset, + value, + } + } + + fn get_offset(&self, offset: usize) -> Self::Offset { + offset + } +} + +/// Tracker whose positions carry a line number, an offset within that line and the absolute offset. +pub struct LineTracker { + start: LineOffset, + last_line_offset: usize, + line_n: usize, +} + +impl Default for LineTracker { + fn default() -> Self { + LineTracker { + start: LineOffset { + line: 0, + line_offset: 0, + absolute_offset: 0, + }, + last_line_offset: 0, + line_n: 0, + } + } +} + +impl Tracker for LineTracker { + type 
Offset = LineOffset; + + /// Counts the newline and records the byte right after it as the start of the new line. + fn process_newline(&mut self, offset: usize) { + self.line_n += 1; + self.last_line_offset = offset + 1; + } + + /// Remembers the position of `offset` as the start of the next span. + fn start(&mut self, offset: usize) { + self.start = self.get_offset(offset) + } + + /// Builds a span from the remembered start to the position of `offset`. + fn end(&mut self, offset: usize, value: T) -> LineSpan { + Span { + start: self.start, + end: self.get_offset(offset), + value, + } + } + + /// Position of `offset` on the current line; `line_offset` is relative to the last recorded line start. + fn get_offset(&self, offset: usize) -> Self::Offset { + LineOffset { + line: self.line_n, + line_offset: offset - self.last_line_offset, + absolute_offset: offset, + } + } +} + + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + use crate::lexer::{Error, Lexer, LineTracker, OffsetTracker, Token}; + use crate::span::{LineOffset, LineSpan, Span}; + + // every token kind once, without position tracking + #[test] + fn test_simple() { + let mut lex = Lexer::new("( ) #| |# \"hello\" hello \"hello\\n\"\n \n"); + + assert_eq!(lex.next().unwrap().unwrap().value, Token::LParen); + assert_eq!(lex.next().unwrap().unwrap().value, Token::RParen); + assert_eq!(lex.next().unwrap().unwrap().value, Token::LComment); + assert_eq!(lex.next().unwrap().unwrap().value, Token::RComment); + assert_eq!(lex.next().unwrap().unwrap().value, Token::String(Cow::Borrowed("hello"))); + assert_eq!(lex.next().unwrap().unwrap().value, Token::Atom("hello")); + assert_eq!(lex.next().unwrap().unwrap().value, Token::String(Cow::Owned("hello\n".to_string()))); + assert!(lex.next().is_none()) + } + + // the same token kinds with byte-offset spans; the last string uses all three supported escapes + #[test] + fn test_offset_tracker() { + let mut lex = Lexer::with_tracker::(r#"( ) #| |# "hello" hello "hello\n\\\"""#); + + assert_eq!(lex.next().unwrap().unwrap(), Span::new(0, 1, Token::LParen)); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(2, 3, Token::RParen)); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(4, 6, Token::LComment)); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(7, 9, Token::RComment)); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(10, 17, Token::String(Cow::Borrowed("hello")))); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(18, 23, 
Token::Atom("hello"))); + assert_eq!(lex.next().unwrap().unwrap(), Span::new(24, 37, Token::String(Cow::Owned("hello\n\\\"".to_string())))); + assert!(lex.next().is_none()) + } + + // the `)` after the newline must be reported on line 1 + #[test] + fn test_line_tracker() { + let mut lex = Lexer::with_tracker::("(\n )"); + + assert_eq!(lex.next().unwrap().unwrap(), LineSpan::new(LineOffset::new(0, 0, 0), LineOffset::new(0, 1, 1), Token::LParen)); + assert_eq!(lex.next().unwrap().unwrap(), LineSpan::new(LineOffset::new(1, 1, 3), LineOffset::new(1, 2, 4), Token::RParen)); + assert!(lex.next().is_none()) + } + + // both lexer errors; the span value is the string text read before the error + #[test] + fn test_error() { + let mut lex = Lexer::new("\"hello"); + let item = lex.next(); + format!("{:?}", item); + assert_eq!(item, Some(Err(Error::MissingEndQuote { + input: "\"hello", + offset: (), + span: Span::new((), (), Cow::Borrowed("hello")), + }))); + + let mut lex = Lexer::new("\"hello\\x"); + let item = lex.next(); + format!("{:?}", item); + assert_eq!(item, Some(Err(Error::UnsupportedEscape { + input: "\"hello\\x", + offset: (), + character: 'x', + span: Span::new((), (), Cow::Borrowed("hello")), + }))); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..233b037 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod lexer; +pub mod span; +pub mod parser; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..db53f4d --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,425 @@ +use std::borrow::Cow; +use std::fmt::Debug; +#[cfg(feature = "miette")] +use miette::{LabeledSpan, SourceCode}; +use crate::lexer; +use crate::lexer::{Lexer, NoTracker, Token, Tracker}; +use crate::span::Span; +#[cfg(feature = "miette")] +use crate::span::SpanOffset; + +/// Reads nodes from the tokens of a [`Lexer`]; `exclude_comments` decides whether comments are returned as nodes. +pub struct Parser<'a, T: Tracker = NoTracker> { + exclude_comments: bool, + lexer: Lexer<'a, T>, +} + +/// Settings used to build a [`Parser`]. +#[derive(Debug, Copy, Clone)] +pub struct ParserOptions { + pub exclude_comments: bool, +} + +impl ParserOptions { + /// Options with comments excluded. + pub fn new() -> Self { + ParserOptions { + exclude_comments: true, + } + } + + pub fn 
include_comments(mut self) -> Self { + self.exclude_comments = false; + self + } + + /// Makes the parser skip comments instead of returning them as nodes. + pub fn exclude_comments(mut self) -> Self { + self.exclude_comments = true; + self + } + + /// Builds a parser over `input` with these options. + pub fn build(self, input: &str) -> Parser { + self.build_with_tracker::(input) + } + + /// Builds a parser over `input` with these options and a caller-chosen tracker type. + pub fn build_with_tracker(self, input: &str) -> Parser { + Parser::with_options::(input, self) + } +} + +impl Default for ParserOptions { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<'a> Parser<'a> { + /// Creates a parser over `input` with default options. + pub fn new(input: &'a str) -> Parser<'a> { + Parser::with_tracker::(input) + } + + /// Creates a parser over `input`, taking the comment setting from `options`. + pub fn with_options(input: &'a str, options: ParserOptions) -> Parser<'a, T> { + Parser { + exclude_comments: options.exclude_comments, + lexer: Lexer::with_tracker::(input), + } + } + + /// Creates a parser over `input` with default options and a caller-chosen tracker type. + pub fn with_tracker(input: &'a str) -> Parser<'a, T> { + Parser::with_options(input, Default::default()) + } +} + +/// A node together with its span. +type NodeSpan<'a, Offset> = Span, Offset>; + +/// Outcome of one `inner_next` step: end of input, a `)` token, or a complete node. +enum NextResult<'a, O> { + None, + CloseGroup(Span, O>), + Node(NodeSpan<'a, O>), +} + +/// Errors returned by the parser; the first two are the lexer's errors converted to owned data. +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum Error { + #[error("unsupported escape character '{character}'")] + UnsupportedEscape { + #[cfg(feature = "miette")] + input: String, + offset: O, + character: char, + span: Span, + }, + #[error("missing end quote")] + MissingEndQuote { + #[cfg(feature = "miette")] + input: String, + offset: O, + span: Span, + }, + #[error("missing closing bracket for group")] + MissingClosingBracket { + #[cfg(feature = "miette")] + input: String, + start_offset: O, + }, + #[error("dangling close comment token")] + DanglingCloseComment { + #[cfg(feature = "miette")] + input: String, + offset: O, + span: Span<(), O>, + }, + #[error("dangling close group paren")] + DanglingCloseParen { + #[cfg(feature = "miette")] + input: String, + offset: O, + span: Span<(), O>, + }, +} + +#[cfg(feature = "miette")] +impl<'a, O: SpanOffset + Debug> miette::Diagnostic for Error { + fn source_code(&self) -> Option<&dyn SourceCode> { + match self { + 
Error::UnsupportedEscape { input, .. } | + Error::MissingEndQuote { input, .. } | + Error::MissingClosingBracket { input, .. } | + Error::DanglingCloseComment { input, .. } | + Error::DanglingCloseParen { input, .. } => Some(input) + } + } + + /// Labels pointing at the offending place(s) in the source. + fn labels(&self) -> Option + '_>> { + let mut data = vec![]; + match self { + Error::UnsupportedEscape { character, offset, span, .. } => { + data.push(LabeledSpan::new_primary_with_span(Some("here".to_string()), (offset.absolute_offset(), character.len_utf8()))); + data.push(LabeledSpan::new_with_span(Some("in this string".to_string()), span)); + } + Error::MissingEndQuote { span, .. } => { + data.push(LabeledSpan::new_primary_with_span(None, span)); + } + Error::DanglingCloseComment { span, .. } | Error::DanglingCloseParen { span, .. } => { + data.push(LabeledSpan::new_primary_with_span(None, span)); + } + Error::MissingClosingBracket { start_offset, .. } => { + data.push(LabeledSpan::new_primary_with_span(Some("group started here".to_string()), (start_offset.absolute_offset(), 1))); + } + } + + Some(Box::new(data.into_iter())) + } +} + +/// Converts a lexer error into the parser's owned form; `input` is only kept when the `miette` feature is enabled. +impl<'a, O: Debug> From> for Error { + fn from(value: lexer::Error<'a, O>) -> Self { + match value { + lexer::Error::UnsupportedEscape { + input: _input, offset, character, span + } => Error::UnsupportedEscape { + #[cfg(feature = "miette")] + input: _input.to_string(), + offset, + character, + span: span.into_string(), + }, + lexer::Error::MissingEndQuote { input: _input, offset, span } => Error::MissingEndQuote { + #[cfg(feature = "miette")] + input: _input.to_string(), + offset, + span: span.into_string(), + } + } + } +} + +impl<'a, T: Tracker> Parser<'a, T> { + /// Reads tokens until one complete node, a `)` or the end of input is reached; groups are read recursively. + fn inner_next(&mut self) -> Result, Error> { + let mut start: T::Offset; + 'top: while let Some(token) = self.lexer.next() { + let token = token?; + start = token.start; + match token.value { + Token::LParen => { + let mut items = vec![]; + + // collect child nodes until the matching `)` closes this group + loop { + match self.inner_next()? 
{ + NextResult::None => { + return Err(Error::MissingClosingBracket { + #[cfg(feature = "miette")] + input: self.lexer.input.to_string(), + start_offset: start, + }); + } + NextResult::Node(node) => { + items.push(node) + } + NextResult::CloseGroup(span) => { + return Ok(NextResult::Node(Span::new(start, span.end, Node::Group(items)))); + } + } + } + } + Token::RParen => { + return Ok(NextResult::CloseGroup(token)); + } + + Token::LComment => { + // comments nest: count `#|` / `|#` pairs until the outermost one closes + let mut c_depth = 1; + while let Some(token) = self.lexer.next() { + let token = token?; + match token.value { + Token::LComment => { + c_depth += 1; + } + + Token::RComment => { + c_depth -= 1; + if c_depth == 0 { + if self.exclude_comments { + continue 'top; + } + + return Ok(NextResult::Node(Span::new(start, token.end, Node::Comment))); + } + } + _ => {} + } + } + // NOTE(review): reaching this point means the input ended inside an unclosed `#|`; no error is raised here - confirm this is intended + } + + Token::RComment => { + return Err(Error::DanglingCloseComment { + #[cfg(feature = "miette")] + input: self.lexer.input.to_string(), + offset: start, + span: Span::new(token.start, token.end, ()), + }); + } + + Token::String(str) => { + return Ok(NextResult::Node(Span::new(token.start, token.end, Node::String(str)))); + } + Token::Atom(str) => { + return Ok(NextResult::Node(Span::new(token.start, token.end, Node::Atom(str)))); + } + } + } + + Ok(NextResult::None) + } + + /// Returns the next top-level node, or `None` at the end of input; a `)` with no open group is reported as `DanglingCloseParen`. + pub fn next(&mut self) -> Option, Error>> { + match self.inner_next() { + Err(e) => Some(Err(e)), + Ok(NextResult::None) => None, + Ok(NextResult::CloseGroup(span)) => Some(Err(Error::DanglingCloseParen { + #[cfg(feature = "miette")] + input: self.lexer.input.to_string(), + offset: span.start, + span: span.empty(), + })), + Ok(NextResult::Node(node)) => Some(Ok(node)), + } + } +} + +/// A parsed node: a parenthesised group of nodes, an atom, a string, or a `#| ... |#` comment. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Node<'a, Offset = ()> { + Group(Vec, Offset>>), + Atom(&'a str), + String(Cow<'a, str>), + Comment, +} + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + use crate::lexer::{LineTracker, OffsetTracker}; + use crate::parser::{Error, Node, Parser, 
ParserOptions}; + use crate::span::Span; + + // a lone atom, then a group holding an atom and a string + #[test] + pub fn test_simple() { + let mut parser = Parser::new(":"); + assert_eq!(parser.next().unwrap().unwrap().value, Node::Atom(":")); + assert!(parser.next().is_none()); + + let mut parser = Parser::new(r#"(:next "wow")"#); + let Some(Ok(Span { value: Node::Group(items), .. })) = parser.next() else { panic!() }; + assert_eq!(items.len(), 2); + assert_eq!(items[0].value, Node::Atom(":next")); + assert_eq!(items[1].value, Node::String(Cow::Borrowed("wow"))); + } + + // smoke test: drains the parser over a larger document; the returned items are not inspected + #[test] + pub fn test_example() { + let example = r#"(service "dbus" + (env "DBUS_SESSION_BUS_ADDRESS" :export) + (exec dbus-daemon --nofork --session ("--print-address=" (fd :env "DBUS_SESSION_BUS_ADDRESS"))) + (layer interactive) +) + +(service "ssh-agent" + (env "SSH_AUTH_SOCK" :export (create-socket)) + (exec ssh-agent -D -a (env "SSH_AUTH_SOCK")) + (layer interactive) +) + +(service "pipewire" + (exec pipewire) + (needs (:after dbus)) +) + +(service "pipewire-pulse" + (exec pipewire-pulse) + (needs (:after pipewire)) +) + +(service "wireplumber" + (exec wireplumber) + (needs (:after pipewire)) +)"#; + + let mut parser = Parser::with_tracker::(example); + while let Some(_) = parser.next() {} + } + + // comments (including nested ones) are skipped by default and returned as nodes when included + #[test] + fn test_ignore_comments() { + let mut parser = Parser::new("#| #| hello! |# |# : #| bye! |#"); + assert_eq!(parser.next().unwrap().unwrap().value, Node::Atom(":")); + assert!(parser.next().is_none()); + + let mut parser = Parser::new(r#"(:next #| hello! |# "wow")"#); + let Some(Ok(Span { value, .. 
})) = parser.next() else { panic!() };
+        let _ = format!("{:?}", value); // exercise the Debug impl; the text itself is not inspected
+        let Node::Group(items) = value else { panic!() };
+        assert_eq!(items.len(), 2); // the comment inside the group is not counted as an item
+        assert_eq!(items[0].value, Node::Atom(":next"));
+        assert_eq!(items[1].value, Node::String(Cow::Borrowed("wow")));
+
+        let mut parser = ParserOptions::new().include_comments().build("#| hello |#");
+        let item = parser.next();
+        assert_eq!(item, Some(Ok(Span::new((), (), Node::Comment)))); // with comments included, a Comment node is yielded
+
+        let mut parser = ParserOptions::new().include_comments().exclude_comments().build("#| hello |#");
+        let item = parser.next();
+        assert_eq!(item, None); // the later exclude_comments() call takes effect: nothing is yielded
+
+        let _ = format!("{:?}", ParserOptions::new()); // exercise the Debug impl of the options builder
+    }
+
+    #[test]
+    fn test_errors() { // Each malformed input must produce exactly the expected error value.
+        let mut parser = Parser::with_tracker::("|#"); // comment closer without an opener
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::DanglingCloseComment {
+            #[cfg(feature = "miette")]
+            input: "|#".to_string(),
+            offset: 0,
+            span: Span::new(0, 2, ()),
+        })));
+
+        let mut parser = Parser::with_tracker::(")"); // closing paren without an open group
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::DanglingCloseParen {
+            #[cfg(feature = "miette")]
+            input: ")".to_string(), // must mirror the text given to the parser above (was "|#", copied from the previous case)
+            offset: 0,
+            span: Span::new(0, 1, ()),
+        })));
+
+        let mut parser = Parser::with_tracker::("\"hello\\x\""); // unsupported escape after some plain text
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::UnsupportedEscape {
+            #[cfg(feature = "miette")]
+            input: "\"hello\\x\"".to_string(),
+            offset: 6,
+            character: 'x',
+            span: Span::new(0, 8, "hello".to_string()),
+        })));
+
+        let mut parser = Parser::with_tracker::("\"\\n\\x\""); // unsupported escape right after a supported one
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::UnsupportedEscape {
+            #[cfg(feature = "miette")]
+            input: "\"\\n\\x\"".to_string(),
+            offset: 3,
+            character: 'x',
+            span: Span::new(0, 5, "\n".to_string()),
+        })));
+
+        let mut parser = Parser::with_tracker::("\"hello"); // string that is never closed
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::MissingEndQuote {
+            #[cfg(feature = "miette")]
+            input: "\"hello".to_string(),
+            offset: 6,
+            span: Span::new(0, 6, "hello".to_string()),
+        })));
+
+        let mut parser = Parser::with_tracker::("\"hello\\n"); // unclosed string ending in an escape
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::MissingEndQuote {
+            #[cfg(feature = "miette")]
+            input: "\"hello\\n".to_string(),
+            offset: 8,
+            span: Span::new(0, 8, "hello\n".to_string()),
+        })));
+
+        let mut parser = Parser::with_tracker::("("); // group that is never closed
+        let item = parser.next();
+        let _ = format!("{:?}", item);
+        assert_eq!(item, Some(Err(Error::MissingClosingBracket {
+            #[cfg(feature = "miette")]
+            input: "(".to_string(),
+            start_offset: 0,
+        })));
+    }
+}
\ No newline at end of file
diff --git a/src/span.rs b/src/span.rs
new file mode 100644
index 0000000..fd45ba7
--- /dev/null
+++ b/src/span.rs
@@ -0,0 +1,122 @@
+use std::ops::{Deref, DerefMut};
+#[cfg(feature = "miette")]
+use miette::SourceSpan;
+
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
+pub struct Span { // A value together with the start and end offsets it covers. NOTE(review): `V` and `O` are used below but no generic parameter list is visible in this text — confirm against the repository
+    pub start: O, // offset where the value begins
+    pub end: O, // offset where the value ends
+    pub value: V, // the payload carried by the span
+}
+
+impl Span {
+    pub fn with_value(self, value: N) -> Span { // Keeps `start`/`end` and swaps the payload for `value`.
+        Span {
+            start: self.start,
+            end: self.end,
+            value,
+        }
+    }
+
+    pub fn empty(self) -> Span<(), O> { // Drops the payload, keeping only the offsets.
+        self.with_value(())
+    }
+}
+
+impl Span {
+    pub fn into_string(self) -> Span { // Replaces the payload with its `to_string()` form, keeping the offsets.
+        Span {
+            start: self.start,
+            end: self.end,
+            value: self.value.to_string()
+        }
+    }
+}
+
+pub trait SpanOffset { // An offset type that can report its position as a plain `usize`.
+    fn absolute_offset(&self) -> usize;
+}
+
+impl SpanOffset for usize {
+    fn absolute_offset(&self) -> usize { // a plain `usize` is its own absolute offset
+        *self
+    }
+}
+
+impl Span {
+    #[inline]
+    pub fn new(start: O, end: O, value: V) -> Self { // Builds a span from its three parts.
+        Span {
+            start,
+            end,
+            value,
+        }
+    }
+
+    pub fn unwrap(self) -> V { // Consumes the span and returns the payload; this only moves a field out and cannot panic.
+        self.value
+    }
+}
+
+impl Deref for Span { // a span dereferences to its payload
+    type Target = V;
+
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+impl DerefMut for Span { // mutable access to the payload through the span
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.value
+    }
+}
+
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
+pub 
struct LineOffset { // A position given as a line, an offset within that line, and an absolute offset.
+    pub line: usize, // line index; the test below expects 0 before any newline and 1 after one
+    pub line_offset: usize, // offset within the current line — presumably counted from the line start; confirm against LineTracker
+    pub absolute_offset: usize, // the value reported by `SpanOffset::absolute_offset`
+}
+
+impl LineOffset {
+    pub fn new(line: usize, line_offset: usize, absolute_offset: usize) -> LineOffset { // Builds a LineOffset from its three fields.
+        LineOffset {
+            line,
+            line_offset,
+            absolute_offset,
+        }
+    }
+}
+
+impl SpanOffset for LineOffset {
+    fn absolute_offset(&self) -> usize { // returns the stored `absolute_offset` field
+        self.absolute_offset
+    }
+}
+
+pub type LineSpan = Span; // NOTE(review): presumably a Span whose offsets are LineOffset; the alias's generic arguments are not visible in this text — confirm
+
+#[cfg(feature = "miette")]
+impl Into for &Span { // NOTE(review): the trait and type arguments are missing in this text; implementing `From` for SourceSpan instead would be the idiomatic direction and yields `Into` automatically
+    fn into(self) -> SourceSpan { // Builds the SourceSpan from the pair (absolute start, end minus start).
+        (self.start.absolute_offset(), self.end.absolute_offset() - self.start.absolute_offset()).into() // NOTE(review): the subtraction assumes end >= start; confirm no caller can build a reversed span
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::lexer::{LineTracker, Tracker};
+    use crate::span::{LineOffset, Span, SpanOffset};
+
+    #[test]
+    pub fn simple_test() { // Checks the span a LineTracker reports across one newline, plus both SpanOffset impls.
+        let mut line_tracker = LineTracker::default();
+
+        line_tracker.start(0);
+        line_tracker.process_newline(2);
+        assert_eq!(Span::new(LineOffset::new(0, 0, 0), LineOffset::new(1, 1, 4), ()), line_tracker.end(4, ())); // start at 0 and a newline recorded at 2: ending at 4 is expected on line 1 with line offset 1
+        assert_eq!(1, LineOffset::new(0, 0, 1).absolute_offset());
+        assert_eq!(1, 1usize.absolute_offset());
+    }
+}
\ No newline at end of file