Initial commit

main
eater 3 months ago
parent a41be3767d
commit e37938a394

1
.gitignore vendored

@ -0,0 +1 @@
target

431
Cargo.lock generated

@ -0,0 +1,431 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "backtrace"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "backtrace-ext"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
dependencies = [
"backtrace",
]
[[package]]
name = "bitflags"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "errno"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "gimli"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "is_ci"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45"
[[package]]
name = "libc"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "linux-raw-sys"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "miette"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98a72adfa0c7ae88ba0abcbd00047a476616c66b831d628b8ac7f1e9de0cfd67"
dependencies = [
"backtrace",
"backtrace-ext",
"miette-derive",
"owo-colors",
"supports-color",
"supports-hyperlinks",
"supports-unicode",
"terminal_size",
"textwrap",
"thiserror",
"unicode-width",
]
[[package]]
name = "miette-derive"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "279def6bf114a34b3cf887489eb440d4dfcf709ab3ce9955e4a6f957ce5cce77"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "miniz_oxide"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
dependencies = [
"adler",
]
[[package]]
name = "object"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
dependencies = [
"memchr",
]
[[package]]
name = "owo-colors"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caff54706df99d2a78a5a4e3455ff45448d81ef1bb63c22cd14052ca0e993a3f"
[[package]]
name = "proc-macro2"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ringels"
version = "0.1.0"
dependencies = [
"miette",
"thiserror",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
version = "0.38.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
]
[[package]]
name = "smawk"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
[[package]]
name = "supports-color"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9829b314621dfc575df4e409e79f9d6a66a3bd707ab73f23cb4aa3a854ac854f"
dependencies = [
"is_ci",
]
[[package]]
name = "supports-hyperlinks"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c0a1e5168041f5f3ff68ff7d95dcb9c8749df29f6e7e89ada40dd4c9de404ee"
[[package]]
name = "supports-unicode"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2"
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "terminal_size"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
dependencies = [
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "textwrap"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
dependencies = [
"smawk",
"unicode-linebreak",
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-linebreak"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
[[package]]
name = "unicode-width"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

@ -0,0 +1,15 @@
[package]
name = "ringels"
description = "Simple S-Expression parser"
version = "0.1.0"
edition = "2021"
[features]
miette = ["dep:miette"]
[dependencies]
thiserror = "1"
miette = { version = "7", optional = true }
[dev-dependencies]
miette = { version = "7", features = ["fancy"] }

@ -0,0 +1,24 @@
# ß (Ringel-S)
A simple S-Expression parser
## Features
- [miette](http://crates.io/crates/miette) error reporting with source annotation
> use the `miette` feature and either `OffsetTracker` or `LineTracker` as tracker
- Location tracking of tokens and nodes
- UTF-8/String first
- Top level can contain multiple nodes
## Example
```rust
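use ringels::lexer::OffsetTracker;
use ringels::parser::ParserOptions;

fn example() {
    // `next` takes `&mut self`, so the parser binding must be mutable.
    let mut parser = ParserOptions::new()
        .include_comments()
        .build_with_tracker::<OffsetTracker>("(hello #| world |#)");
    let Some(Ok(node)) = parser.next() else { panic!(":(") };
}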
```

@ -0,0 +1,386 @@
use std::borrow::Cow;
use std::fmt::Debug;
use std::iter::Peekable;
use std::str::CharIndices;
use crate::span::{LineOffset, LineSpan, Span};
/// A single lexical token produced by the lexer.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token<'a> {
    /// Opening parenthesis `(`.
    LParen,
    /// Closing parenthesis `)`.
    RParen,
    /// A bare atom, borrowed directly from the input.
    Atom(&'a str),
    /// A double-quoted string with its quotes removed and escapes decoded.
    /// Borrowed from the input when no escape sequence had to be decoded.
    String(Cow<'a, str>),
    /// Comment opener `#|`.
    LComment,
    /// Comment closer `|#`.
    RComment,
}
/// Errors the lexer can report while reading a string literal.
///
/// `O` is the offset type of the tracker in use.
#[derive(Debug, Eq, PartialEq)]
pub enum Error<'a, O> {
    /// A backslash inside a string was followed by a character that is not a
    /// supported escape (`n`, `\` and `"` are the supported ones).
    UnsupportedEscape {
        /// The complete lexer input.
        input: &'a str,
        /// Tracker offset of the backslash that starts the escape.
        offset: O,
        /// The character that followed the backslash.
        character: char,
        /// Span from the opening quote up to the end of the offending escape;
        /// its value is the string content decoded before the escape.
        span: Span<Cow<'a, str>, O>,
    },
    /// The input ended before the closing `"` of a string was found.
    MissingEndQuote {
        /// The complete lexer input.
        input: &'a str,
        /// Tracker offset of the end of the span (the end of the input).
        offset: O,
        /// Span from the opening quote to the end of the input; its value is
        /// the string content read so far.
        span: Span<Cow<'a, str>, O>,
    },
}
/// Tokenizer over a borrowed input string.
///
/// `T` is the [`Tracker`] that turns byte offsets into the offsets stored in
/// the spans of produced tokens; it defaults to [`NoTracker`].
#[derive(Debug, Clone)]
pub struct Lexer<'a, T: Tracker = NoTracker> {
    /// The complete input being lexed; tokens and errors borrow from it.
    pub(crate) input: &'a str,
    /// Character cursor over `input` with one character of lookahead.
    reader: Peekable<CharIndices<'a>>,
    /// Location tracker notified of token starts, token ends and newlines.
    tracker: T,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` that uses [`NoTracker`], i.e. records no
    /// location information.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self::with_tracker::<NoTracker>(input)
    }

    /// Creates a lexer over `input` whose spans are produced by a
    /// default-constructed tracker of type `T`.
    pub fn with_tracker<T: Default + Tracker>(input: &'a str) -> Lexer<'a, T> {
        let reader = input.char_indices().peekable();
        let tracker = T::default();
        Lexer { input, reader, tracker }
    }
}
impl<'a, T: Tracker> Lexer<'a, T> {
pub fn next(&mut self) -> Option<Result<Span<Token<'a>, T::Offset>, Error<'a, T::Offset>>> {
while let Some((start_offset, c)) = self.reader.next() {
self.tracker.start(start_offset);
if c.is_whitespace() {
if c == '\n' {
self.tracker.process_newline(start_offset);
}
let mut last_offset = start_offset;
let mut last_char = c;
while let Some((offset, c)) = self.reader.next_if(|(_, c)| c.is_whitespace()) {
if c == '\n' {
self.tracker.process_newline(offset);
}
last_offset = offset;
last_char = c;
}
self.tracker.end(last_offset + last_char.len_utf8(), ());
continue;
}
match c {
'(' => return Some(Ok(self.tracker.end(start_offset + 1, Token::LParen))),
')' => return Some(Ok(self.tracker.end(start_offset + 1, Token::RParen))),
'"' => {
let mut backup = String::with_capacity(256);
// " is always 1 byte
let mut last_offset = start_offset + 1;
let mut used_backup = false;
let mut filled_backup = false;
let mut escape = false;
while let Some((offset, c)) = self.reader.next() {
if escape {
let prev_offset = last_offset;
// only supports escapes 1 byte wide for now
last_offset = offset + 1;
#[inline(always)]
fn fill_backup(filled_backup: &mut bool, backup: &mut String, prev_offset: usize, offset: usize, input: &str) {
if !*filled_backup {
backup.push_str(&input[prev_offset..offset - 1]);
*filled_backup = true;
}
}
match c {
'n' => {
fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input);
backup.push('\n');
}
'\\' => {
fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input);
backup.push('\\');
}
'"' => {
fill_backup(&mut filled_backup, &mut backup, prev_offset, offset, self.input);
backup.push('"');
}
_ => {
let span = self.tracker.end(offset + 1, if filled_backup {
Cow::Owned(backup)
} else {
Cow::Borrowed(&self.input[start_offset + 1..offset - 1])
});
return Some(Err(Error::UnsupportedEscape {
input: self.input,
offset: self.tracker.get_offset(offset - 1),
character: c,
span,
}));
}
}
escape = false;
continue;
}
match c {
'\\' => {
escape = true;
used_backup = true;
if filled_backup {
backup.push_str(&self.input[last_offset..offset]);
}
}
'"' => {
return Some(Ok(self.tracker.end(offset + 1, if used_backup {
backup.push_str(&self.input[last_offset..offset]);
Token::String(Cow::Owned(backup))
} else {
Token::String(Cow::Borrowed(&self.input[last_offset..offset]))
})));
}
_ => continue,
}
}
let span = self.tracker.end(self.input.len(), if used_backup {
backup.push_str(&self.input[last_offset..]);
Cow::Owned(backup)
} else {
Cow::Borrowed(&self.input[last_offset..])
});
return Some(Err(Error::MissingEndQuote {
input: self.input,
offset: span.end,
span,
}));
}
'#' => {
if let Some((_, '|')) = self.reader.peek() {
// consume the pipe
self.reader.next();
return Some(Ok(self.tracker.end(start_offset + 2, Token::LComment)));
}
// fall through to atom
}
'|' => {
if let Some((_, '#')) = self.reader.peek() {
// consume the pipe
self.reader.next();
return Some(Ok(self.tracker.end(start_offset + 2, Token::RComment)));
}
// fall through to atom
}
_ => {}
}
while let Some((end_offset, c)) = self.reader.peek().copied() {
if c == ')' || c == '(' || c.is_whitespace() || c == '"' || (c == '|' && self.input.as_bytes()[end_offset + 1] == b'#') || (c == '#' && self.input.as_bytes()[end_offset + 1] == b'|') {
return Some(Ok(self.tracker.end(end_offset, Token::Atom(&self.input[start_offset..end_offset]))));
}
self.reader.next();
}
return Some(Ok(self.tracker.end(self.input.len(), Token::Atom(&self.input[start_offset..]))));
}
None
}
}
/// Strategy for turning byte offsets into the offsets stored in [`Span`]s.
pub trait Tracker {
    /// Offset representation placed in the spans this tracker produces.
    type Offset: Copy + Debug;
    /// Notifies the tracker of a `'\n'` at byte offset `_offset`. No-op by default.
    fn process_newline(&mut self, _offset: usize) {}
    /// Marks byte offset `_offset` as the start of the next span. No-op by default.
    fn start(&mut self, _offset: usize) {}
    /// Builds a span carrying `value` that ends at byte offset `offset`.
    fn end<T>(&mut self, offset: usize, value: T) -> Span<T, Self::Offset>;
    /// Converts the byte offset `offset` into this tracker's offset type.
    fn get_offset(&self, offset: usize) -> Self::Offset;
}
#[derive(Default, Debug, Copy, Clone)]
pub struct NoTracker;
impl Tracker for NoTracker {
type Offset = ();
fn end<T>(&mut self, _offset: usize, value: T) -> Span<T, Self::Offset> {
Span::new((), (), value)
}
fn get_offset(&self, _offset: usize) -> Self::Offset {
()
}
}
/// Tracker that reports plain byte offsets.
///
/// `Default` starts at offset `0`. `Debug` and `Clone` are derived so that the
/// derived `Debug`/`Clone` impls of `Lexer<'_, OffsetTracker>` are usable, in
/// line with `NoTracker`.
#[derive(Default, Debug, Clone)]
pub struct OffsetTracker {
    /// Byte offset at which the current span started.
    start: usize,
}
impl Tracker for OffsetTracker {
type Offset = usize;
fn start(&mut self, offset: usize) {
self.start = offset;
}
fn end<T>(&mut self, offset: usize, value: T) -> Span<T> {
Span {
start: self.start,
end: offset,
value,
}
}
fn get_offset(&self, offset: usize) -> Self::Offset {
offset
}
}
/// Tracker that reports line, in-line and absolute offsets.
///
/// `Debug` and `Clone` are derived so that the derived `Debug`/`Clone` impls
/// of `Lexer<'_, LineTracker>` are usable, in line with `NoTracker`.
#[derive(Debug, Clone)]
pub struct LineTracker {
    /// Offset at which the current span started.
    start: LineOffset,
    /// Byte offset of the first character of the current line.
    last_line_offset: usize,
    /// Zero-based number of the current line.
    line_n: usize,
}

impl Default for LineTracker {
    /// Starts on line 0 at offset 0.
    fn default() -> Self {
        LineTracker {
            start: LineOffset {
                line: 0,
                line_offset: 0,
                absolute_offset: 0,
            },
            last_line_offset: 0,
            line_n: 0,
        }
    }
}
impl Tracker for LineTracker {
type Offset = LineOffset;
fn process_newline(&mut self, offset: usize) {
self.line_n += 1;
self.last_line_offset = offset + 1;
}
fn start(&mut self, offset: usize) {
self.start = self.get_offset(offset)
}
fn end<T>(&mut self, offset: usize, value: T) -> LineSpan<T> {
Span {
start: self.start,
end: self.get_offset(offset),
value,
}
}
fn get_offset(&self, offset: usize) -> Self::Offset {
LineOffset {
line: self.line_n,
line_offset: offset - self.last_line_offset,
absolute_offset: offset,
}
}
}
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use crate::lexer::{Error, Lexer, LineTracker, OffsetTracker, Token};
    use crate::span::{LineOffset, LineSpan, Span};

    /// Every token kind is recognised in order; trailing whitespace yields nothing.
    #[test]
    fn test_simple() {
        let mut lex = Lexer::new("( ) #| |# \"hello\" hello \"hello\\n\"\n \n");
        let expected = [
            Token::LParen,
            Token::RParen,
            Token::LComment,
            Token::RComment,
            Token::String(Cow::Borrowed("hello")),
            Token::Atom("hello"),
            Token::String(Cow::Owned("hello\n".to_string())),
        ];
        for token in expected {
            assert_eq!(lex.next().unwrap().unwrap().value, token);
        }
        assert!(lex.next().is_none())
    }

    /// `OffsetTracker` reports byte offsets for each token.
    #[test]
    fn test_offset_tracker() {
        let mut lex = Lexer::with_tracker::<OffsetTracker>(r#"( ) #| |# "hello" hello "hello\n\\\"""#);
        let expected = [
            (0, 1, Token::LParen),
            (2, 3, Token::RParen),
            (4, 6, Token::LComment),
            (7, 9, Token::RComment),
            (10, 17, Token::String(Cow::Borrowed("hello"))),
            (18, 23, Token::Atom("hello")),
            (24, 37, Token::String(Cow::Owned("hello\n\\\"".to_string()))),
        ];
        for (start, end, token) in expected {
            assert_eq!(lex.next().unwrap().unwrap(), Span::new(start, end, token));
        }
        assert!(lex.next().is_none())
    }

    /// `LineTracker` reports line numbers and in-line offsets.
    #[test]
    fn test_line_tracker() {
        let mut lex = Lexer::with_tracker::<LineTracker>("(\n )");

        let open = LineSpan::new(LineOffset::new(0, 0, 0), LineOffset::new(0, 1, 1), Token::LParen);
        assert_eq!(lex.next().unwrap().unwrap(), open);

        let close = LineSpan::new(LineOffset::new(1, 1, 3), LineOffset::new(1, 2, 4), Token::RParen);
        assert_eq!(lex.next().unwrap().unwrap(), close);

        assert!(lex.next().is_none())
    }

    /// Malformed strings produce the matching lexer error.
    #[test]
    fn test_error() {
        let unterminated = Error::MissingEndQuote {
            input: "\"hello",
            offset: (),
            span: Span::new((), (), Cow::Borrowed("hello")),
        };
        let mut lex = Lexer::new("\"hello");
        let item = lex.next();
        // Exercise the `Debug` impl as well.
        let _ = format!("{:?}", item);
        assert_eq!(item, Some(Err(unterminated)));

        let bad_escape = Error::UnsupportedEscape {
            input: "\"hello\\x",
            offset: (),
            character: 'x',
            span: Span::new((), (), Cow::Borrowed("hello")),
        };
        let mut lex = Lexer::new("\"hello\\x");
        let item = lex.next();
        let _ = format!("{:?}", item);
        assert_eq!(item, Some(Err(bad_escape)));
    }
}

@ -0,0 +1,3 @@
//! A simple S-expression parser.
//!
//! The crate is split into a tokenizer ([`lexer`]), a parser that builds nodes
//! from tokens ([`parser`]) and the span/offset types shared by both ([`span`]).

/// Tokenizer and the location trackers it can be combined with.
pub mod lexer;
/// Span and offset types attached to tokens, nodes and errors.
pub mod span;
/// Parser that turns the token stream into nodes.
pub mod parser;

@ -0,0 +1,425 @@
use std::borrow::Cow;
use std::fmt::Debug;
#[cfg(feature = "miette")]
use miette::{LabeledSpan, SourceCode};
use crate::lexer;
use crate::lexer::{Lexer, NoTracker, Token, Tracker};
use crate::span::Span;
#[cfg(feature = "miette")]
use crate::span::SpanOffset;
/// S-expression parser over a borrowed input string.
///
/// `T` is the lexer's location [`Tracker`]; it defaults to [`NoTracker`].
pub struct Parser<'a, T: Tracker = NoTracker> {
    /// When `true`, comments are skipped instead of being returned as nodes.
    exclude_comments: bool,
    /// Token source.
    lexer: Lexer<'a, T>,
}
/// Builder-style configuration for a [`Parser`].
#[derive(Debug, Copy, Clone)]
pub struct ParserOptions {
    /// When `true`, comments are skipped instead of being returned as nodes.
    pub exclude_comments: bool,
}
impl ParserOptions {
    /// Creates the default options: comments are excluded.
    pub fn new() -> Self {
        Self { exclude_comments: true }
    }

    /// Makes the parser return comments as nodes.
    pub fn include_comments(self) -> Self {
        self.with_exclude_comments(false)
    }

    /// Makes the parser skip comments.
    pub fn exclude_comments(self) -> Self {
        self.with_exclude_comments(true)
    }

    /// Builds a parser over `input` that records no location information.
    pub fn build(self, input: &str) -> Parser {
        self.build_with_tracker::<NoTracker>(input)
    }

    /// Builds a parser over `input` that uses a default-constructed tracker `T`.
    pub fn build_with_tracker<T: Tracker + Default>(self, input: &str) -> Parser<T> {
        Parser::with_options::<T>(input, self)
    }

    /// Returns the options with the comment flag set to `excluded`.
    fn with_exclude_comments(mut self, excluded: bool) -> Self {
        self.exclude_comments = excluded;
        self
    }
}
impl Default for ParserOptions {
    /// Same as [`ParserOptions::new`].
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
impl<'a> Parser<'a> {
pub fn new(input: &'a str) -> Parser<'a> {
Parser::with_tracker::<NoTracker>(input)
}
pub fn with_options<T: Tracker + Default>(input: &'a str, options: ParserOptions) -> Parser<'a, T> {
Parser {
exclude_comments: options.exclude_comments,
lexer: Lexer::with_tracker::<T>(input),
}
}
pub fn with_tracker<T: Tracker + Default>(input: &'a str) -> Parser<'a, T> {
Parser::with_options(input, Default::default())
}
}
/// A [`Node`] together with the span it occupies; node and span share one offset type.
type NodeSpan<'a, Offset> = Span<Node<'a, Offset>, Offset>;
/// Outcome of one internal parsing step.
enum NextResult<'a, O> {
    /// The lexer produced no further tokens.
    None,
    /// A `)` token was read; carries that token so the caller can use its span.
    CloseGroup(Span<Token<'a>, O>),
    /// A complete node was parsed.
    Node(NodeSpan<'a, O>),
}
/// Errors reported by the parser.
///
/// `O` is the offset type of the tracker in use. The `input` fields exist only
/// with the `miette` feature enabled and hold an owned copy of the parser input.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum Error<O: Debug> {
    /// A backslash inside a string was followed by an unsupported character.
    #[error("unsupported escape character '{character}'")]
    UnsupportedEscape {
        /// Owned copy of the parser input.
        #[cfg(feature = "miette")]
        input: String,
        /// Offset reported by the lexer for the escape.
        offset: O,
        /// The character that followed the backslash.
        character: char,
        /// Span of the string read so far, with its content as value.
        span: Span<String, O>,
    },
    /// The input ended before a string's closing quote.
    #[error("missing end quote")]
    MissingEndQuote {
        /// Owned copy of the parser input.
        #[cfg(feature = "miette")]
        input: String,
        /// Offset reported by the lexer for the error.
        offset: O,
        /// Span of the string read so far, with its content as value.
        span: Span<String, O>,
    },
    /// The input ended while a group opened with `(` was still open.
    #[error("missing closing bracket for group")]
    MissingClosingBracket {
        /// Owned copy of the parser input.
        #[cfg(feature = "miette")]
        input: String,
        /// Offset at which the unclosed group started.
        start_offset: O,
    },
    /// A `|#` token appeared outside of any comment.
    #[error("dangling close comment token")]
    DanglingCloseComment {
        /// Owned copy of the parser input.
        #[cfg(feature = "miette")]
        input: String,
        /// Start offset of the `|#` token.
        offset: O,
        /// Span of the `|#` token.
        span: Span<(), O>,
    },
    /// A `)` token appeared outside of any group.
    #[error("dangling close group paren")]
    DanglingCloseParen {
        /// Owned copy of the parser input.
        #[cfg(feature = "miette")]
        input: String,
        /// Start offset of the `)` token.
        offset: O,
        /// Span of the `)` token.
        span: Span<(), O>,
    },
}
/// `miette` integration: exposes the stored input as source code and labels
/// the location(s) relevant to each error.
#[cfg(feature = "miette")]
impl<O: SpanOffset + Debug> miette::Diagnostic for Error<O> {
    /// Every variant carries the full input, which serves as the source code.
    fn source_code(&self) -> Option<&dyn SourceCode> {
        match self {
            Error::UnsupportedEscape { input, .. }
            | Error::MissingEndQuote { input, .. }
            | Error::MissingClosingBracket { input, .. }
            | Error::DanglingCloseComment { input, .. }
            | Error::DanglingCloseParen { input, .. } => Some(input),
        }
    }

    /// Labels pointing at the offending part of the source.
    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
        let labels = match self {
            Error::UnsupportedEscape { character, offset, span, .. } => vec![
                // `offset` is the position of the backslash, so the label covers
                // the backslash (1 byte) plus the whole escaped character. Using
                // only the character's width would leave the label short and,
                // for multi-byte characters, ending inside a code point.
                LabeledSpan::new_primary_with_span(
                    Some("here".to_string()),
                    (offset.absolute_offset(), 1 + character.len_utf8()),
                ),
                LabeledSpan::new_with_span(Some("in this string".to_string()), span),
            ],
            Error::MissingEndQuote { span, .. }
            | Error::DanglingCloseComment { span: _, .. } if false => unreachable!(),
            Error::MissingEndQuote { span, .. } => {
                vec![LabeledSpan::new_primary_with_span(None, span)]
            }
            Error::DanglingCloseComment { span, .. } | Error::DanglingCloseParen { span, .. } => {
                vec![LabeledSpan::new_primary_with_span(None, span)]
            }
            Error::MissingClosingBracket { start_offset, .. } => {
                // The opening paren is one byte wide.
                vec![LabeledSpan::new_primary_with_span(
                    Some("group started here".to_string()),
                    (start_offset.absolute_offset(), 1),
                )]
            }
        };
        Some(Box::new(labels.into_iter()))
    }
}
impl<'a, O: Debug> From<lexer::Error<'a, O>> for Error<O> {
fn from(value: lexer::Error<'a, O>) -> Self {
match value {
lexer::Error::UnsupportedEscape {
input: _input, offset, character, span
} => Error::UnsupportedEscape {
#[cfg(feature = "miette")]
input: _input.to_string(),
offset,
character,
span: span.into_string(),
},
lexer::Error::MissingEndQuote { input: _input, offset, span } => Error::MissingEndQuote {
#[cfg(feature = "miette")]
input: _input.to_string(),
offset,
span: span.into_string(),
}
}
}
}
impl<'a, T: Tracker> Parser<'a, T> {
fn inner_next(&mut self) -> Result<NextResult<'a, T::Offset>, Error<T::Offset>> {
let mut start: T::Offset;
'top: while let Some(token) = self.lexer.next() {
let token = token?;
start = token.start;
match token.value {
Token::LParen => {
let mut items = vec![];
loop {
match self.inner_next()? {
NextResult::None => {
return Err(Error::MissingClosingBracket {
#[cfg(feature = "miette")]
input: self.lexer.input.to_string(),
start_offset: start,
});
}
NextResult::Node(node) => {
items.push(node)
}
NextResult::CloseGroup(span) => {
return Ok(NextResult::Node(Span::new(start, span.end, Node::Group(items))));
}
}
}
}
Token::RParen => {
return Ok(NextResult::CloseGroup(token));
}
Token::LComment => {
let mut c_depth = 1;
while let Some(token) = self.lexer.next() {
let token = token?;
match token.value {
Token::LComment => {
c_depth += 1;
}
Token::RComment => {
c_depth -= 1;
if c_depth == 0 {
if self.exclude_comments {
continue 'top;
}
return Ok(NextResult::Node(Span::new(start, token.end, Node::Comment)));
}
}
_ => {}
}
}
}
Token::RComment => {
return Err(Error::DanglingCloseComment {
#[cfg(feature = "miette")]
input: self.lexer.input.to_string(),
offset: start,
span: Span::new(token.start, token.end, ()),
});
}
Token::String(str) => {
return Ok(NextResult::Node(Span::new(token.start, token.end, Node::String(str))));
}
Token::Atom(str) => {
return Ok(NextResult::Node(Span::new(token.start, token.end, Node::Atom(str))));
}
}
}
Ok(NextResult::None)
}
pub fn next(&mut self) -> Option<Result<NodeSpan<T::Offset>, Error<T::Offset>>> {
match self.inner_next() {
Err(e) => Some(Err(e)),
Ok(NextResult::None) => None,
Ok(NextResult::CloseGroup(span)) => Some(Err(Error::DanglingCloseParen {
#[cfg(feature = "miette")]
input: self.lexer.input.to_string(),
offset: span.start,
span: span.empty(),
})),
Ok(NextResult::Node(node)) => Some(Ok(node)),
}
}
}
/// A parsed S-expression node.
///
/// `Offset` is the offset type used by the spans of nested nodes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Node<'a, Offset = ()> {
    /// A parenthesised group and its child nodes, each with its own span.
    Group(Vec<Span<Node<'a, Offset>, Offset>>),
    /// A bare atom, borrowed from the input.
    Atom(&'a str),
    /// The content of a string literal.
    String(Cow<'a, str>),
    /// A comment; only produced when comments are not excluded.
    Comment,
}
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use crate::lexer::{LineTracker, OffsetTracker};
    use crate::parser::{Error, Node, Parser, ParserOptions};
    use crate::span::Span;

    /// Single atoms and flat groups parse into the expected nodes.
    #[test]
    pub fn test_simple() {
        let mut parser = Parser::new(":");
        assert_eq!(parser.next().unwrap().unwrap().value, Node::Atom(":"));
        assert!(parser.next().is_none());
        let mut parser = Parser::new(r#"(:next "wow")"#);
        let Some(Ok(Span { value: Node::Group(items), .. })) = parser.next() else { panic!() };
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].value, Node::Atom(":next"));
        assert_eq!(items[1].value, Node::String(Cow::Borrowed("wow")));
    }

    /// A realistic multi-line document parses into its five top-level groups
    /// without any error.
    #[test]
    pub fn test_example() {
        let example = r#"(service "dbus"
(env "DBUS_SESSION_BUS_ADDRESS" :export)
(exec dbus-daemon --nofork --session ("--print-address=" (fd :env "DBUS_SESSION_BUS_ADDRESS")))
(layer interactive)
)
(service "ssh-agent"
(env "SSH_AUTH_SOCK" :export (create-socket))
(exec ssh-agent -D -a (env "SSH_AUTH_SOCK"))
(layer interactive)
)
(service "pipewire"
(exec pipewire)
(needs (:after dbus))
)
(service "pipewire-pulse"
(exec pipewire-pulse)
(needs (:after pipewire))
)
(service "wireplumber"
(exec wireplumber)
(needs (:after pipewire))
)"#;
        let mut parser = Parser::with_tracker::<LineTracker>(example);
        let mut services = 0;
        // Unwrap every node so that a parse error fails the test instead of
        // being silently skipped.
        while let Some(node) = parser.next() {
            node.expect("example document must parse without errors");
            services += 1;
        }
        assert_eq!(services, 5);
    }

    /// Comments (including nested ones) are skipped by default and returned
    /// as nodes only when explicitly included.
    #[test]
    fn test_ignore_comments() {
        let mut parser = Parser::new("#| #| hello! |# |# : #| bye! |#");
        assert_eq!(parser.next().unwrap().unwrap().value, Node::Atom(":"));
        assert!(parser.next().is_none());
        let mut parser = Parser::new(r#"(:next #| hello! |# "wow")"#);
        let Some(Ok(Span { value, .. })) = parser.next() else { panic!() };
        format!("{:?}", value);
        let Node::Group(items) = value else { panic!() };
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].value, Node::Atom(":next"));
        assert_eq!(items[1].value, Node::String(Cow::Borrowed("wow")));
        let mut parser = ParserOptions::new().include_comments().build("#| hello |#");
        let item = parser.next();
        assert_eq!(item, Some(Ok(Span::new((), (), Node::Comment))));
        let mut parser = ParserOptions::new().include_comments().exclude_comments().build("#| hello |#");
        let item = parser.next();
        assert_eq!(item, None);
        format!("{:?}", ParserOptions::new());
    }

    /// Each error variant is reported with the expected offsets and, when the
    /// `miette` feature is enabled, with a copy of the parsed input.
    #[test]
    fn test_errors() {
        let mut parser = Parser::with_tracker::<OffsetTracker>("|#");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::DanglingCloseComment {
            #[cfg(feature = "miette")]
            input: "|#".to_string(),
            offset: 0,
            span: Span::new(0, 2, ()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>(")");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::DanglingCloseParen {
            // The stored input is the text handed to this parser, i.e. ")".
            #[cfg(feature = "miette")]
            input: ")".to_string(),
            offset: 0,
            span: Span::new(0, 1, ()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>("\"hello\\x\"");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::UnsupportedEscape {
            #[cfg(feature = "miette")]
            input: "\"hello\\x\"".to_string(),
            offset: 6,
            character: 'x',
            span: Span::new(0, 8, "hello".to_string()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>("\"\\n\\x\"");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::UnsupportedEscape {
            #[cfg(feature = "miette")]
            input: "\"\\n\\x\"".to_string(),
            offset: 3,
            character: 'x',
            span: Span::new(0, 5, "\n".to_string()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>("\"hello");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::MissingEndQuote {
            #[cfg(feature = "miette")]
            input: "\"hello".to_string(),
            offset: 6,
            span: Span::new(0, 6, "hello".to_string()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>("\"hello\\n");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::MissingEndQuote {
            #[cfg(feature = "miette")]
            input: "\"hello\\n".to_string(),
            offset: 8,
            span: Span::new(0, 8, "hello\n".to_string()),
        })));
        let mut parser = Parser::with_tracker::<OffsetTracker>("(");
        let item = parser.next();
        format!("{:?}", item);
        assert_eq!(item, Some(Err(Error::MissingClosingBracket {
            #[cfg(feature = "miette")]
            input: "(".to_string(),
            start_offset: 0,
        })));
    }
}

@ -0,0 +1,122 @@
use std::ops::{Deref, DerefMut};
#[cfg(feature = "miette")]
use miette::SourceSpan;
/// A value annotated with the start and end offsets it was read from.
///
/// `O` is the offset representation and defaults to `usize`.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub struct Span<V, O = usize> {
    /// Offset at which the value starts.
    pub start: O,
    /// Offset at which the value ends.
    pub end: O,
    /// The annotated value.
    pub value: V,
}
impl<V, O> Span<V, O> {
    /// Replaces the carried value with `value`, keeping both offsets.
    pub fn with_value<N>(self, value: N) -> Span<N, O> {
        let Span { start, end, .. } = self;
        Span { start, end, value }
    }

    /// Drops the carried value, keeping only the offsets.
    pub fn empty(self) -> Span<(), O> {
        let Span { start, end, .. } = self;
        Span { start, end, value: () }
    }
}
impl<V: ToString, O> Span<V, O> {
    /// Converts the carried value to an owned `String` via `ToString`,
    /// keeping both offsets.
    pub fn into_string(self) -> Span<String, O> {
        let Span { start, end, value } = self;
        Span {
            start,
            end,
            value: value.to_string(),
        }
    }
}
/// Offset types that can be reduced to an absolute offset into the input.
pub trait SpanOffset {
    /// Returns the absolute offset as a `usize`.
    fn absolute_offset(&self) -> usize;
}

/// A plain `usize` offset is its own absolute offset.
impl SpanOffset for usize {
    fn absolute_offset(&self) -> usize {
        *self
    }
}
impl<V, O> Span<V, O> {
#[inline]
pub fn new(start: O, end: O, value: V) -> Self {
Span {
start,
end,
value,
}
}
pub fn unwrap(self) -> V {
self.value
}
}
/// A span dereferences to the value it carries.
impl<V, O> Deref for Span<V, O> {
    type Target = V;
    fn deref(&self) -> &Self::Target {
        &self.value
    }
}

/// Mutable access to the carried value through dereferencing.
impl<V, O> DerefMut for Span<V, O> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.value
    }
}
/// A position expressed as a line number, an offset within that line and an
/// absolute offset.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub struct LineOffset {
    /// Line number.
    pub line: usize,
    /// Offset within the line.
    pub line_offset: usize,
    /// Offset from the start of the input.
    pub absolute_offset: usize,
}

impl LineOffset {
    /// Creates a position from its three components.
    pub fn new(line: usize, line_offset: usize, absolute_offset: usize) -> LineOffset {
        Self { line, line_offset, absolute_offset }
    }
}
/// The absolute offset of a line position is its `absolute_offset` field.
impl SpanOffset for LineOffset {
    fn absolute_offset(&self) -> usize {
        self.absolute_offset
    }
}
/// A [`Span`] whose offsets are [`LineOffset`]s.
pub type LineSpan<V> = Span<V, LineOffset>;
/// Converts a span into a `miette` source span (absolute start plus length).
///
/// Implemented as `From` rather than `Into`; the standard blanket impl still
/// provides `Into<SourceSpan>` for `&Span`, so existing `.into()` callers and
/// `impl Into<SourceSpan>` parameters keep working.
#[cfg(feature = "miette")]
impl<V, T: SpanOffset> From<&Span<V, T>> for SourceSpan {
    fn from(span: &Span<V, T>) -> Self {
        let start = span.start.absolute_offset();
        let end = span.end.absolute_offset();
        // The fields are public, so a hand-built span could have `end < start`;
        // saturate to an empty span instead of overflowing.
        SourceSpan::from((start, end.saturating_sub(start)))
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::{LineTracker, Tracker};
    use crate::span::{LineOffset, Span, SpanOffset};

    /// A span that crosses a newline ends on the next line, and both offset
    /// types report their absolute offset.
    #[test]
    pub fn simple_test() {
        let mut tracker = LineTracker::default();
        tracker.start(0);
        tracker.process_newline(2);

        let expected = Span::new(LineOffset::new(0, 0, 0), LineOffset::new(1, 1, 4), ());
        let actual = tracker.end(4, ());
        assert_eq!(expected, actual);

        let line_offset = LineOffset::new(0, 0, 1);
        assert_eq!(1, line_offset.absolute_offset());
        assert_eq!(1, 1usize.absolute_offset());
    }
}
Loading…
Cancel
Save