Add a prettier version of the parser
Still to be done:
- the high half implementing ChordPro v6 semantics
- documentation of what we understand by the low half
- splitting stuff into relevant files (half of gmpro/parse belongs to gmpro/parse/chordpro6)
- environment grouping

master
parent
83b98dbcdc
commit
c6d545833a
@ -0,0 +1,281 @@
|
||||
import gleam/int
|
||||
import gleam/result
|
||||
import gleam/list
|
||||
import gleam/regex
|
||||
import gleam/string
|
||||
import gleam/option.{type Option, Some, None}
|
||||
import gmpro/utils
|
||||
|
||||
import gleam/io
|
||||
|
||||
/// One argument attached to a directive.
pub type DirectiveArgument {
  /// An argument made of a key only.
  Flag(key: String)
  /// An argument made of a key together with a value.
  Option(key: String, value: String)
}
|
||||
|
||||
/// A directive read from a `{...}` line.
pub type Directive {
  Directive(
    // Name of the directive.
    name: String,
    // Optional condition: the selector text paired with a Bool that is
    // False when the condition is negated.
    condition: Option(#(String, Bool)),
    // Arguments of the directive, in order.
    arguments: List(DirectiveArgument),
  )
}
|
||||
|
||||
/// An environment: the directive that opened it, its name, and its content
/// as a list of strings.
pub type Environment {
  Environment(
    // Directive that started the environment.
    start_directive: Directive,
    // Name of the environment.
    name: String,
    // Content of the environment.
    contents: List(String),
  )
}
|
||||
|
||||
|
||||
// This time we will not accumulate environments, only detect them!
|
||||
/// Classification of a single input line.
pub type LineType {
  /// A line outside any environment that is not empty, not a `#` comment
  /// and not a directive.
  ChordLine(text: String)
  /// A line holding a directive.
  DirectiveLine(Directive)
  /// A line starting with `#`, stored in full.
  Comment(text: String)
  /// An empty line.
  EmptyLine
  /// A line found inside an environment, stored verbatim.
  EnvironmentLine(text: String)
}
|
||||
|
||||
/// Rewrites a directive to its canonical (long) form.
///
/// Abbreviations such as `soc` are expanded to their long names, and
/// metadata shortcuts such as `title` become a `meta` directive whose first
/// argument is `Flag("title")`. Arguments implied by the rewrite are placed
/// in front of the directive's own arguments. Unknown names are returned
/// unchanged. The condition is never touched.
pub fn canonical_directive(dir: Directive) -> Directive {
  // We need to get the canonical name early, because we need to be able to
  // match {soc} to {end_of_chorus} etc.
  // TODO: save orig_name?
  let #(correct_name, new_args) = {
    case dir.name {
      // https://www.chordpro.org/chordpro/directives-env/
      "soc" -> #("start_of_chorus", [])
      "eoc" -> #("end_of_chorus", [])
      "sov" -> #("start_of_verse", [])
      "eov" -> #("end_of_verse", [])
      "sob" -> #("start_of_bridge", [])
      "eob" -> #("end_of_bridge", [])
      "sot" -> #("start_of_tab", [])
      "eot" -> #("end_of_tab", [])
      "sog" -> #("start_of_grid", [])
      // Fixed: this used to expand to the misspelled "endt_of_grid".
      "eog" -> #("end_of_grid", [])
      // https://www.chordpro.org/chordpro/directives-meta/
      "title" -> #("meta", [Flag("title")])
      "sorttitle" -> #("meta", [Flag("sorttitle")])
      "subtitle" -> #("meta", [Flag("subtitle")])
      "artist" -> #("meta", [Flag("artist")])
      "composer" -> #("meta", [Flag("composer")])
      "lyricist" -> #("meta", [Flag("lyricist")])
      "arranger" -> #("meta", [Flag("arranger")])
      "copyright" -> #("meta", [Flag("copyright")])
      "album" -> #("meta", [Flag("album")])
      "year" -> #("meta", [Flag("year")])
      "key" -> #("meta", [Flag("key")])
      "time" -> #("meta", [Flag("time")])
      "tempo" -> #("meta", [Flag("tempo")])
      "duration" -> #("meta", [Flag("duration")])
      "capo" -> #("meta", [Flag("capo")])
      // https://www.chordpro.org/chordpro/chordpro-directives/
      "ns" -> #("new_song", [])
      "t" -> #("meta", [Flag("title")])
      "st" -> #("meta", [Flag("subtitle")])
      "c" -> #("comment", [])
      "ci" -> #("comment_italic", [])
      "cb" -> #("comment_box", [])
      "cf" -> #("chordfont", [])
      "cs" -> #("chordsize", [])
      "tf" -> #("textfont", [])
      "ts" -> #("textsize", [])
      "np" -> #("new_page", [])
      "npp" -> #("new_physical_page", [])
      "colb" -> #("column_break", [])
      "g" -> #("grid", [])
      "ng" -> #("no_grid", [])
      "col" -> #("columns", [])
      // https://www.chordpro.org/chordpro/directives-comment/
      "highlight" -> #("comment", [])
      // Other cases: don't change
      name -> #(name, [])
    }
  }
  Directive(
    ..dir,
    name: correct_name,
    arguments: list.append(new_args, dir.arguments),
  )
}
|
||||
// NOTE: we want this function to be user-supplied in the actual low-level parser to allow deviations.
|
||||
// We do not add semantics here, therefore we do *not* yet know which parts of the directive form one argument (i.e. `{meta title Twinkle Twinkle Little Star}`)
|
||||
|
||||
// Pattern for one directive. Capture groups, in order:
//   1: directive name
//   2: the whole `-selector` suffix, including an optional trailing `!`
//   3: the selector itself
//   4: the `!` after the selector, if any
//   5: the separator (`:`, spaces, tabs) together with the argument text
//   6: the argument text up to the closing `}`
// NOTE(review): the pattern is not anchored, so it can also match a directive
// that sits in the middle of a longer string.
const directive_regex = "{([a-zA-Z0-9_]+)(-([a-zA-Z0-9_]+)(!?))?([: \t]+([^}]*))?}"
|
||||
/// Parses a single directive out of `str`.
///
/// Returns the directive with its name, its optional condition (the Bool is
/// False when the selector is followed by `!`) and, when argument text is
/// present, that text as one `Flag`. No semantics are applied: the argument
/// text is not split any further here.
///
/// Returns an `Error` when `str` contains no directive or more than one.
pub fn parse_directive(str: String) -> Result(Directive, String) {
  let assert Ok(compiled) = regex.from_string(directive_regex)
  case regex.scan(compiled, str) {
    [] -> Error("This does not match the directive regex")
    [single] ->
      // Pad the submatches to the six groups of the pattern so that the
      // list can be destructured in one go.
      case utils.pad_list(single.submatches, with: None, to: 6) {
        [raw_name, _, selector, negation, _, raw_args] -> {
          // Cannot fail, the name group is mandatory in the pattern.
          let assert Some(name) = raw_name
          let arguments = case raw_args {
            Some(text) -> [Flag(text)]
            None -> []
          }
          case selector, negation {
            Some(sel), neg ->
              Ok(Directive(
                name:,
                condition: Some(#(sel, option.is_none(neg))),
                arguments:,
              ))
            None, None -> Ok(Directive(name:, condition: None, arguments:))
            None, Some(_) ->
              Error("Condition only has negation without selector.")
          }
        }
        _ -> panic as "submatches for did not work in parse_directive"
      }
    _ ->
      Error(
        "Somehow, this matches the regex multiple times. (Maybe there are multiple directives on a single line?)",
      )
  }
}
|
||||
|
||||
// -------------------------
|
||||
|
||||
// TEMP!
|
||||
/// Tells whether a directive opens an environment.
///
/// For a directive named `start_of_<x>` this returns the text of the matching
/// closing line, `{end_of_<x>}`; for every other name it returns `None`.
fn environment_ends(d: Directive) -> Option(String) {
  let Directive(name: directive_name, ..) = d
  case directive_name {
    "start_of_" <> env_kind -> {
      let closing = "{end_of_" <> env_kind <> "}"
      Some(closing)
    }
    _other -> None
  }
}
|
||||
|
||||
/// Parses `s` with the default behaviour: directives are canonicalised with
/// `canonical_directive` and environments are detected with
/// `environment_ends`.
pub fn parse_base(s: String) -> Result(List(LineType), String) {
  s |> parse_detailed(canonical_directive, environment_ends)
}
|
||||
|
||||
/// How a parsed line changes the stack of open environments.
type EnvironmentChange {
  /// The stack stays as it is.
  KeepEnv
  /// The innermost environment is closed.
  ExitEnv
  /// A new environment is opened; `end` is the line expected to close it.
  EnterEnv(end: String)
}
|
||||
|
||||
/// Classifies a line that is not inside any environment.
///
/// - the empty string becomes `EmptyLine`;
/// - a line starting with `#` becomes a `Comment`;
/// - a line starting with `{` is parsed as a directive (a parse failure is
///   returned as `Error`) and canonicalised; when `is_environment` yields a
///   closing line for it, the result asks to enter that environment;
/// - anything else becomes a `ChordLine`.
fn parse_line_outside(
  line: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    "" -> Ok(#(KeepEnv, EmptyLine))
    "#" <> _ -> Ok(#(KeepEnv, Comment(line)))
    "{" <> _ ->
      parse_directive(line)
      |> result.map(fn(raw) {
        let canonical = canonical_directives(raw)
        let change = case is_environment(canonical) {
          Some(closing) -> EnterEnv(closing)
          None -> KeepEnv
        }
        #(change, DirectiveLine(canonical))
      })
    _ -> Ok(#(KeepEnv, ChordLine(line)))
  }
}
|
||||
|
||||
/// Classifies a line found inside an environment whose closing line is
/// `expect_end`.
///
/// The environment is closed when either
/// - the line is textually equal to `expect_end`, or
/// - the line parses as a directive whose canonical name, written as
///   `{name}`, equals `expect_end`. This lets an abbreviation such as `{eoc}`
///   close an environment that expects `{end_of_chorus}`.
///
/// In both cases the canonicalised directive is returned as a
/// `DirectiveLine`, matching what is produced for directives outside
/// environments.
///
/// A directive that opens another environment is reported as nested (a
/// warning is printed to stderr) and entered. Every other line, including
/// `{...}` lines that do not parse as a directive, is kept verbatim as an
/// `EnvironmentLine`.
///
/// An `Error` is returned only when the line equals `expect_end` but cannot
/// be parsed as a directive.
fn parse_line_inside(
  line: String,
  expect_end: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    exact if exact == expect_end -> {
      use directive <- result.try(parse_directive(exact))
      Ok(#(ExitEnv, DirectiveLine(canonical_directives(directive))))
    }
    "{" <> _something -> {
      // It may be the end of this env in abbreviated form, or a nested env.
      // Otherwise it is an EnvironmentLine.
      case parse_directive(line) {
        Error(_) -> Ok(#(KeepEnv, EnvironmentLine(line)))
        Ok(dir) -> {
          let canondir = canonical_directives(dir)
          // Compare on the canonical name so that short forms close the
          // environment too.
          let canonical_text = "{" <> canondir.name <> "}"
          case canonical_text == expect_end {
            True -> Ok(#(ExitEnv, DirectiveLine(canondir)))
            False ->
              case is_environment(canondir) {
                None -> Ok(#(KeepEnv, EnvironmentLine(line)))
                Some(end) -> {
                  // TODO: add line number to the warning!
                  io.println_error("Warning: nested environment: " <> line)
                  Ok(#(EnterEnv(end), DirectiveLine(canondir)))
                }
              }
          }
        }
      }
    }
    _ -> Ok(#(KeepEnv, EnvironmentLine(line)))
  }
}
|
||||
|
||||
/// State carried through the fold over the input lines.
///
/// A value of this type always stands for a valid parse so far; failures are
/// expressed by wrapping it in a `Result` while folding.
type FoldAccumulator {
  FoldAccumulator(
    // Closing lines of the currently open environments, innermost first.
    end_stack: List(String),
    // Lines parsed so far, most recent first.
    parsed_lines: List(LineType),
  )
}
|
||||
|
||||
/// Folding step: parses one line and updates the accumulator.
///
/// The line is parsed as "inside" when an environment is open (using the
/// innermost expected closing line) and as "outside" otherwise. On success
/// the parsed line is put in front of `parsed_lines` and the stack of open
/// environments is kept, extended or popped as the parse result requests.
/// On failure the error text is prefixed with the line number.
fn fold_func(
  acc: FoldAccumulator,
  item: String,
  line_number: Int,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(FoldAccumulator, String) {
  let open_envs = acc.end_stack
  let outcome = case open_envs {
    [] -> parse_line_outside(item, canonical_directives, is_environment)
    [innermost, ..] ->
      parse_line_inside(item, innermost, canonical_directives, is_environment)
  }
  case outcome {
    Error(reason) ->
      Error("line: " <> int.to_string(line_number) <> ": " <> reason)
    Ok(#(change, parsed)) -> {
      let next_stack = case change {
        KeepEnv -> open_envs
        EnterEnv(closing) -> [closing, ..open_envs]
        ExitEnv ->
          case open_envs {
            [] -> []
            [_, ..outer] -> outer
          }
      }
      Ok(FoldAccumulator(next_stack, [parsed, ..acc.parsed_lines]))
    }
  }
}
|
||||
|
||||
/// Turns the five-argument folding step `f` into the three-argument function
/// used for folding, by fixing `canonical_directives` and `is_environment`.
fn fold_func_embed(
  f: fn(
    FoldAccumulator,
    String,
    Int,
    fn(Directive) -> Directive,
    fn(Directive) -> Option(String),
  ) ->
    Result(FoldAccumulator, String),
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> fn(FoldAccumulator, String, Int) -> Result(FoldAccumulator, String) {
  fn(state, text, number) {
    f(state, text, number, canonical_directives, is_environment)
  }
}
|
||||
|
||||
/// This parser lets the caller specify how all the parts are parsed and
/// what the canonical names are.
///
/// `input` is split on "\n", every line is trimmed, and the lines are folded
/// one by one with line numbers starting at 1.
///
/// - `canonical_directives` rewrites each parsed directive to its canonical
///   form.
/// - `is_environment` returns, for a directive that opens an environment,
///   the line that is expected to close it.
///
/// Returns the classified lines in input order, or an `Error` when a line
/// fails to parse or when an environment is still open at the end of input.
pub fn parse_detailed(
  input: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(List(LineType), String) {
  // The leftover `io.debug` that printed the whole accumulator on every
  // parse has been removed.
  let folded =
    input
    |> string.split(on: "\n")
    |> list.map(string.trim)
    |> utils.fold_try_varindex(
      from: FoldAccumulator([], []),
      starting_with: 1,
      with: fold_func_embed(fold_func, canonical_directives, is_environment),
    )
  use line_types <- result.try(folded)
  case line_types {
    // Lines were added to the front while folding, so put them back in order.
    FoldAccumulator([], lines) -> Ok(list.reverse(lines))
    _ -> Error("Non-empty fold accumulator: " <> string.inspect(line_types))
  }
}
|
||||
|
Loading…
Reference in New Issue