From c6d545833ac2bae01cf4462ed731c920c2636f99 Mon Sep 17 00:00:00 2001
From: Pavel 'LEdoian' Turinsky
Date: Tue, 10 Sep 2024 14:02:25 +0200
Subject: [PATCH] Add a prettier version of the parser

To be still done:
- the high half implementing ChordPro v6 semantics
- documentation of what we understand by the low half
- split stuff into relevant files (half of gmpro/parse belongs to gmpro/parse/chordpro6)
- environment grouping
---
 gmpro/src/gmpro/demo.gleam  |   4 +-
 gmpro/src/gmpro/parse.gleam | 314 ++++++++++++++++++++++++++++++++++++
 gmpro/src/gmpro/utils.gleam |  26 ++++
 3 files changed, 342 insertions(+), 2 deletions(-)
 create mode 100644 gmpro/src/gmpro/parse.gleam

diff --git a/gmpro/src/gmpro/demo.gleam b/gmpro/src/gmpro/demo.gleam
index c50aab1..b791e64 100644
--- a/gmpro/src/gmpro/demo.gleam
+++ b/gmpro/src/gmpro/demo.gleam
@@ -1,4 +1,4 @@
-import gmpro
+import gmpro/parse
 import simplifile
 import gleam/list
 import gleam/io
@@ -16,7 +16,7 @@ pub fn main() {
       io.debug(err)
       "simplifile failed."
     })
-    |> result.map(gmpro.parse_base)
+    |> result.map(parse.parse_base)
   case verdict {
     Ok(result) -> io.println(pprint.styled(result))
     Error(err) -> io.println_error(err)
diff --git a/gmpro/src/gmpro/parse.gleam b/gmpro/src/gmpro/parse.gleam
new file mode 100644
index 0000000..f449cf8
--- /dev/null
+++ b/gmpro/src/gmpro/parse.gleam
@@ -0,0 +1,314 @@
+import gleam/int
+import gleam/result
+import gleam/list
+import gleam/regex
+import gleam/string
+import gleam/option.{type Option, Some, None}
+import gmpro/utils
+
+import gleam/io
+
+/// One argument of a directive: a bare `Flag` or a key/value `Option`.
+pub type DirectiveArgument {
+  Flag(key: String)
+  Option(key: String, value: String)
+}
+
+/// A parsed directive: its name, an optional selector condition and its arguments.
+pub type Directive {
+  Directive(
+    name: String,
+    condition: Option(#(String, Bool)), // False when negated
+    arguments: List(DirectiveArgument),
+  )
+}
+
+/// An environment: its opening directive, its name and its raw content lines.
+/// Nothing in this module constructs it yet.
+pub type Environment {
+  Environment(
+    start_directive: Directive,
+    name: String,
+    contents: List(String),
+  )
+}
+
+
+// This time we will not accumulate environments, only detect them!
+/// How the line-level parser classified a single input line.
+pub type LineType {
+  ChordLine(text: String)
+  DirectiveLine(Directive)
+  Comment(text: String)
+  EmptyLine
+  EnvironmentLine(text: String)
+}
+
+/// Maps abbreviated directive names to their long form and rewrites metadata
+/// shortcuts such as `title` to `meta` with a leading `Flag`; any other name is
+/// returned unchanged. The original arguments are kept after the added ones.
+pub fn canonical_directive(dir: Directive) -> Directive {
+  // We need to get canonical name early, because we need to be able to match {soc} to {end_of_chorus} etc.
+  // TODO: save orig_name?
+  let #(correct_name, new_args) = {
+    case dir.name {
+      // https://www.chordpro.org/chordpro/directives-env/
+      "soc" -> #("start_of_chorus", [])
+      "eoc" -> #("end_of_chorus", [])
+      "sov" -> #("start_of_verse", [])
+      "eov" -> #("end_of_verse", [])
+      "sob" -> #("start_of_bridge", [])
+      "eob" -> #("end_of_bridge", [])
+      "sot" -> #("start_of_tab", [])
+      "eot" -> #("end_of_tab", [])
+      "sog" -> #("start_of_grid", [])
+      "eog" -> #("end_of_grid", [])
+      // https://www.chordpro.org/chordpro/directives-meta/
+      "title" -> #("meta", [Flag("title")])
+      "sorttitle" -> #("meta", [Flag("sorttitle")])
+      "subtitle" -> #("meta", [Flag("subtitle")])
+      "artist" -> #("meta", [Flag("artist")])
+      "composer" -> #("meta", [Flag("composer")])
+      "lyricist" -> #("meta", [Flag("lyricist")])
+      "arranger" -> #("meta", [Flag("arranger")])
+      "copyright" -> #("meta", [Flag("copyright")])
+      "album" -> #("meta", [Flag("album")])
+      "year" -> #("meta", [Flag("year")])
+      "key" -> #("meta", [Flag("key")])
+      "time" -> #("meta", [Flag("time")])
+      "tempo" -> #("meta", [Flag("tempo")])
+      "duration" -> #("meta", [Flag("duration")])
+      "capo" -> #("meta", [Flag("capo")])
+      // https://www.chordpro.org/chordpro/chordpro-directives/
+      "ns" -> #("new_song", [])
+      "t" -> #("meta", [Flag("title")])
+      "st" -> #("meta", [Flag("subtitle")])
+      "c" -> #("comment", [])
+      "ci" -> #("comment_italic", [])
+      "cb" -> #("comment_box", [])
+      "cf" -> #("chordfont", [])
+      "cs" -> #("chordsize", [])
+      "tf" -> #("textfont", [])
+      "ts" -> #("textsize", [])
+      "np" -> #("new_page", [])
+      "npp" -> #("new_physical_page", [])
+      "colb" -> #("column_break", [])
+      "g" -> #("grid", [])
+      "ng" -> #("no_grid", [])
+      "col" -> #("columns", [])
+      //https://www.chordpro.org/chordpro/directives-comment/
+      "highlight" -> #("comment", [])
+      // Other cases: don't change
+      name -> #(name, [])
+    }
+  }
+  Directive(..dir, name: correct_name, arguments: list.append(new_args, dir.arguments))
+}
+// NOTE: we want to have this function be user-supplied in the actual low-level parser to allow deviations.
+// We do not add semantics here, therefore we do *not* yet know which parts of the directive form one argument (i.e. `{meta title Twinkle Twinkle Little Star}`)
+
+// Submatches, in order: name, "-selector!" group, selector, "!" negation,
+// argument group (including the separator), argument text.
+const directive_regex = "{([a-zA-Z0-9_]+)(-([a-zA-Z0-9_]+)(!?))?([: \t]+([^}]*))?}"
+/// Parses one `{name[-selector[!]][: arguments]}` directive out of `str`.
+/// The whole argument text, if present, is stored as a single `Flag`.
+/// Returns an `Error` when `str` has no match or more than one match.
+pub fn parse_directive(str: String) -> Result(Directive, String) {
+  // I don't believe this gets any simpler. It's just regexes and the handling is just mildly painful.
+  // This might have been a bit simpler if we used string.split for everything, which probably would be possible, but also the code would probably be longer. To be maybe refactored later…
+  let assert Ok(re) = regex.from_string(directive_regex)
+  let matches = regex.scan(re, str)
+  case matches {
+    [] -> Error("This does not match the directive regex")
+    [_a, _b, ..] -> Error("Somehow, this matches the regex multiple times. (Maybe there are multiple directives on a single line?)")
+    [match] -> {
+      let padded = utils.pad_list(match.submatches, with: None, to: 6)
+      case padded {
+        [name, _group, condition, invert, _group, attr] -> {
+          let assert Some(name) = name // Cannot fail, we matched something.
+          let condition = case condition, invert {
+            Some(cond), inv -> Ok(Some(#(cond, option.is_none(inv))))
+            None, None -> Ok(None)
+            None, Some(_inv) -> Error("Condition only has negation without selector.")
+          }
+          let attr = case attr {
+            None -> []
+            Some(x) -> [Flag(x)]
+          }
+          result.try(condition, fn(condition) {Ok(Directive(name:, condition:, arguments: attr))})
+        }
+        _ -> panic as "submatches for did not work in parse_directive"
+      }
+    }
+  }
+}
+
+// -------------------------
+
+// TEMP!
+/// For a `start_of_X` directive, returns the literal `{end_of_X}` line that closes it.
+fn environment_ends(d: Directive) -> Option(String) {
+  case d.name {
+    "start_of_" <> something -> Some("{end_of_" <> something <> "}")
+    _ -> None
+  }
+}
+
+/// Parses `s` line by line using `canonical_directive` and `environment_ends`.
+pub fn parse_base(s: String) -> Result(List(LineType), String) {
+  parse_detailed(s, canonical_directive, environment_ends)
+}
+
+/// Effect of one parsed line on the stack of open environments.
+type EnvironmentChange {
+  KeepEnv
+  ExitEnv
+  EnterEnv(end: String)
+}
+
+/// Classifies a line seen while no environment is open.
+/// A `{`-line that fails to parse as a directive is reported as an `Error`.
+fn parse_line_outside(
+  line: String,
+  canonical_directives: fn(Directive) -> Directive,
+  is_environment: fn(Directive) -> Option(String),
+  ) -> Result(#(EnvironmentChange, LineType), String) {
+  case line {
+    "" -> Ok(#(KeepEnv, EmptyLine))
+    "#" <> _something -> Ok(#(KeepEnv, Comment(line)))
+    "{" <> _something -> {
+      use directive <- result.try(parse_directive(line))
+      let directive =
+        directive
+        |> canonical_directives
+      let is_env = is_environment(directive)
+      case is_env {
+        None -> Ok(#(KeepEnv, DirectiveLine(directive)))
+        Some(env_end) -> Ok(#(EnterEnv(env_end), DirectiveLine(directive)))
+      }
+    }
+    _ -> Ok(#(KeepEnv, ChordLine(line)))
+  }
+}
+
+/// Classifies a line seen while an environment is open.
+/// The environment is left by a line equal to `expect_end`, or by a bare
+/// directive whose canonical name yields `expect_end` (so `{eoc}` closes `{soc}`).
+/// `{`-lines that do not parse as a directive are kept as `EnvironmentLine`s.
+fn parse_line_inside(
+  line: String,
+  expect_end: String,
+  canonical_directives: fn(Directive) -> Directive,
+  is_environment: fn(Directive) -> Option(String),
+  ) -> Result(#(EnvironmentChange, LineType), String) {
+  case line {
+    line if line == expect_end -> {
+      use directive <- result.try(parse_directive(line))
+      Ok(#(ExitEnv, DirectiveLine(directive)))
+    }
+    "{" <> _something -> {
+      // It may be the end directive in abbreviated form, or a nested env. Otherwise it is an EnvironmentLine.
+      case parse_directive(line) {
+        Error(_) -> Ok(#(KeepEnv, EnvironmentLine(line)))
+        Ok(dir) -> {
+          // Why is this so nested? (we could have downgraded the Error to None though…)
+          let canondir = canonical_directives(dir)
+          // Compare in canonical form so that abbreviations close the environment too.
+          let closes_env =
+            canondir.condition == None
+            && canondir.arguments == []
+            && { "{" <> canondir.name <> "}" } == expect_end
+          case closes_env, is_environment(canondir) {
+            True, _ -> Ok(#(ExitEnv, DirectiveLine(canondir)))
+            False, None -> Ok(#(KeepEnv, EnvironmentLine(line)))
+            False, Some(end) -> {
+              // TODO: add line number to the warning!
+              io.println_error("Warning: nested environment: " <> line)
+              Ok(#(EnterEnv(end), DirectiveLine(canondir)))
+            }
+          }
+        }
+      }
+    }
+    _ -> Ok(#(KeepEnv, EnvironmentLine(line)))
+  }
+}
+
+/// State threaded through the fold: the closing lines of the currently open
+/// environments (innermost first) and the lines parsed so far (newest first).
+type FoldAccumulator {
+  FoldAccumulator(end_stack: List(String), parsed_lines: List(LineType))
+  // This represents a valid parse, so when folding this is wrapped into a Result.
+}
+
+/// Parses one line in the context of `acc` and returns the updated accumulator.
+/// Parse errors are prefixed with the line number.
+fn fold_func(
+  acc: FoldAccumulator,
+  item: String,
+  line_number: Int,
+  canonical_directives: fn(Directive) -> Directive,
+  is_environment: fn(Directive) -> Option(String),
+  ) -> Result(FoldAccumulator, String) {
+  // I pray this does not end up as horrible as The Horrible Fold in gmpro.gleam :-)
+  let with_line_number = fn(s) { "line: " <> int.to_string(line_number) <> ": " <> s }
+  let top_end = list.first(acc.end_stack)
+  let rest_end: List(String) = list.rest(acc.end_stack)
+    |> result.replace_error([])
+    |> result.unwrap_both
+  let new_parsed_line: Result(#(EnvironmentChange, LineType), String) = case top_end { // how should the end_stack change + the parsed line or an error.
+    Error(Nil) -> parse_line_outside(item, canonical_directives, is_environment)
+    Ok(end) -> parse_line_inside(item, end, canonical_directives, is_environment)
+  } |> result.map_error(with_line_number)
+  use #(dir, lt) <- result.try(new_parsed_line)
+  // We good, just return Ok(FoldAccumulator)
+  let new_endstack = case dir, top_end {
+    // This feels ugly.
+    KeepEnv, Ok(top) -> [top, ..rest_end]
+    KeepEnv, Error(Nil) -> rest_end // == []
+    EnterEnv(end), Ok(top) -> [end, top, ..rest_end]
+    EnterEnv(end), Error(Nil) -> [end, ..rest_end] // == [end]
+    ExitEnv, _ -> rest_end
+  }
+  let new_lines = [lt, ..acc.parsed_lines]
+  Ok(FoldAccumulator(new_endstack, new_lines))
+}
+
+/// Partially applies `f` to the two callbacks so it fits `utils.fold_try_varindex`.
+fn fold_func_embed(
+  f: fn(
+    FoldAccumulator,
+    String,
+    Int,
+    fn(Directive) -> Directive,
+    fn(Directive) -> Option(String)
+  ) -> Result(FoldAccumulator, String),
+  canonical_directives: fn(Directive) -> Directive,
+  is_environment: fn(Directive) -> Option(String),
+  ) -> fn(FoldAccumulator, String, Int) -> Result(FoldAccumulator, String) {
+  fn(acc: FoldAccumulator, item: String, line_number: Int) -> Result(FoldAccumulator, String) {
+    f(acc, item, line_number, canonical_directives, is_environment)
+  }
+}
+
+/// This parser lets the caller specify how all the parts are parsed and
+/// what the canonical names are.
+/// Fails if a line fails to parse or an environment is still open at the end.
+pub fn parse_detailed(
+  input: String,
+  canonical_directives: fn(Directive) -> Directive,
+  is_environment: fn(Directive) -> Option(String),
+  ) -> Result(List(LineType), String) {
+  let line_types =
+    input
+    |> string.split(on: "\n")
+    |> list.map(string.trim)
+    |> utils.fold_try_varindex(from: FoldAccumulator([], []), starting_with: 1, with: fold_func_embed(fold_func, canonical_directives, is_environment))
+    // For some reason I cannot `|> result.try()`, which makes me sad, so it goes to the line below :-)
+  use line_types <- result.try(line_types)
+  case line_types {
+    FoldAccumulator([], lines) -> Ok(list.reverse(lines)) // It is faster to add lines to front!
+    _ -> Error("Non-empty fold accumulator: " <> string.inspect(line_types))
+  }
+}
+
diff --git a/gmpro/src/gmpro/utils.gleam b/gmpro/src/gmpro/utils.gleam
index 2fd7592..ca49d46 100644
--- a/gmpro/src/gmpro/utils.gleam
+++ b/gmpro/src/gmpro/utils.gleam
@@ -1,6 +1,8 @@
 //// Various utilities.
import gleam/list.{type ContinueOrStop, Stop, Continue} +import gleam/int +import gleam/result /// list.fold that can both terminate early and pass index to the folding function, with an option to set the initial index pub fn fold_until_varindex( @@ -20,3 +22,27 @@ pub fn fold_until_varindex( } } } + +// maybe we could have reused the above, but whatever… +pub fn fold_try_varindex( + over collection: List(a), + from accumulator: b, + starting_with index: Int, + with fun: fn(b, a, Int) -> Result(b, c), + ) -> Result(b, c) { + case collection { + [] -> Ok(accumulator) + [x, ..xs] -> { + use new_acc <- result.try(fun(accumulator, x, index)) + fold_try_varindex(xs, new_acc, index+1, fun) + } + } +} + +/// pads list from the right to the given length. Useful for matching regexes. +pub fn pad_list(list: List(a), with item: a, to length: Int) -> List(a) { + let reversed = list.reverse(list) + let number_to_add = { length - list.length(list) } |> int.max(0) + let extended = list.fold(over: list.repeat(Nil, times: number_to_add), from: reversed, with: fn(list, _nil) {[item, ..list]}) + list.reverse(extended) +}