Add a prettier version of the parser
Still to be done: - the high half implementing ChordPro v6 semantics - documentation of what we understand by the low half - splitting stuff into relevant files (half of gmpro/parse belongs to gmpro/parse/chordpro6) - environment grouping
master
parent
83b98dbcdc
commit
c6d545833a
@ -0,0 +1,281 @@
|
|||||||
|
import gleam/int
|
||||||
|
import gleam/result
|
||||||
|
import gleam/list
|
||||||
|
import gleam/regex
|
||||||
|
import gleam/string
|
||||||
|
import gleam/option.{type Option, Some, None}
|
||||||
|
import gmpro/utils
|
||||||
|
|
||||||
|
import gleam/io
|
||||||
|
|
||||||
|
/// One argument attached to a directive.
pub type DirectiveArgument {
  /// A bare argument without a value. `parse_directive` stores the whole raw
  /// argument text of a directive in a single `Flag`.
  Flag(key: String)
  /// A key together with its value.
  Option(key: String, value: String)
}
|
||||||
|
|
||||||
|
/// A parsed `{name-selector!: arguments}` directive.
pub type Directive {
  Directive(
    /// Directive name (letters, digits and underscores).
    name: String,
    /// Optional selector following `-`, paired with a flag that is
    /// False when negated (i.e. the selector was followed by `!`).
    condition: Option(#(String, Bool)),
    /// Arguments of the directive, in order.
    arguments: List(DirectiveArgument),
  )
}
|
||||||
|
|
||||||
|
/// An environment: the directive that opened it, its name and the lines it
/// contains.
// NOTE(review): nothing in this part of the file constructs this type yet.
pub type Environment {
  Environment(
    start_directive: Directive,
    name: String,
    contents: List(String),
  )
}
|
||||||
|
|
||||||
|
|
||||||
|
// This time we will not accumulate environments, only detect them!
/// Classification of a single input line.
pub type LineType {
  /// Any line outside an environment that is not empty, a comment or a directive.
  ChordLine(text: String)
  /// A line holding a successfully parsed directive.
  DirectiveLine(Directive)
  /// A line starting with `#`; `text` is the whole line.
  Comment(text: String)
  /// An empty line outside of any environment.
  EmptyLine
  /// A line inside an environment, kept verbatim.
  EnvironmentLine(text: String)
}
|
||||||
|
|
||||||
|
/// Rewrites a directive to its canonical (long) form.
///
/// Abbreviations are expanded to the long directive name, and metadata
/// directives such as `{title ...}` become `meta` with the metadata key
/// prepended as a `Flag` in front of the original arguments. Unknown names are
/// returned unchanged. The condition is always preserved.
pub fn canonical_directive(dir: Directive) -> Directive {
  // We need to get the canonical name early, because we need to be able to
  // match {soc} to {end_of_chorus} etc.
  // TODO: save orig_name?
  let #(correct_name, new_args) = case dir.name {
    // https://www.chordpro.org/chordpro/directives-env/
    "soc" -> #("start_of_chorus", [])
    "eoc" -> #("end_of_chorus", [])
    "sov" -> #("start_of_verse", [])
    "eov" -> #("end_of_verse", [])
    "sob" -> #("start_of_bridge", [])
    "eob" -> #("end_of_bridge", [])
    "sot" -> #("start_of_tab", [])
    "eot" -> #("end_of_tab", [])
    "sog" -> #("start_of_grid", [])
    // Was misspelled "endt_of_grid", which could never pair with start_of_grid.
    "eog" -> #("end_of_grid", [])
    // https://www.chordpro.org/chordpro/directives-meta/
    "title" -> #("meta", [Flag("title")])
    "sorttitle" -> #("meta", [Flag("sorttitle")])
    "subtitle" -> #("meta", [Flag("subtitle")])
    "artist" -> #("meta", [Flag("artist")])
    "composer" -> #("meta", [Flag("composer")])
    "lyricist" -> #("meta", [Flag("lyricist")])
    "arranger" -> #("meta", [Flag("arranger")])
    "copyright" -> #("meta", [Flag("copyright")])
    "album" -> #("meta", [Flag("album")])
    "year" -> #("meta", [Flag("year")])
    "key" -> #("meta", [Flag("key")])
    "time" -> #("meta", [Flag("time")])
    "tempo" -> #("meta", [Flag("tempo")])
    "duration" -> #("meta", [Flag("duration")])
    "capo" -> #("meta", [Flag("capo")])
    // https://www.chordpro.org/chordpro/chordpro-directives/
    "ns" -> #("new_song", [])
    "t" -> #("meta", [Flag("title")])
    "st" -> #("meta", [Flag("subtitle")])
    "c" -> #("comment", [])
    "ci" -> #("comment_italic", [])
    "cb" -> #("comment_box", [])
    "cf" -> #("chordfont", [])
    "cs" -> #("chordsize", [])
    "tf" -> #("textfont", [])
    "ts" -> #("textsize", [])
    "np" -> #("new_page", [])
    "npp" -> #("new_physical_page", [])
    "colb" -> #("column_break", [])
    "g" -> #("grid", [])
    "ng" -> #("no_grid", [])
    "col" -> #("columns", [])
    // https://www.chordpro.org/chordpro/directives-comment/
    "highlight" -> #("comment", [])
    // Other cases: don't change
    name -> #(name, [])
  }
  Directive(
    ..dir,
    name: correct_name,
    arguments: list.append(new_args, dir.arguments),
  )
}
|
||||||
|
// NOTE: we want this function to be user-supplied in the actual low-level parser to allow deviations.
// We do not add semantics here, therefore we do *not* yet know which parts of the directive form one argument (e.g. `{meta title Twinkle Twinkle Little Star}`).
|
||||||
|
|
||||||
|
// Capture groups: 1 = name, 2 = "-selector[!]", 3 = selector, 4 = "!",
// 5 = separator plus arguments, 6 = raw argument text.
const directive_regex = "{([a-zA-Z0-9_]+)(-([a-zA-Z0-9_]+)(!?))?([: \t]+([^}]*))?}"

/// Parses one `{name-selector!: arguments}` directive out of `str`.
///
/// Returns an error when the text matches the directive regex zero times or
/// more than once. The raw argument text, if any, is returned as a single
/// `Flag`; no semantics are applied to it here.
pub fn parse_directive(str: String) -> Result(Directive, String) {
  // I don't believe this gets any simpler: it is just regexes, and handling
  // their output is mildly painful. Using string.split for everything is
  // probably possible, but likely longer. To be maybe refactored later…
  let assert Ok(re) = regex.from_string(directive_regex)
  case regex.scan(re, str) {
    [] -> Error("This does not match the directive regex")
    [single] -> directive_from_submatches(single.submatches)
    _ -> Error("Somehow, this matches the regex multiple times. (Maybe there are multiple directives on a single line?)")
  }
}

/// Builds a `Directive` from the capture groups of one `directive_regex` match.
fn directive_from_submatches(
  submatches: List(Option(String)),
) -> Result(Directive, String) {
  // Pad with None so that all six groups can be destructured at once.
  case utils.pad_list(submatches, with: None, to: 6) {
    [name, _, selector, negation, _, raw_arguments] -> {
      let assert Some(name) = name // Cannot fail, we matched something.
      let arguments =
        raw_arguments
        |> option.map(fn(text) { [Flag(text)] })
        |> option.unwrap([])
      let condition = case selector, negation {
        // The Bool is True for a plain selector and False for a negated one.
        Some(cond), inv -> Ok(Some(#(cond, option.is_none(inv))))
        None, None -> Ok(None)
        None, Some(_) -> Error("Condition only has negation without selector.")
      }
      use condition <- result.try(condition)
      Ok(Directive(name: name, condition: condition, arguments: arguments))
    }
    _ -> panic as "submatches for did not work in parse_directive"
  }
}
|
||||||
|
|
||||||
|
// -------------------------
|
||||||
|
|
||||||
|
// TEMP!
/// For a `start_of_<x>` directive, returns the literal line `{end_of_<x>}`
/// that closes the environment; any other directive yields `None`.
fn environment_ends(d: Directive) -> Option(String) {
  case d.name {
    "start_of_" <> environment -> {
      let closing_line = "{end_of_" <> environment <> "}"
      Some(closing_line)
    }
    _ -> None
  }
}
|
||||||
|
|
||||||
|
/// Parses `s` with the default settings: directive names are canonicalised by
/// `canonical_directive` and environments are detected by `environment_ends`.
pub fn parse_base(s: String) -> Result(List(LineType), String) {
  s
  |> parse_detailed(canonical_directive, environment_ends)
}
|
||||||
|
|
||||||
|
/// How a single line changes the stack of open environments.
type EnvironmentChange {
  /// The stack stays as it is.
  KeepEnv
  /// The innermost environment is closed.
  ExitEnv
  /// A new environment is opened; `end` is the line expected to close it.
  EnterEnv(end: String)
}
|
||||||
|
|
||||||
|
/// Classifies a line that is not inside any environment.
///
/// Empty lines and `#` comments are recognised directly. A line starting with
/// `{` must parse as a directive (otherwise the parse error is returned); the
/// directive is canonicalised and, if `is_environment` yields an end marker,
/// an `EnterEnv` change is reported. Everything else is a chord line.
fn parse_line_outside(
  line: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    "" -> Ok(#(KeepEnv, EmptyLine))
    "#" <> _ -> Ok(#(KeepEnv, Comment(line)))
    "{" <> _ ->
      parse_directive(line)
      |> result.map(fn(raw) {
        let directive = canonical_directives(raw)
        let change = case is_environment(directive) {
          Some(env_end) -> EnterEnv(env_end)
          None -> KeepEnv
        }
        #(change, DirectiveLine(directive))
      })
    _ -> Ok(#(KeepEnv, ChordLine(line)))
  }
}
|
||||||
|
|
||||||
|
/// Classifies a line that is inside an environment whose closing line is
/// `expect_end`.
///
/// The environment is closed either by a line equal to `expect_end`, or by a
/// directive without condition and arguments whose canonical name, wrapped in
/// braces, equals `expect_end` (so `{eoc}` closes `{end_of_chorus}`). The
/// directive reported on exit is canonicalised, like everywhere else.
///
/// A directive that opens another environment is reported as `EnterEnv` and a
/// warning is printed. Every other line, including text starting with `{`
/// that is not a valid directive, is kept verbatim as an `EnvironmentLine`.
fn parse_line_inside(
  line: String,
  expect_end: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    line if line == expect_end -> {
      use directive <- result.try(parse_directive(line))
      Ok(#(ExitEnv, DirectiveLine(canonical_directives(directive))))
    }
    "{" <> _something -> {
      // It may be an abbreviated end directive or a nested env.
      // Otherwise it is an EnvironmentLine.
      case parse_directive(line) {
        Error(_) -> Ok(#(KeepEnv, EnvironmentLine(line)))
        Ok(dir) -> {
          let canondir = canonical_directives(dir)
          // Only a bare directive can stand for the expected end line.
          let closes_environment = case canondir {
            Directive(name: name, condition: None, arguments: []) ->
              "{" <> name <> "}" == expect_end
            _ -> False
          }
          case closes_environment {
            True -> Ok(#(ExitEnv, DirectiveLine(canondir)))
            False ->
              case is_environment(canondir) {
                None -> Ok(#(KeepEnv, EnvironmentLine(line)))
                Some(end) -> {
                  // TODO: add line number to the warning!
                  io.println_error("Warning: nested environment: " <> line)
                  Ok(#(EnterEnv(end), DirectiveLine(canondir)))
                }
              }
          }
        }
      }
    }
    _ -> Ok(#(KeepEnv, EnvironmentLine(line)))
  }
}
|
||||||
|
|
||||||
|
/// State threaded through the line-by-line fold.
///
/// This represents a valid parse, so when folding this is wrapped into a
/// Result.
type FoldAccumulator {
  FoldAccumulator(
    /// Closing lines of the currently open environments, innermost first.
    end_stack: List(String),
    /// Lines parsed so far, most recent first.
    parsed_lines: List(LineType),
  )
}
|
||||||
|
|
||||||
|
/// Processes one input line: parses it according to whether an environment is
/// currently open, updates the stack of expected closing lines and prepends
/// the parsed line to the accumulator. Parse errors are prefixed with the
/// line number.
fn fold_func(
  acc: FoldAccumulator,
  item: String,
  line_number: Int,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(FoldAccumulator, String) {
  // The head of the stack, if any, is the end of the innermost environment.
  let parsed = case acc.end_stack {
    [] -> parse_line_outside(item, canonical_directives, is_environment)
    [innermost_end, ..] ->
      parse_line_inside(item, innermost_end, canonical_directives, is_environment)
  }
  let located =
    result.map_error(parsed, fn(message) {
      "line: " <> int.to_string(line_number) <> ": " <> message
    })
  use #(change, line_type) <- result.try(located)
  let end_stack = case change, acc.end_stack {
    KeepEnv, stack -> stack
    EnterEnv(end), stack -> [end, ..stack]
    ExitEnv, [_closed, ..outer] -> outer
    ExitEnv, [] -> []
  }
  Ok(FoldAccumulator(end_stack, [line_type, ..acc.parsed_lines]))
}
|
||||||
|
|
||||||
|
/// Fixes the two parser callbacks of a five-argument fold step, producing the
/// three-argument function (accumulator, line, line number) that the fold
/// expects.
fn fold_func_embed(
  f: fn(
    FoldAccumulator,
    String,
    Int,
    fn(Directive) -> Directive,
    fn(Directive) -> Option(String),
  ) -> Result(FoldAccumulator, String),
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> fn(FoldAccumulator, String, Int) -> Result(FoldAccumulator, String) {
  fn(state, text, number) {
    f(state, text, number, canonical_directives, is_environment)
  }
}
|
||||||
|
|
||||||
|
/// This parser lets the caller specify how all the parts are parsed and
/// what the canonical names are.
///
/// The input is split on newlines and every line is trimmed before being
/// classified. `canonical_directives` is applied to parsed directives, and
/// `is_environment` returns, for a directive that opens an environment, the
/// line expected to close it.
///
/// Returns the classified lines in input order, or an error if a line fails
/// to parse or an environment is still open at the end of the input.
pub fn parse_detailed(
  input: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(List(LineType), String) {
  // The leftover `io.debug` call that dumped the accumulator on every parse
  // has been removed; the result is otherwise unchanged.
  let folded =
    input
    |> string.split(on: "\n")
    |> list.map(string.trim)
    |> utils.fold_try_varindex(
      from: FoldAccumulator([], []),
      starting_with: 1,
      with: fold_func_embed(fold_func, canonical_directives, is_environment),
    )
  use final_state <- result.try(folded)
  case final_state {
    // It is faster to add lines to the front, hence the reversal here.
    FoldAccumulator([], lines) -> Ok(list.reverse(lines))
    // A non-empty end stack means some environment was never closed.
    _ -> Error("Non-empty fold accumulator: " <> string.inspect(final_state))
  }
}
|
||||||
|
|
Loading…
Reference in New Issue