1
0
Fork 0

Add a prettier version of the parser

Still to be done:
- the high half implementing ChordPro v6 semantics
- documentation of what we understand by the low half
- split stuff into relevant files (half of gmpro/parse belongs to
  gmpro/parse/chordpro6)
- environment grouping
master
LEdoian 3 weeks ago
parent 83b98dbcdc
commit c6d545833a

@ -1,4 +1,4 @@
import gmpro
import gmpro/parse
import simplifile
import gleam/list
import gleam/io
@ -16,7 +16,7 @@ pub fn main() {
io.debug(err)
"simplifile failed."
})
|> result.map(gmpro.parse_base)
|> result.map(parse.parse_base)
case verdict {
Ok(result) -> io.println(pprint.styled(result))
Error(err) -> io.println_error(err)

@ -0,0 +1,281 @@
import gleam/int
import gleam/result
import gleam/list
import gleam/regex
import gleam/string
import gleam/option.{type Option, Some, None}
import gmpro/utils
import gleam/io
/// One argument attached to a directive.
pub type DirectiveArgument {
/// A bare argument. `parse_directive` stores the whole raw argument text of a
/// directive in a single `Flag`, and `canonical_directive` prepends a `Flag`
/// holding the metadata key when it rewrites a shortcut into `meta`.
Flag(key: String)
/// A key/value argument. NOTE(review): nothing in this module constructs this
/// variant yet; presumably it is meant for the later semantic pass -- confirm.
Option(key: String, value: String)
}
/// A single parsed `{...}` directive.
pub type Directive {
Directive(
// Directive name: as written in the input right after `parse_directive`,
// or the long form after `canonical_directive` has been applied.
name: String,
// Optional `-selector` suffix of the name together with its polarity.
condition: Option(#(String, Bool)), // False when negated
// Arguments in order; no semantic splitting has been done at this level.
arguments: List(DirectiveArgument),
)
}
/// A whole environment: the directive that opened it, its name and the raw
/// lines it contains.
/// NOTE(review): the parser in this module only tags lines (see `LineType`)
/// and never builds this record; presumably it is reserved for the
/// "environment grouping" step -- confirm.
pub type Environment {
Environment(
start_directive: Directive,
name: String,
contents: List(String),
)
}
// This time we will not accumulate environments, only detect them!
/// Classification of one (trimmed) input line.
pub type LineType {
/// Any line outside an environment that is not empty, a comment or a directive.
ChordLine(text: String)
/// A line that parsed as a directive.
DirectiveLine(Directive)
/// A line outside an environment that starts with `#`; `text` is the whole line.
Comment(text: String)
/// An empty line outside an environment.
EmptyLine
/// A raw line found inside an environment; it is kept verbatim.
EnvironmentLine(text: String)
}
/// Rewrites a directive to its canonical long form.
///
/// Abbreviations (`soc`, `eoc`, `t`, ...) are expanded to their long names and
/// metadata shortcuts (`title`, `artist`, ...) become `meta` with the metadata
/// key prepended to the arguments as a `Flag`. Unknown names are returned
/// unchanged. The condition and the original arguments are always preserved.
pub fn canonical_directive(dir: Directive) -> Directive {
  // We need to get canonical name early, because we need to be able to match
  // {soc} to {end_of_chorus} etc.
  // TODO: save orig_name?
  let #(correct_name, new_args) = case dir.name {
    // https://www.chordpro.org/chordpro/directives-env/
    "soc" -> #("start_of_chorus", [])
    "eoc" -> #("end_of_chorus", [])
    "sov" -> #("start_of_verse", [])
    "eov" -> #("end_of_verse", [])
    "sob" -> #("start_of_bridge", [])
    "eob" -> #("end_of_bridge", [])
    "sot" -> #("start_of_tab", [])
    "eot" -> #("end_of_tab", [])
    "sog" -> #("start_of_grid", [])
    // Was misspelled "endt_of_grid", which could never pair with start_of_grid.
    "eog" -> #("end_of_grid", [])
    // https://www.chordpro.org/chordpro/directives-meta/
    "title" -> #("meta", [Flag("title")])
    "sorttitle" -> #("meta", [Flag("sorttitle")])
    "subtitle" -> #("meta", [Flag("subtitle")])
    "artist" -> #("meta", [Flag("artist")])
    "composer" -> #("meta", [Flag("composer")])
    "lyricist" -> #("meta", [Flag("lyricist")])
    "arranger" -> #("meta", [Flag("arranger")])
    "copyright" -> #("meta", [Flag("copyright")])
    "album" -> #("meta", [Flag("album")])
    "year" -> #("meta", [Flag("year")])
    "key" -> #("meta", [Flag("key")])
    "time" -> #("meta", [Flag("time")])
    "tempo" -> #("meta", [Flag("tempo")])
    "duration" -> #("meta", [Flag("duration")])
    "capo" -> #("meta", [Flag("capo")])
    // https://www.chordpro.org/chordpro/chordpro-directives/
    "ns" -> #("new_song", [])
    "t" -> #("meta", [Flag("title")])
    "st" -> #("meta", [Flag("subtitle")])
    "c" -> #("comment", [])
    "ci" -> #("comment_italic", [])
    "cb" -> #("comment_box", [])
    "cf" -> #("chordfont", [])
    "cs" -> #("chordsize", [])
    "tf" -> #("textfont", [])
    "ts" -> #("textsize", [])
    "np" -> #("new_page", [])
    "npp" -> #("new_physical_page", [])
    "colb" -> #("column_break", [])
    "g" -> #("grid", [])
    "ng" -> #("no_grid", [])
    "col" -> #("columns", [])
    // https://www.chordpro.org/chordpro/directives-comment/
    "highlight" -> #("comment", [])
    // Other cases: don't change
    name -> #(name, [])
  }
  // Injected arguments (the metadata key) go in front of the user's arguments.
  Directive(
    ..dir,
    name: correct_name,
    arguments: list.append(new_args, dir.arguments),
  )
}
// NOTE: we want to have this function be user-supplied in the actual low-level parser to allow deviations.
// We do not add semantics here, therefore we do *not* yet know which parts of the directive form one argument (i.e. `{meta title Twinkle Twinkle Little Star}`)
// Capture groups, in order:
//   1: directive name
//   2: the whole optional `-selector` / `-selector!` suffix
//   3: the selector itself
//   4: the optional `!` negation mark
//   5: separator (colon/space/tab run) plus the argument text
//   6: the argument text alone (everything up to the closing brace)
// The pattern is not anchored, so it is matched anywhere within the line.
const directive_regex = "{([a-zA-Z0-9_]+)(-([a-zA-Z0-9_]+)(!?))?([: \t]+([^}]*))?}"
/// Parses one `{...}` directive out of `str`.
///
/// Returns the directive with its name as written (not canonicalised), the
/// optional `-selector` condition, and the raw argument text wrapped in a
/// single `Flag`. Fails when the text does not match `directive_regex`, when it
/// matches more than once, or when a negation appears without a selector.
pub fn parse_directive(str: String) -> Result(Directive, String) {
  // The pattern is a constant, so compilation is asserted rather than handled.
  let assert Ok(compiled) = regex.from_string(directive_regex)
  case regex.scan(compiled, str) {
    [] -> Error("This does not match the directive regex")
    [found] -> {
      // Trailing groups that did not take part in the match may be missing
      // from the submatch list; pad so that we always see all six.
      let groups = utils.pad_list(found.submatches, with: None, to: 6)
      case groups {
        [name, _suffix, selector, negation, _separator, raw_arguments] -> {
          let assert Some(name) = name // Cannot fail, we matched something.
          let arguments = case raw_arguments {
            Some(text) -> [Flag(text)]
            None -> []
          }
          case selector, negation {
            // The Bool is True for a plain selector, False when negated.
            Some(cond), inv ->
              Ok(Directive(
                name: name,
                condition: Some(#(cond, option.is_none(inv))),
                arguments: arguments,
              ))
            None, None ->
              Ok(Directive(name: name, condition: None, arguments: arguments))
            None, Some(_inv) ->
              Error("Condition only has negation without selector.")
          }
        }
        _ -> panic as "submatches for did not work in parse_directive"
      }
    }
    _ -> Error("Somehow, this matches the regex multiple times. (Maybe there are multiple directives on a single line?)")
  }
}
// -------------------------
// TEMP!
/// For a `start_of_<kind>` directive, yields the literal text of the line that
/// closes it (`{end_of_<kind>}`); any other directive opens no environment.
fn environment_ends(d: Directive) -> Option(String) {
  let Directive(name: directive_name, ..) = d
  case directive_name {
    "start_of_" <> kind -> {
      let closing_line = string.concat(["{end_of_", kind, "}"])
      Some(closing_line)
    }
    _ -> None
  }
}
/// Parses `s` with this module's default rules: directives are canonicalised
/// by `canonical_directive` and environments are detected by `environment_ends`.
pub fn parse_base(s: String) -> Result(List(LineType), String) {
  s
  |> parse_detailed(canonical_directive, environment_ends)
}
/// How a single line affects the stack of currently open environments.
type EnvironmentChange {
/// The stack stays as it is.
KeepEnv
/// The innermost environment is closed.
ExitEnv
/// A new environment is opened; `end` is the end marker it expects.
EnterEnv(end: String)
}
/// Classifies one line read while no environment is open.
///
/// Lines starting with `{` must parse as a directive (otherwise the error is
/// returned); the directive is canonicalised and, if `is_environment` reports
/// an end marker for it, an `EnterEnv` change is signalled. Empty lines,
/// `#` comments and everything else leave the environment stack untouched.
fn parse_line_outside(
  line: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    "{" <> _rest ->
      parse_directive(line)
      |> result.map(fn(raw_directive) {
        let directive = canonical_directives(raw_directive)
        let change = case is_environment(directive) {
          Some(env_end) -> EnterEnv(env_end)
          None -> KeepEnv
        }
        #(change, DirectiveLine(directive))
      })
    "#" <> _rest -> Ok(#(KeepEnv, Comment(line)))
    "" -> Ok(#(KeepEnv, EmptyLine))
    _ -> Ok(#(KeepEnv, ChordLine(line)))
  }
}
/// Classifies one line read while an environment is open.
///
/// `expect_end` is the end marker of the innermost open environment, as
/// returned by `is_environment` when that environment was entered.
///
/// The environment is closed when the line is literally `expect_end`, or when
/// the line is a directive whose *canonical* name, rendered as `{name}`,
/// equals `expect_end`. The second rule is what lets an abbreviated end such
/// as `{eoc}` close an environment opened by `{soc}`; comparing only the raw
/// text left such environments open forever. In both cases the closing
/// directive is returned in canonical form, like every other `DirectiveLine`.
///
/// A directive that opens another environment produces a warning on stderr
/// and an `EnterEnv` change. Every other line, including lines starting with
/// `{` that do not parse as directives, is kept as a raw `EnvironmentLine`.
fn parse_line_inside(
  line: String,
  expect_end: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(#(EnvironmentChange, LineType), String) {
  case line {
    line if line == expect_end -> {
      // Exact textual match: the line must still be a well-formed directive.
      use directive <- result.try(parse_directive(line))
      Ok(#(ExitEnv, DirectiveLine(canonical_directives(directive))))
    }
    "{" <> _something -> {
      // It may close this env in abbreviated form, or open a nested env.
      // Otherwise it is an EnvironmentLine.
      case parse_directive(line) {
        Error(_) -> Ok(#(KeepEnv, EnvironmentLine(line)))
        Ok(dir) -> {
          let canondir = canonical_directives(dir)
          // Compare on the canonical name so that {eoc} matches {end_of_chorus}.
          let rendered_end = "{" <> canondir.name <> "}"
          case rendered_end == expect_end {
            True -> Ok(#(ExitEnv, DirectiveLine(canondir)))
            False ->
              case is_environment(canondir) {
                None -> Ok(#(KeepEnv, EnvironmentLine(line)))
                Some(end) -> {
                  // TODO: add line number to the warning!
                  io.println_error("Warning: nested environment: " <> line)
                  Ok(#(EnterEnv(end), DirectiveLine(canondir)))
                }
              }
          }
        }
      }
    }
    _ -> Ok(#(KeepEnv, EnvironmentLine(line)))
  }
}
/// State threaded through the per-line fold.
/// `end_stack` holds the end markers of the currently open environments,
/// innermost first; `parsed_lines` holds the lines classified so far, newest
/// first (they are reversed once at the end of `parse_detailed`).
type FoldAccumulator {
FoldAccumulator(end_stack: List(String), parsed_lines: List(LineType))
// This represents a valid parse, so when folding this is wrapped into a Result.
}
/// One step of the line fold: classifies `item` and updates the accumulator.
///
/// The line is parsed with `parse_line_inside` when an environment is open
/// (against the innermost end marker) and with `parse_line_outside` otherwise.
/// Errors are prefixed with the line number. On success the environment stack
/// is kept, pushed or popped according to the reported change, and the parsed
/// line is prepended to the accumulated lines.
fn fold_func(
  acc: FoldAccumulator,
  item: String,
  line_number: Int,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(FoldAccumulator, String) {
  let FoldAccumulator(end_stack: open_ends, parsed_lines: done) = acc
  let outcome = case open_ends {
    [] -> parse_line_outside(item, canonical_directives, is_environment)
    [innermost, ..] ->
      parse_line_inside(item, innermost, canonical_directives, is_environment)
  }
  case outcome {
    Error(message) ->
      Error("line: " <> int.to_string(line_number) <> ": " <> message)
    Ok(#(change, parsed)) -> {
      let next_ends = case change, open_ends {
        KeepEnv, _ -> open_ends
        EnterEnv(end), _ -> [end, ..open_ends]
        // Popping an already empty stack leaves it empty.
        ExitEnv, [] -> []
        ExitEnv, [_closed, ..outer] -> outer
      }
      Ok(FoldAccumulator(next_ends, [parsed, ..done]))
    }
  }
}
/// Binds the two parser callbacks into `f`, producing the three-argument
/// function shape (`accumulator`, `item`, `index`) that the indexed fold expects.
fn fold_func_embed(
  f: fn(
    FoldAccumulator,
    String,
    Int,
    fn(Directive) -> Directive,
    fn(Directive) -> Option(String),
  ) -> Result(FoldAccumulator, String),
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> fn(FoldAccumulator, String, Int) -> Result(FoldAccumulator, String) {
  fn(state, text, number) {
    state
    |> f(text, number, canonical_directives, is_environment)
  }
}
/// This parser lets the caller specify how all the parts are parsed and
/// what the canonical names are.
///
/// `input` is split on `\n`, every line is trimmed, and the lines are
/// classified one by one with 1-based line numbers. `canonical_directives`
/// rewrites each parsed directive; `is_environment` returns the end marker
/// for a directive that opens an environment, or `None` otherwise.
///
/// Returns the classified lines in input order, the first per-line error
/// (prefixed with its line number), or an error if any environment is still
/// open at the end of the input.
pub fn parse_detailed(
  input: String,
  canonical_directives: fn(Directive) -> Directive,
  is_environment: fn(Directive) -> Option(String),
) -> Result(List(LineType), String) {
  let step = fold_func_embed(fold_func, canonical_directives, is_environment)
  // A leftover `io.debug` used to dump the whole accumulator to stderr here on
  // every call; a library parser should not print.
  let folded =
    input
    |> string.split(on: "\n")
    |> list.map(string.trim)
    |> utils.fold_try_varindex(
      from: FoldAccumulator([], []),
      starting_with: 1,
      with: step,
    )
  use line_types <- result.try(folded)
  case line_types {
    // It is faster to add lines to front, so they are reversed once here.
    FoldAccumulator([], lines) -> Ok(list.reverse(lines))
    _ -> Error("Non-empty fold accumulator: " <> string.inspect(line_types))
  }
}

@ -1,6 +1,8 @@
//// Various utilities.
import gleam/list.{type ContinueOrStop, Stop, Continue}
import gleam/int
import gleam/result
/// list.fold that can both terminate early and pass index to the folding function, with an option to set the initial index
pub fn fold_until_varindex(
@ -20,3 +22,27 @@ pub fn fold_until_varindex(
}
}
}
// maybe we could have reused the above, but whatever…
/// Folds over `collection` from the left, handing each element's index
/// (counting up from `index`) to `fun`. Stops at, and returns, the first
/// `Error` produced by `fun`; otherwise returns the final accumulator in `Ok`.
pub fn fold_try_varindex(
  over collection: List(a),
  from accumulator: b,
  starting_with index: Int,
  with fun: fn(b, a, Int) -> Result(b, c),
) -> Result(b, c) {
  case collection {
    [] -> Ok(accumulator)
    [head, ..tail] ->
      case fun(accumulator, head, index) {
        Error(reason) -> Error(reason)
        Ok(next) -> fold_try_varindex(tail, next, index + 1, fun)
      }
  }
}
/// Pads `list` on the right with copies of `item` until it holds at least
/// `length` elements. A list that is already long enough is returned with the
/// same elements. Useful for matching regexes.
pub fn pad_list(list: List(a), with item: a, to length: Int) -> List(a) {
  let missing = int.max(length - list.length(list), 0)
  let padding = list.repeat(item, times: missing)
  list.append(list, padding)
}

Loading…
Cancel
Save