From 83b98dbcdc70c91665a189de070d7c2db4e49b34 Mon Sep 17 00:00:00 2001 From: Pavel 'LEdoian' Turinsky Date: Mon, 9 Sep 2024 23:52:21 +0200 Subject: [PATCH] Add initial version of gmpro library --- gmpro/.gitignore | 4 ++ gmpro/README.md | 33 +++++++++ gmpro/gleam.toml | 22 ++++++ gmpro/manifest.toml | 19 +++++ gmpro/src/gmpro.gleam | 135 ++++++++++++++++++++++++++++++++++++ gmpro/src/gmpro/demo.gleam | 24 +++++++ gmpro/src/gmpro/utils.gleam | 22 ++++++ gmpro/test/gmpro_test.gleam | 12 ++++ 8 files changed, 271 insertions(+) create mode 100644 gmpro/.gitignore create mode 100644 gmpro/README.md create mode 100644 gmpro/gleam.toml create mode 100644 gmpro/manifest.toml create mode 100644 gmpro/src/gmpro.gleam create mode 100644 gmpro/src/gmpro/demo.gleam create mode 100644 gmpro/src/gmpro/utils.gleam create mode 100644 gmpro/test/gmpro_test.gleam diff --git a/gmpro/.gitignore b/gmpro/.gitignore new file mode 100644 index 0000000..599be4e --- /dev/null +++ b/gmpro/.gitignore @@ -0,0 +1,4 @@ +*.beam +*.ez +/build +erl_crash.dump diff --git a/gmpro/README.md b/gmpro/README.md new file mode 100644 index 0000000..a899b85 --- /dev/null +++ b/gmpro/README.md @@ -0,0 +1,33 @@ +# GmPro + +A Gleam parser for ChordPro files + +(the name is a silly pun on the G minor chord lol) + +So far it lives inside the [netzpevnik](https://gitea.ledoian.cz/LEdoian/netzpevnik) repository; once it becomes reasonably usable, I will probably move it to a dedicated repository. (Git submodules are not so comfy while developing both projects…) + + + +```sh +gleam add gmpro@1 +``` +```gleam +import gmpro + +pub fn main() { + // TODO: An example of the project in use +} +``` + + + +## Development + +```sh +#gleam run # Run the project – does nothing +gleam test # Run the tests (TODO: someone write those!) 
+gleam run -m gmpro/demo # Run a trivial demo that uses the library +``` diff --git a/gmpro/gleam.toml b/gmpro/gleam.toml new file mode 100644 index 0000000..5b0b706 --- /dev/null +++ b/gmpro/gleam.toml @@ -0,0 +1,22 @@ +name = "gmpro" +version = "1.0.0" + +# Fill out these fields if you intend to generate HTML documentation or publish +# your project to the Hex package manager. +# +# description = "" +# licences = ["Apache-2.0"] +# repository = { type = "github", user = "", repo = "" } +# links = [{ title = "Website", href = "" }] +# +# For a full reference of all the available options, you can have a look at +# https://gleam.run/writing-gleam/gleam-toml/. + +[dependencies] +gleam_stdlib = ">= 0.34.0 and < 2.0.0" + +[dev-dependencies] +gleeunit = ">= 1.0.0 and < 2.0.0" +simplifile = ">= 2.1.0 and < 3.0.0" +pprint = ">= 1.0.3 and < 2.0.0" +argv = ">= 1.0.2 and < 2.0.0" diff --git a/gmpro/manifest.toml b/gmpro/manifest.toml new file mode 100644 index 0000000..303aa3b --- /dev/null +++ b/gmpro/manifest.toml @@ -0,0 +1,19 @@ +# This file was generated by Gleam +# You typically do not need to edit this file + +packages = [ + { name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" }, + { name = "filepath", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "EFB6FF65C98B2A16378ABC3EE2B14124168C0CE5201553DE652E2644DCFDB594" }, + { name = "glam", version = "2.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "glam", source = "hex", outer_checksum = "66EC3BCD632E51EED029678F8DF419659C1E57B1A93D874C5131FE220DFAD2B2" }, + { name = "gleam_stdlib", version = "0.40.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "86606B75A600BBD05E539EB59FABC6E307EEEA7B1E5865AFB6D980A93BCB2181" 
}, + { name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" }, + { name = "pprint", version = "1.0.3", build_tools = ["gleam"], requirements = ["glam", "gleam_stdlib"], otp_app = "pprint", source = "hex", outer_checksum = "76BBB92E23D12D954BD452686543F29EDE8EBEBB7FC0ACCBCA66EEF276EC3A06" }, + { name = "simplifile", version = "2.1.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "BDD04F5D31D6D34E2EDFAEF0B68A6297AEC939888C3BFCE61133DE13857F6DA2" }, +] + +[requirements] +argv = { version = ">= 1.0.2 and < 2.0.0" } +gleam_stdlib = { version = ">= 0.34.0 and < 2.0.0" } +gleeunit = { version = ">= 1.0.0 and < 2.0.0" } +pprint = { version = ">= 1.0.3 and < 2.0.0" } +simplifile = { version = ">= 2.1.0 and < 3.0.0" } diff --git a/gmpro/src/gmpro.gleam b/gmpro/src/gmpro.gleam new file mode 100644 index 0000000..91d43da --- /dev/null +++ b/gmpro/src/gmpro.gleam @@ -0,0 +1,135 @@ +//// This module implements various parsers for ChordPro-style formats. +//// +//// We try to be as unopinionated in the `parse_base` as possible, which tries to mimic a tokenizer and recognise various features of the file. Upon that, there ~~are~~ will be parsers like `parse_chordpro6` to parse specific chordpro versions according to the specification. (This distinction should make it easy to both add own dialects of ChordPro as well as implement future versions, hopefully.) +//// +//// **Note:** we do *not* support nested environments. I have no idea whether they appear in the wild. And by “not support” I mean they will be mis-parsed silently probably, because I'm lazy to even detect them :-) + +// … however, nested environments should only concern The Horrible Fold below, fixing that should do the trick. + +// NOTE: this code is horrible. 
We need to refactor it into sub-parsers so that The Horrible Fold is not too horrible and is extensible OTOH.
+
+import gleam/string
+import gleam/list.{type ContinueOrStop, Continue, Stop}
+import gleam/int
+import gleam/regex
+import gleam/option
+import gleam/result
+import gmpro/utils.{fold_until_varindex}
+
+import gleam/io
+
+// Imma kms:
+// - the parse can fail, so it should be in a Result.
+// - the fail should be correctly reported, so the fold should also take in the line number, i.e. index_fold that would start with index 1
+// - we need to track in which environment we are (TODO: nested environments), parse the parameters of the environment (to grep the correct name of env w/o params, condition, closing brace)
+
+/// This is tricky: we want to parse *all* ChordPro-style formats (esp. previous and future versions as long as they are similar), but we need to skip delegated (e.g. abc) and special (e.g. grid) environments early and not parse their contents here.
+/// This means that we will need to parse each environment separately, but already
+pub type LineType {
+  ChordLine(text: String)
+  Directive(text: String)
+  Comment(text: String)
+  EmptyLine
+  Environment(start: String, name: String, inner_text: String)
+}
+/// Where the fold currently is: at top level, or collecting the raw lines of an open environment (newest line first).
+type HorribleState {
+  Outside
+  InEnvironment(start: String, name: String, inner_text_reverse: List(String))
+}
+
+/// Fold accumulator: the current state plus every top-level item recognised so far, newest item first.
+type HorribleStateMachine {
+  HorribleStateMachine(
+    state: HorribleState,
+    parsed_reverse: List(LineType),
+  )
+}
+
+/// Records `new` as the latest top-level item and returns to the `Outside` state.
+/// Items are prepended (constant time per line); `parse_base` restores document order once at the end.
+fn horrible_outside_wrap(acc: HorribleStateMachine, new: LineType) -> HorribleStateMachine {
+  HorribleStateMachine(state: Outside, parsed_reverse: [new, ..acc.parsed_reverse])
+}
+
+const valid_environ_identifier_re = "[a-zA-Z0-9_]+"
+
+/// Builds a regex matching `prefix`, then an environment identifier (the only capture group), then `suffix`.
+/// Panics if the assembled pattern does not compile, so `prefix` and `suffix` must be valid regex source.
+fn horrible_regex(prefix: String, suffix: String) -> regex.Regex {
+  let assert Ok(compiled) = regex.from_string("^" <> prefix <> "(" <> valid_environ_identifier_re <> ")" <> suffix <> ".*")
+  compiled
+}
+
+/// Folding step for `parse_base`: consumes one (already trimmed) line and updates the state machine.
+/// Stops the fold with an `Error` when an environment start cannot be parsed; a mismatched `{end_of_…}` is only reported on stderr and kept as environment content.
+/// Initial: Ok(HorribleStateMachine(Outside, []))
+fn the_horrible_fold(acc: Result(HorribleStateMachine, String), line: String, line_number: Int) -> ContinueOrStop(Result(HorribleStateMachine, String)) {
+  let with_line = fn(s) { "line: " <> int.to_string(line_number) <> ": " <> s }
+  case acc {
+    Ok(hsm) -> {
+      case hsm.state {
+        Outside -> {
+          case line {
+            "{start_of_" <> _something -> {
+              // This is a horrible way to parse the environment name…
+              // The brace is escaped so the pattern stays valid even for regex engines that reject a bare "{".
+              let start_re = horrible_regex("\\{start_of_", "[-}:]")
+              let matches = regex.scan(with: start_re, content: line)
+              let verdict = {
+                use match <- result.try(list.first(matches) |> result.replace_error(with_line("No regex match for environment start!")))
+                use subm <- result.try(list.first(match.submatches) |> result.replace_error(with_line("No name found in environment start (SUSPICIOUS)!")))
+                use name <- result.try(option.to_result(subm, with_line("Empty name in environment start (SUSPICIOUS)!")))
+                Ok(HorribleStateMachine(..hsm, state: InEnvironment(start: line, name: name, inner_text_reverse: [])))
+              }
+              case verdict {
+                Ok(_) -> Continue(verdict)
+                Error(_) -> Stop(verdict)
+              }
+            }
+            "{" <> _something -> Continue(Ok(horrible_outside_wrap(hsm, Directive(line))))
+            "#" <> _something -> Continue(Ok(horrible_outside_wrap(hsm, Comment(line)))) // We keep comments, so we can re-export the file in the future. However, most of the code will just ignore those.
+            "" -> Continue(Ok(horrible_outside_wrap(hsm, EmptyLine)))
+            _ -> Continue(Ok(horrible_outside_wrap(hsm, ChordLine(line))))
+          }
+        }
+        InEnvironment(start: start, name: name, inner_text_reverse: contents) -> {
+          case line {
+            "{end_of_" <> rest -> {
+              let expected_rest = name <> "}"
+              case rest == expected_rest {
+                True -> Continue(Ok(horrible_outside_wrap(hsm, Environment(start:, name:, inner_text: contents |> list.reverse |> string.join("\n")))))
+                False -> {
+                  // This sounds fishy!
+                  io.println_error(with_line("Suspicious end of section: " <> line))
+                  Continue(Ok(HorribleStateMachine(..hsm, state: InEnvironment(start:start, name:name, inner_text_reverse: [line, ..contents]))))
+                }
+              }
+            }
+            _ -> Continue(Ok(HorribleStateMachine(..hsm, state: InEnvironment(start:start, name:name, inner_text_reverse: [line, ..contents])))) // the compiler cannot InEnvironment(..hsm.state, inner_text_reverse: ...) :-(
+          }
+        }
+      }
+    }
+    Error(_) -> Stop(acc) // Only reachable if the fold is seeded with an Error; pass it through instead of crashing.
+  }
+}
+
+// Songs are short, so having the parser run multiple times over the whole document is cheap. Therefore we first parse into Directives, TextLines and Environments (and Comments and EmptyLines for completeness) as the general format, and then we recursively parse the known Environments and specific known stuff.
+
+/// Splits `s` on "\n", trims every line and classifies it as a directive, comment, empty line, chord line or a whole environment.
+/// Returns the items in document order, or an `Error` (an unparseable `{start_of_…}` line, or input that ends inside an environment).
+pub fn parse_base(data s: String) -> Result(List(LineType), String) {
+  let parsed =
+    s
+    |> string.split(on: "\n")
+    // NOTE(review): trimming also strips leading whitespace of lines inside environments — confirm that is intended for e.g. tab/grid content.
+    |> list.map(with: string.trim)
+    |> fold_until_varindex(from: Ok(HorribleStateMachine(Outside, [])), starting_with: 1, with: the_horrible_fold)
+  result.try(parsed, fn(end_state) {
+    case end_state {
+      HorribleStateMachine(Outside, parsed_reverse) -> Ok(list.reverse(parsed_reverse))
+      HorribleStateMachine(InEnvironment(start, _name, itr), parsed_reverse) -> Error("Parse ended in env " <> start <> " with content " <> string.join(list.reverse(itr), "\n") <> " having parsed " <> int.to_string(list.length(parsed_reverse)) <> " lines") // cannot print the parsed items themselves :-/
+    }
+  })
+}
diff --git a/gmpro/src/gmpro/demo.gleam b/gmpro/src/gmpro/demo.gleam
new file mode 100644
index 0000000..c50aab1
--- /dev/null
+++ b/gmpro/src/gmpro/demo.gleam
@@ -0,0 +1,24 @@
+import gmpro
+import simplifile
+import gleam/list
+import gleam/io
+import gleam/result
+import pprint
+import argv
+
+pub fn main() {
+  let verdict =
+    argv.load().arguments
+    |> list.first
+    |> result.unwrap(or: "../divka.cho")
+    |> simplifile.read
+    |> result.map_error(fn(err) {
+      io.debug(err)
+      "simplifile failed."
+    })
+    |> result.try(gmpro.parse_base) // try (not map) flattens the Result, so parse errors reach the Error branch below
+  case verdict {
+    Ok(result) -> io.println(pprint.styled(result))
+    Error(err) -> io.println_error(err)
+  }
+}
diff --git a/gmpro/src/gmpro/utils.gleam b/gmpro/src/gmpro/utils.gleam
new file mode 100644
index 0000000..2fd7592
--- /dev/null
+++ b/gmpro/src/gmpro/utils.gleam
@@ -0,0 +1,22 @@
+//// Various utilities.
+
+import gleam/list.{type ContinueOrStop, Stop, Continue}
+
+/// Left fold over `collection` whose callback also receives a running index: the first element gets `index`, the next `index + 1`, and so on.
+/// The callback returns `Continue(acc)` to keep folding or `Stop(acc)` to finish immediately with `acc`.
+/// An empty list yields `accumulator` unchanged.
+pub fn fold_until_varindex(
+  over collection: List(a),
+  from accumulator: b,
+  starting_with index: Int,
+  with fun: fn(b, a, Int) -> ContinueOrStop(b),
+) -> b {
+  case collection {
+    [] -> accumulator
+    [head, ..tail] ->
+      case fun(accumulator, head, index) {
+        Continue(next) -> fold_until_varindex(over: tail, from: next, starting_with: index + 1, with: fun)
+        Stop(final) -> final
+      }
+  }
+}
diff --git a/gmpro/test/gmpro_test.gleam b/gmpro/test/gmpro_test.gleam
new file mode 100644
index 0000000..3831e7a
--- /dev/null
+++ b/gmpro/test/gmpro_test.gleam
@@ -0,0 +1,12 @@
+import gleeunit
+import gleeunit/should
+
+pub fn main() {
+  gleeunit.main()
+}
+
+// gleeunit test functions end in `_test`
+pub fn hello_world_test() {
+  1
+  |> should.equal(1)
+}