From ce96dd264fd2385303f48259e5e8d1431d22b91d Mon Sep 17 00:00:00 2001 From: Asko Nõmm Date: Fri, 15 Apr 2022 19:56:01 +0200 Subject: Implement Markdown correction mechanism This fixes an issue with ATX headings and code blocks where, if there were no empty lines above and below them, the parser couldn't correctly identify those blocks as what they were. There's now a Markdown correction function where we can add more of these corrections in the future, making the whole thing a lot easier. --- resources/test/core.md | 9 +++- resources/test/core_result.html | 10 +++- src/clarktown/core.clj | 5 +- src/clarktown/parser.clj | 99 +++++++++++++++++++++++++++++++++++- src/clarktown/parsers/code_block.clj | 16 +++--- 5 files changed, 128 insertions(+), 11 deletions(-) diff --git a/resources/test/core.md b/resources/test/core.md index afc972a..f65b20e 100644 --- a/resources/test/core.md +++ b/resources/test/core.md @@ -83,4 +83,11 @@ This is a H1 heading with settext ================================= And this is a H2 heading with settext -------------------------------------- \ No newline at end of file +------------------------------------- + +Testing paragraph right before a code block +``` +code goes here +``` +# Heading goes here +Paragraph right after heading \ No newline at end of file diff --git a/resources/test/core_result.html b/resources/test/core_result.html index aaad44a..39e8562 100644 --- a/resources/test/core_result.html +++ b/resources/test/core_result.html @@ -58,4 +58,12 @@ function markdownToHTML(markdown) {

This is a H1 heading with settext

-

And this is a H2 heading with settext

\ No newline at end of file +

And this is a H2 heading with settext

+ +

Testing paragraph right before a code block

+ +
code goes here
+ +

Heading goes here

+ +

Paragraph right after heading

\ No newline at end of file diff --git a/src/clarktown/core.clj b/src/clarktown/core.clj index f192cc4..a6ed42c 100644 --- a/src/clarktown/core.clj +++ b/src/clarktown/core.clj @@ -24,4 +24,7 @@ ([markdown] (render markdown parsers/parsers)) ([markdown given-parsers] - (parser/parse markdown given-parsers))) \ No newline at end of file + (parser/parse markdown given-parsers))) + +(comment + (render (slurp "./test.md"))) \ No newline at end of file diff --git a/src/clarktown/parser.clj b/src/clarktown/parser.clj index afe9668..ecee37d 100644 --- a/src/clarktown/parser.clj +++ b/src/clarktown/parser.clj @@ -35,6 +35,102 @@ (drop 1 blocks)))))))) +(defn- needs-empty-line-above? + "Determines whether the current line needs an empty line correction + above." + [lines line index] + (cond + ; code block + (and (= (string/trim line) "```") + (> index 0) + (->> (take index lines) + (filter #(= (string/trim %) "```")) + count + odd?) + (not (= (-> (nth lines (- index 1)) + string/trim) ""))) + true + + ; ATX heading block + (and (string/starts-with? (string/trim line) "#") + (> index 0) + (not (= (-> (nth lines (- index 1)) + string/trim) ""))) + true + + + ; everything else stays normal + :else false)) + + +(defn- needs-empty-line-below? + "Determines whether the current line needs an empty line correction + below." + [lines line index] + (cond + ; code block + (and (= (string/trim line) "```") + (< index (- (count lines) 1)) + (->> (take index lines) + (filter #(= (string/trim %) "```")) + count + even?) + (not (= (-> (nth lines (+ index 1)) + string/trim) ""))) + true + + ; ATX heading block + (and (string/starts-with? (string/trim line) "#") + (< index (- (count lines) 1)) + (not (= (-> (nth lines (+ index 1)) + string/trim) ""))) + true + + ; everything else stays normal + :else false)) + + +(defn- correct-block-separations + "Corrects block separations and adds newlines above or + below a block where needed." 
+ [lines] + (->> lines + (map-indexed + (fn [index line] + (let [add-line-above? (needs-empty-line-above? lines line index) + add-line-below? (needs-empty-line-below? lines line index)] + (cond + ; If code block starts but there is no empty newline + ; above, let's fix that + (and add-line-above? + (not add-line-below?)) + (str \newline line) + + ; If the code block ends, but there is no empty newline + ; below, let's fix that. + (and add-line-below? + (not add-line-above?)) + (str line \newline) + + ; If the code block needs a newline both above and below, + ; let's fix that. + (and add-line-above? + add-line-below?) + (str \newline line \newline) + + ; otherwise is what it is + :else line)))))) + + +(defn- correct-markdown + "Corrects invalid Markdown for the parser." + [markdown] + (let [lines (string/split-lines markdown)] + (->> lines + correct-block-separations + (string/join \newline)))) + + (defn- find-parser-by-block "Find a parser from `parsers` that matches the given `block`." [parsers block] @@ -92,7 +188,8 @@ (defn parse "Parses given `markdown` with `parsers`." 
[markdown parsers] - (let [blocks (-> (string/split markdown #"\n\n") + (let [blocks (-> (correct-markdown markdown) + (string/split #"\n\n") stitch-code-blocks) parsed-blocks (parse-blocks blocks parsers)] (string/join "\n\n" parsed-blocks))) diff --git a/src/clarktown/parsers/code_block.clj b/src/clarktown/parsers/code_block.clj index 388776f..c6ecfea 100644 --- a/src/clarktown/parsers/code_block.clj +++ b/src/clarktown/parsers/code_block.clj @@ -16,13 +16,15 @@ (let [language (->> block (re-find #"\`\`\`(\w+)") second) - code (as-> block n - (string/replace-first n #"\`\`\`(\w+)?\n" "") - (subs n 0 (- (count n) 4)) - (string/replace n #"&" "&") - (string/replace n #"<" "<") - (string/replace n #">" ">") - (string/trim n))] + lines (string/split-lines block) + block* (->> (next lines) + (take (- (count lines) 2)) + (string/join \newline)) + code (-> block* + (string/replace #"&" "&") + (string/replace #"<" "<") + (string/replace #">" ">") + string/trim)] (if language (str "
" code "
") (str "
" code "
")))) -- cgit v1.2.3