summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAsko Nõmm <asko@bien.ee>2022-04-15 19:56:01 +0200
committerAsko Nõmm <asko@bien.ee>2022-04-15 19:56:01 +0200
commitce96dd264fd2385303f48259e5e8d1431d22b91d (patch)
tree70b08a04aa7261c64ccb4f21f1b4bc34d86b1a2a /src
parent58970486f0c0fdb9470ef28e510f5c90060e2ced (diff)
Implement Markdown correction mechanism
This fixes an issue with ATX headings and code blocks: when there were no empty lines above and below them, the parser could not correctly identify those blocks for what they were. There is now a Markdown correction function to which more of these corrections can be added in the future, making the whole thing a lot easier.
Diffstat (limited to 'src')
-rw-r--r--src/clarktown/core.clj5
-rw-r--r--src/clarktown/parser.clj99
-rw-r--r--src/clarktown/parsers/code_block.clj16
3 files changed, 111 insertions, 9 deletions
diff --git a/src/clarktown/core.clj b/src/clarktown/core.clj
index f192cc4..a6ed42c 100644
--- a/src/clarktown/core.clj
+++ b/src/clarktown/core.clj
@@ -24,4 +24,7 @@
([markdown]
(render markdown parsers/parsers))
([markdown given-parsers]
- (parser/parse markdown given-parsers))) \ No newline at end of file
+ (parser/parse markdown given-parsers)))
+
+(comment
+ (render (slurp "./test.md"))) \ No newline at end of file
diff --git a/src/clarktown/parser.clj b/src/clarktown/parser.clj
index afe9668..ecee37d 100644
--- a/src/clarktown/parser.clj
+++ b/src/clarktown/parser.clj
@@ -35,6 +35,102 @@
(drop 1 blocks))))))))
+(defn- needs-empty-line-above?
+ "Returns true when `line`, at `index` in `lines`, follows a non-blank line and is either
+ a `#`-prefixed line or a bare ``` fence with an odd count of bare ``` fences before it."
+ [lines line index]
+ (cond
+ ; bare ``` fence after an odd number of bare ``` fences (tagged fences such as ```clj never match) - NOTE(review): with bare fences only, this fires on the closing fence; confirm intended
+ (and (= (string/trim line) "```")
+ (> index 0)
+ (->> (take index lines)
+ (filter #(= (string/trim %) "```"))
+ count
+ odd?)
+ (not (= (-> (nth lines (- index 1))
+ string/trim) "")))
+ true
+
+ ; ATX heading: any trimmed line starting with # - NOTE(review): also matches such lines inside a code fence; confirm
+ (and (string/starts-with? (string/trim line) "#")
+ (> index 0)
+ (not (= (-> (nth lines (- index 1))
+ string/trim) "")))
+ true
+
+
+ ; any other line needs no empty line above
+ :else false))
+
+
+(defn- needs-empty-line-below?
+ "Returns true when `line`, at `index` in `lines`, is followed by a non-blank line and is either
+ a `#`-prefixed line or a bare ``` fence with an even count of bare ``` fences before it."
+ [lines line index]
+ (cond
+ ; bare ``` fence after an even number of bare ``` fences (tagged fences such as ```clj never match) - NOTE(review): with bare fences only, this fires on the opening fence; confirm intended
+ (and (= (string/trim line) "```")
+ (< index (- (count lines) 1))
+ (->> (take index lines)
+ (filter #(= (string/trim %) "```"))
+ count
+ even?)
+ (not (= (-> (nth lines (+ index 1))
+ string/trim) "")))
+ true
+
+ ; ATX heading: any trimmed line starting with # - NOTE(review): also matches such lines inside a code fence; confirm
+ (and (string/starts-with? (string/trim line) "#")
+ (< index (- (count lines) 1))
+ (not (= (-> (nth lines (+ index 1))
+ string/trim) "")))
+ true
+
+ ; any other line needs no empty line below
+ :else false))
+
+
+(defn- correct-block-separations
+ "Maps each of `lines` to a string: the line itself, with a newline character prepended and/or
+ appended where `needs-empty-line-above?` / `needs-empty-line-below?` return true for it."
+ [lines]
+ (->> lines
+ (map-indexed
+ (fn [index line]
+ (let [add-line-above? (needs-empty-line-above? lines line index)
+ add-line-below? (needs-empty-line-below? lines line index)]
+ (cond
+ ; Only an empty line above is needed (fence or heading):
+ ; prepend a newline to the line.
+ (and add-line-above?
+ (not add-line-below?))
+ (str \newline line)
+
+ ; Only an empty line below is needed (fence or heading):
+ ; append a newline to the line.
+ (and add-line-below?
+ (not add-line-above?))
+ (str line \newline)
+
+ ; Empty lines are needed both above and below:
+ ; wrap the line in newlines.
+ (and add-line-above?
+ add-line-below?)
+ (str \newline line \newline)
+
+ ; no correction needed, keep the line unchanged
+ :else line))))))
+
+
+(defn- correct-markdown
+ "Splits `markdown` into lines, applies `correct-block-separations`, and joins the result with newlines."
+ [markdown]
+ (let [lines (string/split-lines markdown)]
+ (->> lines
+ correct-block-separations
+ (string/join \newline))))
+
+
(defn- find-parser-by-block
"Find a parser from `parsers` that matches the given `block`."
[parsers block]
@@ -92,7 +188,8 @@
(defn parse
"Parses given `markdown` with `parsers`."
[markdown parsers]
- (let [blocks (-> (string/split markdown #"\n\n")
+ (let [blocks (-> (correct-markdown markdown)
+ (string/split #"\n\n")
stitch-code-blocks)
parsed-blocks (parse-blocks blocks parsers)]
(string/join "\n\n" parsed-blocks)))
diff --git a/src/clarktown/parsers/code_block.clj b/src/clarktown/parsers/code_block.clj
index 388776f..c6ecfea 100644
--- a/src/clarktown/parsers/code_block.clj
+++ b/src/clarktown/parsers/code_block.clj
@@ -16,13 +16,15 @@
(let [language (->> block
(re-find #"\`\`\`(\w+)")
second)
- code (as-> block n
- (string/replace-first n #"\`\`\`(\w+)?\n" "")
- (subs n 0 (- (count n) 4))
- (string/replace n #"&" "&amp;")
- (string/replace n #"<" "&lt;")
- (string/replace n #">" "&gt;")
- (string/trim n))]
+ lines (string/split-lines block)
+ block* (->> (next lines)
+ (take (- (count lines) 2))
+ (string/join \newline))
+ code (-> block*
+ (string/replace #"&" "&amp;")
+ (string/replace #"<" "&lt;")
+ (string/replace #">" "&gt;")
+ string/trim)]
(if language
(str "<pre><code class=\"language-" language "\">" code "</code></pre>")
(str "<pre><code>" code "</code></pre>"))))