summaryrefslogtreecommitdiff
path: root/src/clarktown/parser.clj
blob: ecee37daa3507de3992df0a893d8a18204180da4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
(ns clarktown.parser
  (:require
    [clojure.string :as string]))


(defn- stitch-code-blocks
  "Since code blocks can span multiple blocks (a block is separated by
  two line breaks from another block) , we need to stitch them together
  into one block in order for a block parser to be able to do anything
  with it."
  [blocks]
  (loop [stitched-blocks []
         code-block-started? false
         blocks blocks]
    (if (empty? blocks)
      stitched-blocks
      (let [block (first blocks)]
        (if (and (string/starts-with? (string/trim block) "```")
                 (not (string/ends-with? (string/trim block) "```")))
          (recur (conj stitched-blocks block)
                 true
                 (drop 1 blocks))
          (if code-block-started?
            (let [last-block (last stitched-blocks)
                  last-block-index (- (count stitched-blocks) 1)]
              (if (string/ends-with? (string/trim block) "```")
                (recur (assoc stitched-blocks last-block-index (str last-block "\n\n" block))
                       false
                       (drop 1 blocks))
                (recur (assoc stitched-blocks last-block-index (str last-block "\n\n" block))
                       true
                       (drop 1 blocks))))
            (recur (conj stitched-blocks block)
                   false
                   (drop 1 blocks))))))))


(defn- needs-empty-line-above?
  "Determines whether the current line needs an empty line correction
  above."
  [lines line index]
  (cond
    ; code block
    (and (= (string/trim line) "```")
         (> index 0)
         (->> (take index lines)
              (filter #(= (string/trim %) "```"))
              count
              odd?)
         (not (= (-> (nth lines (- index 1))
                     string/trim) "")))
    true

    ; ATX heading block
    (and (string/starts-with? (string/trim line) "#")
         (> index 0)
         (not (= (-> (nth lines (- index 1))
                     string/trim) "")))
    true


    ; everything else stays normal
    :else false))


(defn- needs-empty-line-below?
  "Determines whether the current line needs an empty line correction
  below."
  [lines line index]
  (cond
    ; code block
    (and (= (string/trim line) "```")
         (< index (- (count lines) 1))
         (->> (take index lines)
              (filter #(= (string/trim %) "```"))
              count
              even?)
         (not (= (-> (nth lines (+ index 1))
                     string/trim) "")))
    true

    ; ATX heading block
    (and (string/starts-with? (string/trim line) "#")
         (< index (- (count lines) 1))
         (not (= (-> (nth lines (+ index 1))
                     string/trim) "")))
    true

    ; everything else stays normal
    :else false))


(defn- correct-block-separations
  "Corrects block separations and adds newlines above or
  below a block where needed."
  [lines]
  (->> lines
       (map-indexed
         (fn [index line]
           (let [add-line-above? (needs-empty-line-above? lines line index)
                 add-line-below? (needs-empty-line-below? lines line index)]
             (cond
               ; If code block starts but there is no empty newline
               ; above, let's fix that
               (and add-line-above?
                    (not add-line-below?))
               (str \newline line)

               ; If the code block ends, but there is no empty newline
               ; below, let's fix that.
               (and add-line-below?
                    (not add-line-above?))
               (str line \newline)

               ; If the code block needs a newline both above and below,
               ; let's fix that.
               (and add-line-above?
                    add-line-below?)
               (str \newline line \newline)

               ; otherwise is what it is
               :else line))))))


(defn- correct-markdown
  "Corrects invalid Markdown for the parser."
  [markdown]
  (let [lines (string/split-lines markdown)]
    (->> lines
         correct-block-separations
         (string/join \newline))))


(defn- find-parser-by-block
  "Find a parser from `parsers` that matches the given `block`."
  [parsers block]
  (->> parsers
       (filter
         (fn [{:keys [matcher]}]
           (when matcher
             (matcher block))))
       first))


(defn- parse-block-with-known-parser
  "Parses a given `block` with a known `parser`."
  [parser parsers block]
  (loop [block block
         renderers (:renderers parser)]
    (if (empty? renderers)
      block
      (let [renderer (first renderers)]
        (recur (renderer block parsers)
               (drop 1 renderers))))))


(defn- parse-block-with-unknown-parsers
  "Parses the given `block` with all the parsers that do not have
  a matcher function, useful for any fallback parsing one might want
  to do."
  [parsers block]
  (loop [block block
         parsers (filter #(= nil (:matcher %)) parsers)]
    (if (empty? parsers)
      block
      (recur (loop [block block
                    renderers (:renderers (first parsers))]
               (if (empty? renderers)
                 block
                 (let [renderer (first renderers)]
                   (recur (renderer block parsers)
                          (drop 1 renderers)))))
             (drop 1 parsers)))))


(defn- parse-blocks
  "Parses each individual Markdown block, given as `blocks`, with
  the list of `parsers`."
  [blocks parsers]
  (for [block blocks]
    (if-let [parser (find-parser-by-block parsers block)]
      (->> (string/trim block)
           (parse-block-with-known-parser parser parsers))
      (->> (string/trim block)
           (parse-block-with-unknown-parsers parsers)))))


(defn parse
  "Parses given `markdown` with `parsers`."
  [markdown parsers]
  (let [blocks (-> (correct-markdown markdown)
                   (string/split #"\n\n")
                   stitch-code-blocks)
        parsed-blocks (parse-blocks blocks parsers)]
    (string/join "\n\n" parsed-blocks)))