summary refs log tree commit diff
diff options
context:
space:
mode:
author Asko Nõmm <asko@nmm.ee> 2025-09-20 19:58:00 +0300
committer Asko Nõmm <asko@nmm.ee> 2025-09-20 19:58:00 +0300
commit 9f31cc6a5aae83f7986e733603e62e91eae31196 (patch)
tree 048607eb1a2b96cc4a1833272ff6817aa8b73cbe
parent 7062dcc7d32b9a639ec242ee60e0c2daeb4f1552 (diff)
Many fixes to coordinates logic in capturing attributes, and more tests. Coordinates ns is shaping up well.
-rw-r--r-- src/dompa/coordinates.cljc 130
-rw-r--r-- test/dompa/coordinates_test.cljc 118
2 files changed, 209 insertions, 39 deletions
diff --git a/src/dompa/coordinates.cljc b/src/dompa/coordinates.cljc
index 8610b8b..3584dc9 100644
--- a/src/dompa/coordinates.cljc
+++ b/src/dompa/coordinates.cljc
@@ -61,11 +61,13 @@
:start-idx 0
:coordinates []}
indexed-html (map-indexed vector html)]
- (-> (compose-reducer-fn (count indexed-html))
- (reduce default-state indexed-html)
- :coordinates)))
+ {:html html
+ :coordinates (-> (compose-reducer-fn (count indexed-html))
+ (reduce default-state indexed-html)
+ :coordinates)}))
-(defn- coordinates->tag-name [html [start end]]
+(defn- coordinates->tag-name
+ [html [start end]]
(let [value (subs html start end)]
(if (str/starts-with? value "<")
(->> (subs html start end)
@@ -74,11 +76,13 @@
(apply str))
value)))
-(defn- name-coordinates-fn [html]
+(defn- name-coordinates-fn
+ [html]
(fn [idx coordinate] ; closes over html; takes an index and one [start end] coordinate
[idx (coordinates->tag-name html coordinate)])) ; pairs the index with whatever coordinates->tag-name resolves for that span
-(defn- last-by-tag-name-idx [html coordinates name start]
+(defn- last-by-tag-name-idx
+ [html coordinates name start]
(let [filter-fn (fn [[_ end]] (< end start))
filtered-coordinates (filter filter-fn coordinates)
index-fn (name-coordinates-fn html)
@@ -88,7 +92,8 @@
last
first)))
-(defn- unify-one [html coordinates [start end]]
+(defn- unify-one
+ [html coordinates [start end]]
(let [name (coordinates->tag-name html [start end])
matching-idx (last-by-tag-name-idx html coordinates name start)]
(if matching-idx
@@ -96,7 +101,8 @@
(assoc coordinates matching-idx [matching-start end]))
coordinates)))
-(defn- unify-reducer-fn [html]
+(defn- unify-reducer-fn
+ [html]
(fn [coordinates [start end]]
(if (and (= \< (nth html start))
(= \/ (nth html (inc start) nil)))
@@ -115,9 +121,10 @@
because non-unified coordinates are blind to the context
in which they live, having only had one pass over the
raw HTML string which composes the initial coordinates."
- [html coordinates]
- (-> (unify-reducer-fn html)
- (reduce [] coordinates)))
+ [{:keys [html coordinates]}]
+ {:html html
+ :coordinates (-> (unify-reducer-fn html)
+ (reduce [] coordinates))})
(defn- children
[coordinates [from to]]
@@ -174,15 +181,79 @@
v (if (nil? v) true (normalize-html-attr-str v))]
{k v}))
-(defn- html-str->node-attrs [html]
+(defn- html->str->node-attrs-reducer
+ [attrs-html]
+ (fn [{:keys [start-idx has-attrs? attrs] :as state} [idx c]] ; state map plus one [index char] pair of attrs-html
+ (cond
+ ; last character of attrs-html: collect whatever is left (checked first, so it wins over every branch below)
+ (= (count attrs-html) (inc idx))
+ {:start-idx 0
+ :has-attrs? has-attrs?
+ :attrs (conj attrs (subs attrs-html start-idx (inc idx)))} ; subs end is exclusive, hence (inc idx) to include c
+
+ ; a space while has-attrs? is false (i.e. not inside a double-quoted value)
+ ; ends the current token, so collect it and start the next one after the space.
+ (and (= \space c)
+ (not has-attrs?))
+ {:start-idx (inc idx)
+ :has-attrs? false
+ :attrs (conj attrs (subs attrs-html start-idx idx))}
+
+ ; a = directly followed by a double quote opens a quoted value; has-attrs? stays true
+ ; until the closing quote. NOTE(review): single-quoted values are not detected here.
+ (and (= \= c)
+ (= \" (get attrs-html (inc idx))))
+ {:start-idx start-idx
+ :has-attrs? true
+ :attrs attrs}
+
+ ; quote with either space next or nothing next,
+ ; and no = before, and we have attrs
+ (and (= \" c)
+ (not (= \= (get attrs-html (dec idx))))
+ (or (nil? (get attrs-html (inc idx)))
+ (= \space (get attrs-html (inc idx))))
+ has-attrs?)
+ {:start-idx (inc idx)
+ :has-attrs? false
+ :attrs (conj attrs (subs attrs-html start-idx (inc idx)))} ; token includes the closing quote
+
+ :else state))) ; any other character leaves the state untouched
+
+(defn- html->str->attrs-html-str [html] ; returns the attribute text of a tag string, without the tag name
+ (->> (subs html 1) ; skip the first character (the caller has already checked it is <)
+ (take-while #(not (contains? #{\> \/} %))) ; NOTE(review): stops at the first > or /, so a quoted value containing / (a URL or path) is cut short
+ (partition-by #(= % \space)) ; alternating runs of non-space and space characters
+ (drop 1) ; drop the first run, normally the tag name
+ flatten
+ (reduce str) ; join the remaining characters back into one string
+ str/trim))
+
+(defn- html-str->node-attrs
+ "Turns a given `html` string into an attribute map, e.g:
+
+ ```html
+ <input type=\"checkbox\" checked />
+ ```
+
+ Would become:
+
+ ```clojure
+ {:type \"checkbox\"
+ :checked true}
+ ```"
+ [html]
(when (str/starts-with? html "<") ; anything that does not start with < yields nil
- (->> (subs html 1)
- (take-while #(not (contains? #{\> \/} %)))
- (partition-by #(= % \space))
- (drop 1)
- (filter #(not= (-> % first) \space))
- (map parse-html-attr-str)
- (into {}))))
+ (let [attrs-html (html->str->attrs-html-str html) ; attribute text only, tag name removed
+ indexed-attrs-html (map-indexed vector attrs-html) ; seq of [index char] pairs fed to the reducer
+ default-reducer-state {:start-idx 0
+ :has-attrs? false
+ :attrs []}]
+ (as-> (html->str->node-attrs-reducer attrs-html) $ ; $ is rebound to the result of each step below
+ (reduce $ default-reducer-state indexed-attrs-html) ; split attrs-html into raw attribute tokens
+ (remove str/blank? (:attrs $)) ; discard empty or whitespace-only tokens
+ (map parse-html-attr-str $) ; presumably one single-entry map per token; confirm against parse-html-attr-str
+ (into {} $))))) ; merge everything into one attribute map
(defn- construct-node
[node-html node-children]
@@ -198,14 +269,25 @@
(defn ->nodes ; recursive: builds the first top-level node, then its remaining siblings; yields nil (not an empty vector) when coordinates is empty
"Transform given `html` according to given `coordinates` into
- a tree of nodes, each representing one HTML node and its children."
- [html coordinates]
+ a tree of nodes, each representing one HTML node and its children.
+
+ Direct output of both `compose` and `unify` can be given to this
+ function, allowing chaining such as:
+
+ ```clojure
+ (-> \"some html ...\"
+ coordinates/compose
+ coordinates/unify
+ coordinates/->nodes)
+ ```"
+ [{:keys [html coordinates]}]
(when (seq coordinates)
(let [sorted-coordinates (sort-by first coordinates)
[parent-from parent-to] (first sorted-coordinates)
children (children sorted-coordinates [parent-from parent-to])
remaining (without-children sorted-coordinates [parent-from parent-to])
node-html (subs html parent-from (inc parent-to))
- node-children (->nodes html children)]
- (cons (construct-node node-html node-children)
- (->nodes html remaining))))) \ No newline at end of file
+ node-children (->nodes {:html html :coordinates children})]
+ (-> (cons (construct-node node-html node-children)
+ (->nodes {:html html :coordinates remaining}))
+ vec)))) \ No newline at end of file
diff --git a/test/dompa/coordinates_test.cljc b/test/dompa/coordinates_test.cljc
index a827968..eb118d2 100644
--- a/test/dompa/coordinates_test.cljc
+++ b/test/dompa/coordinates_test.cljc
@@ -4,28 +4,116 @@
(deftest compose-test
(testing "Create coordinates"
- (is (= [[0 4] [5 9] [10 15]]
- (coordinates/compose "<div>hello</div>"))))
+ (let [html "<div>hello</div>"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9] [10 15]]} ; opening tag 0-4, text 5-9, closing tag 10-15 (end index inclusive)
+ (coordinates/compose html)))))
(testing "Create coordinates with invalid HTML"
- (is (= [[0 4] [5 9]]
- (coordinates/compose "<div>hello")))
+ (let [html "<div>hello"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9]]} ; a missing closing tag simply produces no third span
+ (coordinates/compose html))))
- (is (= []
- (coordinates/compose "<div")))
+ (let [html "<div>hello</span>"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9] [10 16]]} ; compose does not match tags: the stray closing span tag still gets 10-16
+ (coordinates/compose html))))
- (is (= [[0 3]]
- (coordinates/compose "div>")))
+ (let [html "<div"]
+ (is (= {:html html
+ :coordinates []} ; an unterminated tag is expected to yield no coordinates
+ (coordinates/compose html))))
- (is (= [[0 1]]
- (coordinates/compose "<>"))))
+ (let [html "div>"]
+ (is (= {:html html
+ :coordinates [[0 3]]} ; the whole input is expected as a single span
+ (coordinates/compose html))))
+
+ (let [html "<>"]
+ (is (= {:html html
+ :coordinates [[0 1]]} ; an empty tag still counts as one span
+ (coordinates/compose html)))))
(testing "Create coordinates with just text"
- (is (= [[0 4]]
- (coordinates/compose "hello"))))
+ (let [html "hello"]
+ (is (= {:html html
+ :coordinates [[0 4]]}
+ (coordinates/compose html)))))
(testing "Create coordinates with text starting"
- (is (= [[0 4] [5 9] [10 15]]
- (coordinates/compose "hello<div></div>")))))
+ (let [html "hello<div></div>"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9] [10 15]]} ; text 0-4, opening tag 5-9, closing tag 10-15
+ (coordinates/compose html)))))
+
+ (testing "Create coordinates with text ending"
+ (let [html "<div></div>hello"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 10] [11 15]]} ; opening tag 0-4, closing tag 5-10, text 11-15
+ (coordinates/compose html))))))
+
+(deftest unify-test
+ (testing "Unify coordinates"
+ (let [html "<div>hello</div>"]
+ (is (= {:html html
+ :coordinates [[0 15] [5 9]]} ; opening and closing tag merged into one 0-15 span; the text span is kept
+ (-> (coordinates/compose html)
+ coordinates/unify)))))
+
+ (testing "Unify coordinates with invalid HTML"
+ (let [html "<div>hello"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9]]} ; nothing to merge without a closing tag
+ (-> (coordinates/compose html)
+ coordinates/unify))))
+
+ (let [html "<div>hello</span>"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 9]]} ; the unmatched closing tag (10-16 after compose) is absent from the expected result
+ (-> (coordinates/compose html)
+ coordinates/unify)))))
+
+ (testing "Unify coordinates with just text"
+ (let [html "hello"]
+ (is (= {:html html
+ :coordinates [[0 4]]}
+ (-> (coordinates/compose html)
+ coordinates/unify)))))
+
+ (testing "Unify coordinates with text starting"
+ (let [html "hello<div></div>"]
+ (is (= {:html html
+ :coordinates [[0 4] [5 15]]} ; text 0-4, then the merged tag pair 5-15
+ (-> (coordinates/compose html)
+ coordinates/unify)))))
+
+ (testing "Unify coordinates with text ending"
+ (let [html "<div></div>hello"]
+ (is (= {:html html
+ :coordinates [[0 10] [11 15]]} ; merged tag pair 0-10, then text 11-15
+ (-> (coordinates/compose html)
+ coordinates/unify))))))
+
+(deftest nodes-test
+ (testing "Create nodes"
+ (is (= [{:name :div
+ :attrs {} ; a tag without attributes is expected to carry an empty map
+ :children [{:name :dompa/text
+ :value "hello"}]}]
+ (-> "<div>hello</div>"
+ coordinates/compose
+ coordinates/unify
+ coordinates/->nodes)))) ; full pipeline: each step consumes the map returned by the previous one
-(deftest unify-test) \ No newline at end of file
+ (testing "Create nodes with attributes"
+ (is (= [{:attrs {:class "some test classes" ; spaces inside a double-quoted value must not split the attribute
+ :data-attr "something"
+ :checked true} ; a bare attribute with no value is expected as true
+ :children [{:name :dompa/text
+ :value "hello"}]
+ :name :div}]
+ (-> "<div class=\"some test classes\" data-attr=\"something\" checked>hello</div>"
+ coordinates/compose
+ coordinates/unify
+ coordinates/->nodes))))) \ No newline at end of file