summaryrefslogtreecommitdiff
path: root/src/dompa/nodes.clj
diff options
context:
space:
mode:
authorAsko Nõmm <asko@nmm.ee>2025-08-30 16:18:44 +0300
committerAsko Nõmm <asko@nmm.ee>2025-08-30 16:18:44 +0300
commit8080d2b0044b348210622185b80c5f4514bb52e2 (patch)
tree655c9bd9884eb735c54cf9bb6f0404ecf32a4ee1 /src/dompa/nodes.clj
parent41c1d9eadd3a5cb0a804390edca592c197f49d33 (diff)
Getting there ...
Diffstat (limited to 'src/dompa/nodes.clj')
-rw-r--r--src/dompa/nodes.clj80
1 files changed, 65 insertions, 15 deletions
diff --git a/src/dompa/nodes.clj b/src/dompa/nodes.clj
index 9b30be3..eb96027 100644
--- a/src/dompa/nodes.clj
+++ b/src/dompa/nodes.clj
@@ -1,19 +1,71 @@
(ns dompa.nodes
- (:require [clojure.string :as str]
- [dompa.coordinates :as coordinates]))
+ (:require
+ [clojure.string :as str]
+ [dompa.coordinates :as coordinates]))
(defn- html->node-name
  "Parses a given HTML string of a node to get its name as
  a keyword. A text node (anything not starting with `<`)
  will return `:dompa/text`.

  The name is everything after the leading `<` (and an optional
  `/` of a closing tag) up to the first whitespace character,
  `/` or `>`, so `<br/>`, `</div>` and `<img\\nsrc=...>` yield
  `:br`, `:div` and `:img` respectively."
  [html]
  (if (str/starts-with? html "<")
    ;; A regex capture always yields a string, even for one-letter
    ;; tags such as <p>; `(reduce str ...)` over a single character
    ;; returns the bare Character, for which `keyword` returns nil.
    (let [[_ tag-name] (re-find #"^</?([^\s/>]*)" html)]
      (keyword tag-name))
    :dompa/text))
-(defn- html->node-attrs [html])
(defn- attr->k-v
  "Parses a given HTML node attribute into a key-value pair.

  `attr` may be a string or a sequence of characters. Returns
  a vector of strings: `[key value]` when the attribute
  contains a `=`, or `[key]` when it does not. Only the first
  `=` separates key from value, so values that themselves
  contain `=` (e.g. query strings) are kept intact."
  [attr]
  ;; `apply str` turns a character sequence into a string and
  ;; leaves an existing string unchanged; the limit of 2 stops
  ;; the split after the first `=`.
  (str/split (apply str attr) #"=" 2))
(defn- normalize-attr-str
  "Normalizes a given HTML attribute value string. If it
  starts with a double or single quote, returns the text
  between that quote and the next matching quote (or the end
  of the string when the quote is never closed). Otherwise
  the value is returned unchanged. Always returns a string."
  [attr-str]
  (let [value (str attr-str)
        quote-char (first value)]
    (if (contains? #{\" \'} quote-char)
      ;; `apply str` (rather than `reduce str`) guarantees a string
      ;; even when exactly one character sits between the quotes.
      (apply str (take-while #(not= % quote-char) (rest value)))
      value)))
(defn- parse-attr
  "Turns a single HTML attribute into a one-entry map of
  keyword name to value. The value is passed through
  `normalize-attr-str`; an attribute without a value part
  is a boolean attribute and maps to `true`."
  [attr]
  (let [[attr-name raw-value] (attr->k-v attr)]
    (array-map (keyword attr-name)
               (if (some? raw-value)
                 (normalize-attr-str raw-value)
                 true))))
+
(defn- html->node-attrs
  "Parses the attributes of the opening tag at the start of
  `html` into a map of keyword -> value, using `parse-attr`
  for each attribute.

  Returns nil when `html` does not start with `<` (a text
  node) and an empty map when the tag has no attributes.
  Quoted attribute values may contain spaces, `/` and `>`
  without being split or cut off."
  [html]
  (when (str/starts-with? html "<")
    ;; Capture everything between the tag name and the first `>`
    ;; that is not inside a quoted value.
    (let [[_ attrs-str] (re-find #"^</?[^\s/>]*((?:\"[^\"]*\"|'[^']*'|[^>\"'])*)" html)]
      ;; Each match is one attribute: a name, optionally followed by
      ;; `=` and a double-quoted, single-quoted or unquoted value.
      ;; A bare `/` (self-closing marker) never matches as a name.
      (->> (re-seq #"[^\s=/>\"']+(?:=(?:\"[^\"]*\"|'[^']*'|[^\s>]*))?" attrs-str)
           (map parse-attr)
           (into {})))))
+
+(comment
+ (html->node-attrs "<img src=\"test.jpg\" checked />"))
+
(defn- construct-node
  "Builds the map for a single node from its HTML string and
  its already-built children. The map always has `:name`;
  `:value` (the raw HTML) is added only for `:dompa/text`
  nodes, `:attrs` only when `html->node-attrs` returns a
  truthy value, and `:children` only when `node-children`
  is truthy."
  [node-html node-children]
  (let [tag (html->node-name node-html)
        attributes (html->node-attrs node-html)]
    (cond-> {:name tag}
      (= tag :dompa/text) (assoc :value node-html)
      attributes (assoc :attrs attributes)
      node-children (assoc :children node-children))))
(defn coordinates->nodes
[html coordinates]
@@ -22,9 +74,7 @@
[parent-from parent-to] (first sorted-coordinates)
children (coordinates/children sorted-coordinates [parent-from parent-to])
remaining (coordinates/without-children sorted-coordinates [parent-from parent-to])
- node-html (subs html parent-from (inc parent-to))]
- (cons {:value (subs html parent-from (inc parent-to))
- :name (html->node-name node-html)
- :attrs (html->node-attrs node-html)
- :children (coordinates->nodes html children)}
+ node-html (subs html parent-from (inc parent-to))
+ node-children (coordinates->nodes html children)]
+ (cons (construct-node node-html node-children)
(coordinates->nodes html remaining)))))