diff options
Diffstat (limited to 'src/dompa/nodes.clj')
| -rw-r--r-- | src/dompa/nodes.clj | 39 |
1 file changed, 18 insertions, 21 deletions
diff --git a/src/dompa/nodes.clj b/src/dompa/nodes.clj index eb96027..72f89ff 100644 --- a/src/dompa/nodes.clj +++ b/src/dompa/nodes.clj @@ -3,7 +3,7 @@ [clojure.string :as str] [dompa.coordinates :as coordinates])) -(defn- html->node-name +(defn- html-str->node-name "Parses a given HTML string of a node to get its name as a keyword. A text node will return `:dompa/text`." [html] @@ -14,7 +14,7 @@ keyword) :dompa/text)) -(defn- attr->k-v +(defn- html-attr-str->k-v "Parses a given HTML node attribute string into a key-value pair." [attr] @@ -22,52 +22,49 @@ (filter #(not= (-> % first) \=)) (map #(reduce str %)))) -(defn- normalize-attr-str +(defn- normalize-html-attr-str "Normalizes a given HTML attribute string. If it has surrounding quotes, removes them." - [attr-str] - (if (str/starts-with? attr-str "\"") - (->> (subs attr-str 1) + [html-attr-str] + (if (str/starts-with? html-attr-str "\"") + (->> (subs html-attr-str 1) (take-while #(not= % \")) (reduce str)) - attr-str)) + html-attr-str)) -(defn- parse-attr +(defn- parse-html-attr-str "Parses a given HTML attribute into a normalized key-value map. Attributes with no value part are treated as boolean attributes, and are always `true`." - [attr] - (let [[k v] (attr->k-v attr) + [html-attr-str] + (let [[k v] (html-attr-str->k-v html-attr-str) k (keyword k) - v (if (nil? v) true (normalize-attr-str v))] + v (if (nil? v) true (normalize-html-attr-str v))] {k v})) -(defn- html->node-attrs [html] +(defn- html-str->node-attrs [html] (when (str/starts-with? html "<") (->> (subs html 1) (take-while #(not (contains? 
#{\> \/} %))) (partition-by #(= % \space)) (drop 1) (filter #(not= (-> % first) \space)) - (map parse-attr) + (map parse-html-attr-str) (into {})))) -(comment - (html->node-attrs "<img src=\"test.jpg\" checked />")) - (defn- construct-node [node-html node-children] - (let [node-name (html->node-name node-html)] + (let [node-name (html-str->node-name node-html)] (merge {:name node-name} (when (= node-name :dompa/text) {:value node-html}) - (when-let [attrs (html->node-attrs node-html)] + (when-let [attrs (html-str->node-attrs node-html)] {:attrs attrs}) (when node-children {:children node-children})))) -(defn coordinates->nodes +(defn html-coordinates->nodes [html coordinates] (when (seq coordinates) (let [sorted-coordinates (sort-by first coordinates) @@ -75,6 +72,6 @@ children (coordinates/children sorted-coordinates [parent-from parent-to]) remaining (coordinates/without-children sorted-coordinates [parent-from parent-to]) node-html (subs html parent-from (inc parent-to)) - node-children (coordinates->nodes html children)] + node-children (html-coordinates->nodes html children)] (cons (construct-node node-html node-children) - (coordinates->nodes html remaining))))) + (html-coordinates->nodes html remaining))))) |
