From c9fb6f1c80a83f9e8ea88f7db3caedae57985fd7 Mon Sep 17 00:00:00 2001 From: Asko Nõmm Date: Thu, 11 Sep 2025 23:05:47 +0300 Subject: Getting there --- src/dompa/coordinates.clj | 14 ++++++++------ src/dompa/core.clj | 22 ++++++++++++++-------- src/dompa/nodes.clj | 6 +++--- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/dompa/coordinates.clj b/src/dompa/coordinates.clj index 1b18472..2cd0b0a 100644 --- a/src/dompa/coordinates.clj +++ b/src/dompa/coordinates.clj @@ -52,8 +52,10 @@ [idx (coordinates->tag-name html coordinate)])) (defn- last-coordinate-by-tag-name-idx [html coordinates name start] - (let [filtered-coordinates (filter (fn [[_ end]] (< end start)) coordinates) - named-coordinates (map-indexed (name-coordinates-fn html) filtered-coordinates)] + (let [filter-fn (fn [[_ end]] (< end start)) + filtered-coordinates (filter filter-fn coordinates) + index-fn (name-coordinates-fn html) + named-coordinates (map-indexed index-fn filtered-coordinates)] (->> named-coordinates (filter #(= name (-> % last))) last @@ -62,7 +64,7 @@ (defn- merge-coordinate [html coordinates [start end]] (let [name (coordinates->tag-name html [start end]) matching-idx (last-coordinate-by-tag-name-idx html coordinates name start) - [matching-start _] (nth coordinates matching-idx)] + [matching-start] (nth coordinates matching-idx)] (assoc coordinates matching-idx [matching-start end]))) (defn- merge-coordinates-reducer-fn [html] @@ -72,9 +74,9 @@ (merge-coordinate html coordinates [start end]) (conj coordinates [start end])))) -(defn merge-coordinates [html] +(defn merge-coordinates [html coordinates] (-> (merge-coordinates-reducer-fn html) - (reduce []))) + (reduce [] coordinates))) (defn children [coordinates [from to]] @@ -95,4 +97,4 @@ (defn html->coordinates [html] (->> (map-indexed vector html) construct-coordinates - merge-coordinates)) + (merge-coordinates html))) diff --git a/src/dompa/core.clj b/src/dompa/core.clj index 79d0c08..4891c46 100644 --- 
a/src/dompa/core.clj +++ b/src/dompa/core.clj @@ -7,20 +7,26 @@ (->> (html->coordinates html) (coordinates->nodes html))) +(def default-void-nodes + #{:img}) + +(defn- node->html + [{:keys [name content void-node?]}] + (if void-node? + (str "<" name ">") + (str "<" name ">" content "</" name ">"))) + (defn nodes->html ([nodes] - (nodes->html nodes {:void-nodes #{:img}})) + (nodes->html nodes {:void-nodes default-void-nodes})) ([nodes {:keys [void-nodes]}] (reduce (fn [html node] - (cond - (= (-> node :name) :dompa/text) + (if (= (-> node :name) :dompa/text) (str html (-> node :value)) - - :else - (let [node-name (-> node :name name) - node-child-html (nodes->html (-> node :children))] - (str html "<" node-name ">" node-child-html "</" node-name ">")))) + (node->html {:name (-> node :name name) + :content (nodes->html (-> node :children)) + :void-node? (contains? void-nodes (-> node :name))}))) "" nodes))) diff --git a/src/dompa/nodes.clj b/src/dompa/nodes.clj index 72f89ff..82734a3 100644 --- a/src/dompa/nodes.clj +++ b/src/dompa/nodes.clj @@ -64,7 +64,7 @@ (when node-children {:children node-children})))) -(defn html-coordinates->nodes +(defn coordinates->nodes [html coordinates] (when (seq coordinates) (let [sorted-coordinates (sort-by first coordinates) @@ -72,6 +72,6 @@ children (coordinates/children sorted-coordinates [parent-from parent-to]) remaining (coordinates/without-children sorted-coordinates [parent-from parent-to]) node-html (subs html parent-from (inc parent-to)) - node-children (html-coordinates->nodes html children)] + node-children (coordinates->nodes html children)] (cons (construct-node node-html node-children) - (html-coordinates->nodes html remaining))))) -- cgit v1.2.3