summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Asko Nõmm <asko@nmm.ee> 2025-09-03 16:10:52 +0300
committer: Asko Nõmm <asko@nmm.ee> 2025-09-03 16:10:52 +0300
commit: 0c55f6d39bb83514f6b85f5da02214a78f2b1a13 (patch)
tree: d2bb4b68ee235430bf2d2529be92a625fd159af0 /src
parent: 8080d2b0044b348210622185b80c5f4514bb52e2 (diff)
Getting there ...
Diffstat (limited to 'src')
-rw-r--r-- src/dompa/coordinates.clj | 20
-rw-r--r-- src/dompa/core.clj | 30
-rw-r--r-- src/dompa/nodes.clj | 39
3 files changed, 49 insertions, 40 deletions
diff --git a/src/dompa/coordinates.clj b/src/dompa/coordinates.clj
index e78dd08..1b18472 100644
--- a/src/dompa/coordinates.clj
+++ b/src/dompa/coordinates.clj
@@ -1,7 +1,7 @@
(ns dompa.coordinates
(:require [clojure.string :as str]))
-(defn- construct-coordinates
+(defn- construct-coordinates-reducer
[{:keys [char-type start-idx coordinates] :as state} [idx c]]
(cond
; we're undecided what to do next,
@@ -10,6 +10,7 @@
{:char-type (if (some #{c} "<>") :tag :text)
:start-idx idx
:coordinates coordinates}
+
; text ended, tag begins, which means we can
; record text node coordinates
(and (= :text char-type)
@@ -34,6 +35,12 @@
:else state))
+(defn- construct-coordinates
+  "Runs `construct-coordinates-reducer` over a sequence of `[idx char]`
+  pairs, starting from an empty state, and returns the `:coordinates`
+  accumulated in the final state."
+  [indexed-html]
+  (let [initial-state {:char-type nil :start-idx 0 :coordinates []}
+        final-state   (reduce construct-coordinates-reducer initial-state indexed-html)]
+    (:coordinates final-state)))
+
(defn coordinates->tag-name [html [from to]]
(-> (subs html from to)
(str/split #"[\s\>]")
@@ -58,13 +65,17 @@
[matching-start _] (nth coordinates matching-idx)]
(assoc coordinates matching-idx [matching-start end])))
-(defn- merge-coordinates-fn [html]
+(defn- merge-coordinates-reducer-fn [html] ; builds a reducing fn over [start end] spans, closed over `html`
(fn [coordinates [start end]]
(if (and (= \< (nth html start))
(= \/ (nth html (inc start) nil))) ; nil not-found: a `<` that is the last char of `html` does not throw
(merge-coordinate html coordinates [start end]) ; span begins with `</`: hand it to `merge-coordinate`
(conj coordinates [start end])))) ; any other span is appended unchanged
+(defn merge-coordinates
+  "Folds `coordinates` (a sequence of `[start end]` spans into `html`)
+  with the reducing fn from `merge-coordinates-reducer-fn`, starting
+  from an empty vector, and returns the resulting vector.
+
+  `html` is the source string the spans index into; the reducing fn
+  inspects it to tell `</` spans from everything else."
+  [html coordinates]
+  ;; The reducing fn is the first argument to `reduce`, followed by the
+  ;; initial accumulator and then the collection. Threading the fn with
+  ;; `->` into `(reduce [])` would instead reduce the empty vector and
+  ;; never see any coordinates.
+  (reduce (merge-coordinates-reducer-fn html) [] coordinates))
+
(defn children
[coordinates [from to]]
(->> coordinates
@@ -83,6 +94,5 @@
(defn html->coordinates [html]
+  ;; Two passes over `html`:
+  ;;   1. `construct-coordinates` turns the `[idx char]` pairs into raw spans;
+  ;;   2. the spans are folded with the `html`-aware reducing fn, which sends
+  ;;      spans starting with `</` to `merge-coordinate` and appends the rest.
(->> (map-indexed vector html)
-(reduce construct-coordinates {:char-type nil :start-idx 0 :coordinates []})
-:coordinates
-(reduce (merge-coordinates-fn html) [])))
+     construct-coordinates
+     ;; The merge step needs both `html` and the spans. Threading the spans
+     ;; into a one-argument fn would hand them over in place of `html`, so
+     ;; the fold is spelled out with the reducing fn here.
+     (reduce (merge-coordinates-reducer-fn html) [])))
diff --git a/src/dompa/core.clj b/src/dompa/core.clj
index d8fff41..79d0c08 100644
--- a/src/dompa/core.clj
+++ b/src/dompa/core.clj
@@ -4,23 +4,25 @@
[dompa.nodes :refer [coordinates->nodes]]))
(defn html->nodes [html] ; HTML string -> nodes: compute coordinates, then build nodes from them
-(->> html
-html->coordinates
+(->> (html->coordinates html)
(coordinates->nodes html))) ; NOTE(review): this commit renames dompa.nodes/coordinates->nodes to html-coordinates->nodes; confirm this call and its :refer still resolve
-(defn nodes->html [nodes]
-(reduce
-(fn [html node]
-(cond
-(= (-> node :name) :dompa/text)
-(str html (-> node :value))
-:else
-(let [node-name (-> node :name name)
-node-child-html (nodes->html (-> node :children))]
-(str html "<" node-name ">" node-child-html "</" node-name ">"))))
-""
-nodes))
+(defn nodes->html
+  "Serialises a sequence of node maps back into an HTML string.
+
+  - A node whose `:name` is `:dompa/text` contributes its `:value`.
+  - A node whose `:name` is in `:void-nodes` contributes only an opening
+    tag: no children and no closing tag.
+  - Any other node contributes `<name>`, its serialised `:children`,
+    and `</name>`.
+
+  Options (second arity):
+    :void-nodes  collection of node-name keywords treated as void
+                 elements; the one-argument arity uses `#{:img}`.
+
+  Returns `\"\"` for an empty or nil `nodes`."
+  ([nodes]
+   (nodes->html nodes {:void-nodes #{:img}}))
+  ([nodes {:keys [void-nodes] :as opts}]
+   ;; Normalise once so a nil or non-set collection is also accepted.
+   (let [void? (set void-nodes)]
+     (reduce
+      (fn [html node]
+        (let [node-key (:name node)]
+          (cond
+            (= node-key :dompa/text)
+            (str html (:value node))
+
+            ;; Void elements have no content and no end tag.
+            (contains? void? node-key)
+            (str html "<" (name node-key) ">")
+
+            :else
+            (let [node-name (name node-key)
+                  ;; Pass the options down so nested nodes follow the same
+                  ;; void-node set as the top level.
+                  node-child-html (nodes->html (:children node) opts)]
+              (str html "<" node-name ">" node-child-html "</" node-name ">")))))
+      ""
+      nodes))))
(defn traverse-nodes [nodes pred]
(reduce
diff --git a/src/dompa/nodes.clj b/src/dompa/nodes.clj
index eb96027..72f89ff 100644
--- a/src/dompa/nodes.clj
+++ b/src/dompa/nodes.clj
@@ -3,7 +3,7 @@
[clojure.string :as str]
[dompa.coordinates :as coordinates]))
-(defn- html->node-name
+(defn- html-str->node-name
"Parses a given HTML string of a node to get its name as
a keyword. A text node will return `:dompa/text`."
[html]
@@ -14,7 +14,7 @@
keyword)
:dompa/text))
-(defn- attr->k-v
+(defn- html-attr-str->k-v
"Parses a given HTML node attribute string into a
key-value pair."
[attr]
@@ -22,52 +22,49 @@
(filter #(not= (-> % first) \=))
(map #(reduce str %))))
-(defn- normalize-attr-str
+(defn- normalize-html-attr-str
"Normalizes a given HTML attribute string. If it
has surrounding quotes, removes them."
-[attr-str]
-(if (str/starts-with? attr-str "\"")
-(->> (subs attr-str 1)
+[html-attr-str]
+(if (str/starts-with? html-attr-str "\"") ; only a leading double quote is recognised
+(->> (subs html-attr-str 1) ; drop the opening quote
(take-while #(not= % \")) ; keep characters up to the next double quote (or the end)
(reduce str)) ; join the characters back into a string; empty input gives an empty string
-attr-str))
+html-attr-str)) ; values without a leading double quote pass through unchanged
-(defn- parse-attr
+(defn- parse-html-attr-str
"Parses a given HTML attribute into a normalized
key-value map. Attributes with no value part are
treated as boolean attributes, and are always `true`."
-[attr]
-(let [[k v] (attr->k-v attr)
+[html-attr-str]
+(let [[k v] (html-attr-str->k-v html-attr-str) ; destructuring leaves v nil when only a key part comes back
k (keyword k) ; attribute name becomes the map key
-v (if (nil? v) true (normalize-attr-str v))]
+v (if (nil? v) true (normalize-html-attr-str v))] ; no value -> true, otherwise strip surrounding quotes
{k v})) ; single-entry map
-(defn- html->node-attrs [html]
+(defn- html-str->node-attrs [html] ; attribute map for a tag's HTML; nil when `html` does not start with `<`
(when (str/starts-with? html "<")
(->> (subs html 1) ; skip the leading `<`
(take-while #(not (contains? #{\> \/} %))) ; stop at the first `>` or `/` -- NOTE(review): a `/` inside a value (e.g. a URL) also ends the scan; confirm intended
(partition-by #(= % \space)) ; alternate runs of spaces and non-spaces -- NOTE(review): spaces inside quoted values split too
(drop 1) ; drop the first run, i.e. the tag name in `<name attr ...`
(filter #(not= (-> % first) \space)) ; discard the runs made of spaces
-(map parse-attr)
+(map parse-html-attr-str)
(into {})))) ; merge the single-entry maps into one attribute map
-(comment
- (html->node-attrs "<img src=\"test.jpg\" checked />"))
-
(defn- construct-node ; builds one node map from a node's HTML text and its already-built children
[node-html node-children]
-(let [node-name (html->node-name node-html)]
+(let [node-name (html-str->node-name node-html)]
(merge ; the `when` forms below yield nil when they do not apply, which merge ignores
{:name node-name}
(when (= node-name :dompa/text)
{:value node-html}) ; text nodes keep their raw text as :value
-(when-let [attrs (html->node-attrs node-html)]
+(when-let [attrs (html-str->node-attrs node-html)] ; nil for text, so text nodes get no :attrs
{:attrs attrs})
(when node-children
{:children node-children})))) ; :children is present only when children were supplied
-(defn coordinates->nodes
+(defn html-coordinates->nodes
[html coordinates]
(when (seq coordinates)
(let [sorted-coordinates (sort-by first coordinates)
@@ -75,6 +72,6 @@
children (coordinates/children sorted-coordinates [parent-from parent-to])
remaining (coordinates/without-children sorted-coordinates [parent-from parent-to])
node-html (subs html parent-from (inc parent-to))
- node-children (coordinates->nodes html children)]
+ node-children (html-coordinates->nodes html children)]
(cons (construct-node node-html node-children)
- (coordinates->nodes html remaining)))))
+ (html-coordinates->nodes html remaining)))))