|
| 1 | +^{:kindly/hide-code true |
| 2 | + :clay {:title "Plotting Datoms: Queries as Visual Mappings" |
| 3 | + :quarto {:type :post |
| 4 | + :author [:timothypratley] |
| 5 | + :draft true |
| 6 | + :date "2026-01-08" |
| 7 | + :description "A query-first approach makes relational data available for visualization." |
| 8 | + :image "datomframes.svg" |
| 9 | + :category :data-visualization |
| 10 | + :tags [:datavis :datascript] |
| 11 | + :keywords [:datavis :algebra :datascript :queries :datoms]}}} |
| 12 | +(ns data-visualization.aog.datomframes) |
| 13 | + |
| 14 | +;; Most plotting libraries operate on dataframes, which are analogous to tables with rows and columns. |
| 15 | +;; But what if our plots operated on a Datomic-style database of facts instead? |
| 16 | +;; |
| 17 | +;; DataScript is an in-memory database of datoms (entity-attribute-value triples). |
| 18 | +;; A plot query can select exactly the facts we want to visualize, binding them to visual channels. |
| 19 | +;; The query itself becomes the mapping: which attributes become x, y, color, or relationships. |
| 20 | + |
| 21 | +(require '[datascript.core :as d]) |
| 22 | + |
| 23 | +;; Why query-based plotting? |
| 24 | +;; Databases are relational. |
| 25 | +;; Entities can have arbitrary attributes and relationships to other entities. |
| 26 | +;; Queries let us express things that don't fit in a single table: joins across entities, |
| 27 | +;; aggregations, derived relationships. |
| 28 | +;; A query-first approach makes relational data available for visualization. |
| 29 | + |
| 30 | +;; First: some tiny datoms to play with. |
| 31 | + |
| 32 | +(def penguins |
| 33 | +  ;; Sample rows, written positionally and keyed by the column names below. |
| 34 | +  (mapv (partial zipmap [:species :bill_length :bill_depth :sex]) |
| 35 | +        [["Adelie" 29.1 18.7 "MALE"] ["Adelie" 33.5 15.4 "FEMALE"] |
| 36 | +         ["Chinstrap" 43.5 17.9 "FEMALE"] ["Gentoo" 47.3 13.8 "MALE"]])) |
| 37 | + |
| 38 | +(def penguin-db |
| 39 | +  ;; Load the sample rows into a fresh connection, then dereference it |
| 40 | +  ;; to keep the database value the connection holds afterwards. |
| 41 | +  @(doto (d/create-conn) (d/transact! penguins))) |
| 42 | + |
| 43 | +;; Think of the database as many tiny facts. Each fact says: "entity has attribute value". |
| 44 | + |
| 45 | +penguin-db |
| 46 | + |
| 47 | +;; How to plot a database: |
| 48 | + |
| 49 | +(def default-palette |
| 50 | +  ["#2563eb" "#f97316" "#10b981" "#a855f7" "#ef4444" "#14b8a6" "#f59e0b" "#6b7280"]) |
| 51 | + |
| 52 | +(defn color-scale |
| 53 | +  "Returns a map from each distinct value in `categories` to a color. |
| 54 | +  Colors come from `default-palette` in order of first appearance and |
| 55 | +  repeat once there are more categories than palette entries." |
| 56 | +  [categories] |
| 57 | +  (zipmap (distinct categories) |
| 58 | +          (cycle default-palette))) |
| 59 | + |
| 60 | +(defn plot-basic [g] ; g is [tag spec]; yields one seq of hiccup shapes per geometry keyword |
| 61 | +  (let [{:keys [db query geometry]} (g 1) ; a vector called as a fn looks up that index |
| 62 | +        rows (vec (d/q query db)) ; run the query once and share the rows across geometries |
| 63 | +        circle (fn [fills [cx cy category]] |
| 64 | +                 [:circle {:r 2, :cx cx, :cy cy |
| 65 | +                           :fill (get fills category "gray")}]) |
| 66 | +        segment (fn [[x1 y1 x2 y2]] [:line {:x1 x1, :y1 y1, :x2 x2, :y2 y2}])] |
| 67 | +    (for [kind geometry] |
| 68 | +      (case kind |
| 69 | +        :point (map (partial circle (color-scale (map last rows))) rows) |
| 70 | +        :line (map segment rows))))) |
| 71 | + |
| 72 | +;; To specify a plot, we provide a database, a query, and the geometry to draw: |
| 73 | + |
| 74 | +(def bill-scatter |
| 75 | +  ;; Each result row is [x y color]: bill length, bill depth, species. |
| 76 | +  (let [bill-query '{:find [?x ?y ?color] |
| 77 | +                     :where [[?e :species ?color] |
| 78 | +                             [?e :bill_length ?x] |
| 79 | +                             [?e :bill_depth ?y]]}] |
| 80 | +    [:graphic {:db penguin-db, :query bill-query, :geometry [:point]}])) |
| 81 | + |
| 82 | +;; The coordinates fall out of the query bindings; geometry only chooses how to render them. |
| 83 | + |
| 84 | +;; Wrap it in a tiny SVG viewport to see the result: |
| 85 | + |
| 86 | +^:kind/hiccup |
| 87 | +[:svg {:width "100%" :height "300" |
| 88 | + :viewBox "0 0 50 50" |
| 89 | + :xmlns "http://www.w3.org/2000/svg"} |
| 90 | + [:g {:stroke "gray", :fill "none"} |
| 91 | + (plot-basic bill-scatter)]] |
| 92 | + |
| 93 | +;; We can also query for relationships between entities. |
| 94 | +;; For example, pairs of penguins from the same species: |
| 95 | + |
| 96 | +(def same-species-relationships |
| 97 | +  ;; One [x1 y1 x2 y2] row per pair of distinct penguins that share a species. |
| 98 | +  [:graphic {:db penguin-db |
| 99 | +             :query '{:find [?x1 ?y1 ?x2 ?y2] |
| 100 | +                      :where [[?e1 :species ?s] |
| 101 | +                              [?e2 :species ?s] |
| 102 | +                              [?e1 :bill_length ?x1] [?e1 :bill_depth ?y1] |
| 103 | +                              [?e2 :bill_length ?x2] [?e2 :bill_depth ?y2] |
| 104 | +                              ;; `<` rather than `not=`: keeps each unordered pair once, not once per direction |
| 105 | +                              [(< ?e1 ?e2)]]} |
| 106 | +             :geometry [:line]}]) |
| 107 | + |
| 108 | +;; This small example shows that the mapping lives in the query. |
| 109 | +;; Queries can bind points and relationships between entities. |
| 110 | +;; Each geometry expects a specific binding shape (points: [x y color], lines: [x1 y1 x2 y2]), |
| 111 | +;; so the query and geometry must agree on that contract. |
| 112 | +;; The novelty here is expressiveness: a relational query can yield edges, |
| 113 | +;; something dataframe workflows don't model directly. |
| 114 | + |
| 115 | +^:kind/hiccup |
| 116 | +[:svg {:width "100%" :height "300" |
| 117 | + :viewBox "0 0 50 50" |
| 118 | + :xmlns "http://www.w3.org/2000/svg"} |
| 119 | + [:g {:stroke "gray", :fill "none"} |
| 120 | + (plot-basic bill-scatter) |
| 121 | + (plot-basic same-species-relationships)]] |
| 122 | + |
| 123 | +;; Plots as queries let us say more. |
| 124 | +;; Points and edges can be defined by a query. |
| 125 | +;; The novelty in this example is that relationships can be first-class things to draw. |
0 commit comments