Skip to content

Commit c71201e

Browse files
update summary styling
1 parent 084bd4e commit c71201e

File tree

1 file changed

+245
-35
lines changed

1 file changed

+245
-35
lines changed

src/data_visualization/aog/column_combinations.clj

Lines changed: 245 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,69 @@ penguins
5858
(def plot-width 100)
5959
(def plot-height 100)
6060

61-
;; Type detection: determines whether to show histograms (numeric) or bar charts (categorical)
61+
;; ## Type Classification and Plot Selection
62+
;;
63+
;; We use tablecloth's type system to classify columns and make intelligent
64+
;; visualization choices based on their semantic role in the data:
65+
;;
66+
;; - **Quantitative**: numerical data with meaningful magnitude and order
67+
;; - **Temporal**: datetime/time data (often paired with quantitative for time series)
68+
;; - **Categorical**: textual or logical data with discrete values
69+
;; - **Identity/Index**: all values unique or nearly unique (IDs, timestamps, etc.)
70+
71+
(defn column-general-type
  "Maps a column onto a coarse visualization category using tablecloth's
  general column types, checked in this order:

  - `:numerical` -> `:quantitative`
  - `:datetime`  -> `:temporal`
  - `:logical`   -> `:categorical`
  - `:textual`   -> `:categorical`

  Any column that matches none of the above yields `:identity`. Note that
  this fallback is purely type-based; no uniqueness check happens here."
  [col]
  ;; condp calls the predicate as (pred test-expr expr), so the tablecloth
  ;; type keyword arrives first and the column second.
  (condp (fn [general-type column] (tcc/typeof? column general-type)) col
    :numerical :quantitative
    :datetime  :temporal
    :logical   :categorical
    :textual   :categorical
    :identity))
80+
81+
(defn cardinality
  "Number of distinct values remaining in `col` after missing entries
  have been dropped."
  [col]
  (->> (tcc/drop-missing col)
       (into #{})
       count))
86+
87+
(defn is-identity-column?
  "Returns true when `col` looks like an identity/index column, i.e. at
  least 95% of its non-missing values are distinct.

  A column with no non-missing values is never an identity column. Without
  that guard the comparison degenerates to `(>= 0 0.0)`, which is true, and
  the `:identity` geometry in `plot-basic` would then divide the plot width
  by a distinct-value count of zero."
  [col]
  (let [values   (tcc/drop-missing col) ; drop missing entries once, reuse below
        n        (count values)
        distinct-count (count (set values))]
    (and (pos? n)
         ;; 95%+ unique values
         (>= distinct-count (* 0.95 n)))))
6293

63-
(defn is-numeric-type? [col]
64-
(tcc/typeof? col :numerical))
94+
(defn is-numeric-type?
  "Predicate shorthand: true when `column-general-type` classifies `col`
  as `:quantitative`."
  [col]
  (-> col
      column-general-type
      (= :quantitative)))
98+
99+
(defn get-numeric-domain
  "Computes the extent of a numeric column for use in scaling.

  Missing entries and nils are discarded first. Returns a map
  `{:min <smallest> :max <largest>}`, or nil when no values remain."
  [col]
  (when-let [xs (->> (tcc/drop-missing col)
                     (filter some?)
                     seq)]
    {:min (reduce min xs)
     :max (reduce max xs)}))
106+
107+
(defn scale-value
  "Linearly maps `value` from the data `domain` onto the plot interval
  running from `plot-min` to `plot-max`.

  `domain` is a map with `:min` and `:max` keys. Returns nil when either
  `value` or `domain` is nil/false. When the domain has zero width, every
  value maps to the midpoint of the plot interval. `plot-min` may be larger
  than `plot-max` to produce an inverted axis."
  [value domain plot-min plot-max]
  (when (and value domain)
    (let [lo   (:min domain)
          hi   (:max domain)
          span (- hi lo)]
      (if (zero? span)
        ;; Degenerate domain: no spread to scale by, so centre the value.
        (/ (+ plot-min plot-max) 2)
        (let [fraction (/ (- value lo) span)
              extent   (- plot-max plot-min)]
          (+ plot-min (* fraction extent)))))))
65116

66117
(defn plot-basic [g]
67118
(let [{:keys [data mappings geometry]} (g 1)
68-
{:keys [x y]} mappings]
119+
{:keys [x y]} mappings
120+
x-col (data x)
121+
y-col (data y)
122+
x-domain (when (tcc/typeof? x-col :numerical) (get-numeric-domain x-col))
123+
y-domain (when (tcc/typeof? y-col :numerical) (get-numeric-domain y-col))]
69124
(for [geom geometry]
70125
(case geom
71126
:bar (let [x-vals (remove nil? (data x))
@@ -99,15 +154,39 @@ penguins
99154
:fill "lightblue"
100155
:stroke "gray"
101156
:stroke-width 0.5}])))))
102-
:point (let [xys (mapv (juxt x y) data)]
103-
(for [[x y] xys]
104-
[:circle {:r 2, :cx x, :cy y, :fill "lightblue"}]))
105-
:line (let [xys (mapv (juxt x y) data)]
106-
[:path {:d (str "M " (str/join ","
107-
(first xys))
108-
" L " (str/join " "
109-
(map #(str/join "," %)
110-
(rest xys))))}])))))
157+
:point (let [rows (tc/rows data :as-maps)
158+
xys (mapv (juxt x y) rows)]
159+
(for [[x-val y-val] xys]
160+
(when (and x-val y-val x-domain y-domain)
161+
(let [cx (scale-value (double x-val) x-domain 5 (- plot-width 5))
162+
cy (scale-value (double y-val) y-domain (- plot-height 5) 5)]
163+
(when (and cx cy)
164+
[:circle {:r 2, :cx cx, :cy cy, :fill "lightblue" :stroke "blue" :stroke-width 0.5}])))))
165+
:line (let [rows (tc/rows data :as-maps)
166+
xys (mapv (juxt #(get % x) #(get % y)) rows)
167+
scaled-xys (for [[x-val y-val] xys]
168+
(when (and x-val y-val x-domain y-domain)
169+
[(scale-value (double x-val) x-domain 5 (- plot-width 5))
170+
(scale-value (double y-val) y-domain (- plot-height 5) 5)]))
171+
valid-xys (remove nil? scaled-xys)]
172+
(when (seq valid-xys)
173+
[:path {:d (str "M " (str/join ","
174+
(first valid-xys))
175+
" L " (str/join " "
176+
(map #(str/join "," %)
177+
(rest valid-xys))))
178+
:stroke "lightblue"
179+
:fill "none"
180+
:stroke-width 0.5}]))
181+
:identity (let [values (remove nil? (data x))
182+
unique-vals (distinct values)
183+
n (count unique-vals)
184+
point-spacing (/ plot-width n)]
185+
(for [[i val] (map-indexed vector unique-vals)]
186+
[:circle {:r 1.5
187+
:cx (* (+ i 0.5) point-spacing)
188+
:cy (/ plot-height 2)
189+
:fill "lightgray"}]))))))
111190

112191
(defn plot-distribution [ds column geom]
113192
^:kind/hiccup
@@ -122,25 +201,88 @@ penguins
122201

123202
(plot-distribution penguins :bill-length-mm [:histogram])
124203

204+
;; ## Geometry Selection
205+
;;
206+
;; Determines what type of chart works best for different data patterns
207+
208+
(defn select-geometry-single
  "Chooses the geometry keyword used to summarize a single column.

  Decision order:
  1. `:identity`  - `is-identity-column?` reports all/nearly all values
                    unique, so show the domain rather than a distribution.
  2. `:histogram` - the column's general type is `:quantitative` or
                    `:temporal` (histogram of counts).
  3. `:bar`       - categorical columns and anything else (frequencies)."
  [col]
  ;; The uniqueness test deliberately comes first: it overrides the
  ;; type-based choice regardless of the column's general type.
  (if (is-identity-column? col)
    :identity
    (case (column-general-type col)
      (:quantitative :temporal) :histogram
      :bar)))
224+
225+
(defn select-geometry-pair
  "Chooses the geometry keyword for plotting `col-a` against `col-b`.

  - Equal columns (the matrix diagonal) defer to `select-geometry-single`.
  - Quantitative x quantitative -> `:point` (scatter plot).
  - Temporal x quantitative, in either order -> `:line`.
  - Every other combination, including any pairing with a categorical
    column -> `:bar`."
  [col-a col-b]
  (let [kinds [(column-general-type col-a) (column-general-type col-b)]]
    (if (= col-a col-b)
      (select-geometry-single col-a)
      ;; Vectors are matched by value in `case`; a list groups alternatives.
      (case kinds
        [:quantitative :quantitative] :point
        ([:temporal :quantitative] [:quantitative :temporal]) :line
        :bar))))
242+
243+
;; ## Pair test
244+
245+
(defn plot-pair
  "Renders one SVG (as hiccup) plotting `column-a` on x against `column-b`
  on y. The geometry is picked by `select-geometry-pair` from the two
  columns of `ds`, and drawing is delegated to `plot-basic`."
  [ds column-a column-b]
  (let [geom     (select-geometry-pair (ds column-a) (ds column-b))
        graphic  [:graphic {:data ds
                            :mappings {:x column-a, :y column-b}
                            :geometry [geom]}]
        view-box (str/join " " [0 0 plot-width plot-height])]
    ^:kind/hiccup
    [:svg {:width 100
           :viewBox view-box
           :xmlns "http://www.w3.org/2000/svg"
           :style {:border "solid 1px gray"}}
     [:g {:stroke "gray", :fill "none"}
      (plot-basic graphic)]]))
257+
258+
(plot-pair penguins :bill-length-mm :bill-depth-mm)
259+
260+
125261
;; ## Single Column Summaries
126262
;;
127263
;; The summarize function automatically selects the right visualization type:
128-
129-
;; - Numeric columns → histogram (shows distribution shape)
264+
;; - Quantitative columns → histogram (shows distribution shape)
265+
;; - Temporal columns → histogram (shows frequency distribution)
130266
;; - Categorical columns → bar chart (shows frequencies)
267+
;; - Identity columns → sparse plot (shows all unique values)
131268

132-
(defn summarize [ds column]
133-
(if (is-numeric-type? (ds column))
134-
(plot-distribution ds column [:histogram])
135-
(plot-distribution ds column [:bar])))
269+
(defn summarize
  "Draws the single-column summary plot for `column` of `ds`, letting
  `select-geometry-single` decide which geometry to hand to
  `plot-distribution`."
  [ds column]
  (->> (ds column)
       select-geometry-single
       vector
       (plot-distribution ds column)))
136275

137276
;; Companion function: provides numeric summaries alongside visualizations
138-
;; Shows count, mean, standard deviation, min/max for numeric data
139-
;; Shows count and unique values for categorical data
277+
;; Shows count, mean, standard deviation, min/max for quantitative data
278+
;; Shows count and cardinality for categorical and identity data
140279

141-
(defn get-summary-stats [ds column]
142-
(let [col (ds column)]
143-
(if (is-numeric-type? col)
280+
(defn get-summary-stats
281+
"Generate summary statistics appropriate to the column's type."
282+
[ds column]
283+
(let [col (ds column)
284+
general-type (column-general-type col)]
285+
(if (= :quantitative general-type)
144286
(let [stats (tcc/descriptive-statistics col)]
145287
(format "n: %d, μ: %.2f, σ: %.2f, min: %.2f, max: %.2f"
146288
(:n-elems stats)
@@ -149,8 +291,8 @@ penguins
149291
(:min stats)
150292
(:max stats)))
151293
(let [values (tcc/drop-missing col)
152-
counts (frequencies values)]
153-
(str "n: " (count values) ", unique: " (count counts))))))
294+
card (cardinality col)]
295+
(str "n: " (count values) ", card: " card)))))
154296

155297
;; ## Summary Table: All Columns at a Glance
156298
;;
@@ -162,20 +304,91 @@ penguins
162304
(doall (for [column-name (tc/column-names ds)]
163305
[column-name (summarize ds column-name) (get-summary-stats ds column-name)]))))
164306

307+
(defn visual-summary-grid
  "Hiccup CSS-grid layout: one bordered strip per column of `ds`, stacking
  the column name, its `summarize` plot and its `get-summary-stats` text."
  [ds]
  (let [strip (fn [column-name]
                [:div {:style {:border "1px solid #ddd"
                               :padding "10px"
                               :text-align "center"}}
                 [:h4 {:style {:margin "0 0 10px 0"
                               :font-size "14px"}}
                  (name column-name)]
                 (summarize ds column-name)
                 [:div {:style {:margin-top "10px"
                                :font-size "12px"
                                :color "#666"}}
                  (get-summary-stats ds column-name)]])]
    ^:kind/hiccup
    [:div {:style {:display "grid"
                   :grid-template-columns "repeat(auto-fit, minmax(150px, 1fr))"
                   :gap "10px"
                   :padding "10px"}}
     ;; Must stay a (realized) seq, not a vector: hiccup would read a
     ;; vector of children as a single element.
     (doall (map strip (tc/column-names ds)))]))
326+
327+
(defn visual-summary-cards
  "Hiccup layout using Bootstrap classes: every column of `ds` becomes a
  card whose header is the column name, whose body is the `summarize` plot
  and whose footer is the `get-summary-stats` text."
  [ds]
  (let [card (fn [column-name]
               [:div {:class "col-md-4 col-lg-3 mb-3"}
                [:div {:class "card h-100"}
                 [:div {:class "card-header"}
                  [:h5 {:class "card-title mb-0"} (name column-name)]]
                 [:div {:class "card-body text-center d-flex flex-column justify-content-center"}
                  (summarize ds column-name)]
                 [:div {:class "card-footer mt-auto"}
                  [:small {:class "text-muted"}
                   (get-summary-stats ds column-name)]]]])]
    ^:kind/hiccup
    [:div {:class "container-fluid"}
     [:div {:class "row"}
      ;; Kept as a realized seq so hiccup splices the cards in as children.
      (doall (map card (tc/column-names ds)))]]))
343+
344+
(defn visual-summary-rows
  "Hiccup layout with one bordered row per column of `ds`: the column name
  and `get-summary-stats` text on the left, the `summarize` plot on the
  right."
  [ds]
  (let [text-side (fn [column-name]
                    [:div {:style {:flex "1 1 auto"}}
                     [:h4 {:style {:margin "0"}} (name column-name)]
                     [:div {:style {:margin-top "5px"
                                    :font-size "12px"
                                    :color "#666"}}
                      (get-summary-stats ds column-name)]])
        plot-side (fn [column-name]
                    [:div {:style {:flex "0 0 auto"
                                   :text-align "right"}}
                     (summarize ds column-name)])
        row       (fn [column-name]
                    [:div {:style {:border "1px solid #ddd"
                                   :margin-bottom "20px"
                                   :padding "15px"
                                   :border-radius "5px"}}
                     [:div {:style {:display "flex"
                                    :align-items "center"
                                    :gap "20px"}}
                      (text-side column-name)
                      (plot-side column-name)]])]
    ^:kind/hiccup
    [:div {:style {:max-width "800px"
                   :margin "0 auto"}}
     ;; Realized seq (not a vector) so hiccup treats the rows as children.
     (doall (map row (tc/column-names ds)))]))
367+
165368
(visual-summary penguins)
166369

370+
(visual-summary-grid penguins)
371+
372+
(visual-summary-cards penguins)
373+
374+
(visual-summary-rows penguins)
375+
167376
;; ## Matrix View: All Column Combinations
168377
;;
169378
;; The next step: instead of showing each column separately,
170379
;; what if we show how every column relates to every other column?
171380
;; This is the idea behind the scatterplot matrix.
172381
;;
173382
;; The matrix automatically chooses the right chart for each combination:
383+
;; - Quantitative × Quantitative → scatter plot (reveal correlations)
384+
;; - Temporal × Quantitative → line chart (show time series)
385+
;; - Categorical × Anything → bar chart (show distribution by category)
386+
;; - Single column (diagonal) → histogram, bar, or identity plot, based on type and cardinality
174387

175-
;; - Numeric × Numeric → scatter plot (reveal relationships)
176-
;; - Otherwise → bar chart (show distribution differences)
177-
178-
(defn matrix [ds]
388+
(defn matrix
389+
"Create a scatterplot-matrix-style view of all column combinations.
390+
Each cell uses an appropriate visualization based on the column types."
391+
[ds]
179392
(let [column-names (tc/column-names ds)
180393
c (count column-names)]
181394
^:kind/hiccup
@@ -188,15 +401,12 @@ penguins
188401
[b-idx b] (map-indexed vector column-names)]
189402
(let [col-a (ds a)
190403
col-b (ds b)
191-
a-numeric? (is-numeric-type? col-a)
192-
b-numeric? (is-numeric-type? col-b)]
404+
geom (select-geometry-pair col-a col-b)]
193405
[:g {:transform (str "translate(" (* a-idx plot-width) "," (* b-idx plot-height) ")")}
194406
[:rect {:x 0 :y 0 :width plot-width :height plot-height
195407
:fill "none" :stroke "gray" :stroke-width 1}]
196408
(plot-basic [:graphic {:data ds
197409
:mappings {:x a :y b}
198-
:geometry (cond
199-
(and a-numeric? b-numeric?) [:point]
200-
:else [:bar])}])]))]]))
410+
:geometry [geom]}])]))]]))
201411

202412
(matrix penguins)

0 commit comments

Comments
 (0)