@@ -58,14 +58,69 @@ penguins
5858(def plot-width 100 )
5959(def plot-height 100 )
6060
61- ; ; Type detection: determines whether to show histograms (numeric) or bar charts (categorical)
61+ ; ; ## Type Classification and Plot Selection
62+ ; ;
63+ ; ; We use tablecloth's type system to classify columns and make intelligent
64+ ; ; visualization choices based on their semantic role in the data:
65+ ; ;
66+ ; ; - **Quantitative**: numerical data with meaningful magnitude and order
67+ ; ; - **Temporal**: datetime/time data (often paired with quantitative for time series)
68+ ; ; - **Categorical**: textual or logical data with discrete values
69+ ; ; - **Identity/Index**: all values unique or nearly unique (IDs, timestamps, etc.)
70+
(defn column-general-type
  "Classify `col` into one coarse category keyword.

  The tablecloth general types are probed in a fixed order and the first
  match wins:

  - `:numerical` -> `:quantitative`
  - `:datetime`  -> `:temporal`
  - `:logical`   -> `:categorical`
  - `:textual`   -> `:categorical`

  A column matching none of these is reported as `:identity`."
  [col]
  (let [type->category [[:numerical :quantitative]
                        [:datetime  :temporal]
                        [:logical   :categorical]
                        [:textual   :categorical]]]
    ;; `some` walks the pairs in order and stops at the first type that matches.
    (or (some (fn [[general-type category]]
                (when (tcc/typeof? col general-type)
                  category))
              type->category)
        :identity)))
80+
(defn cardinality
  "Number of distinct values in `col` after missing entries are dropped."
  [col]
  (-> col
      tcc/drop-missing
      set
      count))
86+
(defn is-identity-column?
  "Returns true if `col` looks like an identity/index column, i.e. at least
  95% of its non-missing values are distinct.

  Returns false when the column has no non-missing values: an empty column
  carries no evidence of uniqueness, and treating it as an identity column
  would route it to the `:identity` geometry, which divides the plot width
  by the number of distinct values."
  [col]
  (let [values (tcc/drop-missing col)
        n      (count values)]
    (and (pos? n)
         ;; Same distinct-count as `cardinality`, computed on the values we
         ;; already have so missing entries are only dropped once.
         (>= (count (set values)) (* 0.95 n)))))
6293
(defn is-numeric-type?
  "Predicate: does `col` classify as `:quantitative` under
  `column-general-type`?"
  [col]
  (-> col
      column-general-type
      (= :quantitative)))
98+
(defn get-numeric-domain
  "Extent of a numeric column, used for scaling.

  Returns `{:min lowest :max highest}` over the values that remain after
  missing entries and nils are removed, or nil when nothing remains."
  [col]
  (when-let [present (seq (remove nil? (tcc/drop-missing col)))]
    {:min (reduce min present)
     :max (reduce max present)}))
106+
(defn scale-value
  "Linearly map `value` from `domain` onto the plot range.

  Arguments:
  - `value`    - number to scale; a nil/false value yields nil.
  - `domain`   - map with numeric `:min` and `:max`; a nil domain yields nil.
  - `plot-min` - output coordinate that `(:min domain)` maps to.
  - `plot-max` - output coordinate that `(:max domain)` maps to. It may be
                 smaller than `plot-min` to flip the axis.

  Always returns a double (or nil). When the domain has zero width every
  value maps to the midpoint of the plot range."
  [value domain plot-min plot-max]
  (when (and value domain)
    ;; Coerce the bounds to doubles so integer inputs never produce a Ratio
    ;; (e.g. 10/3), which would not be a valid SVG coordinate when rendered.
    ;; Local names avoid shadowing clojure.core/min, max and range.
    (let [lo   (double (:min domain))
          hi   (double (:max domain))
          span (- hi lo)]
      (if (zero? span)
        (/ (+ plot-min plot-max) 2.0)
        (+ plot-min
           (* (/ (- value lo) span)
              (- plot-max plot-min)))))))
65116
66117(defn plot-basic [g]
67118 (let [{:keys [data mappings geometry]} (g 1 )
68- {:keys [x y]} mappings]
119+ {:keys [x y]} mappings
120+ x-col (data x)
121+ y-col (data y)
122+ x-domain (when (tcc/typeof? x-col :numerical ) (get-numeric-domain x-col))
123+ y-domain (when (tcc/typeof? y-col :numerical ) (get-numeric-domain y-col))]
69124 (for [geom geometry]
70125 (case geom
71126 :bar (let [x-vals (remove nil? (data x))
@@ -99,15 +154,39 @@ penguins
99154 :fill " lightblue"
100155 :stroke " gray"
101156 :stroke-width 0.5 }])))))
102- :point (let [xys (mapv (juxt x y) data)]
103- (for [[x y] xys]
104- [:circle {:r 2 , :cx x, :cy y, :fill " lightblue" }]))
105- :line (let [xys (mapv (juxt x y) data)]
106- [:path {:d (str " M " (str/join " ,"
107- (first xys))
108- " L " (str/join " "
109- (map #(str/join " ," %)
110- (rest xys))))}])))))
157+ :point (let [rows (tc/rows data :as-maps )
158+ xys (mapv (juxt x y) rows)]
159+ (for [[x-val y-val] xys]
160+ (when (and x-val y-val x-domain y-domain)
161+ (let [cx (scale-value (double x-val) x-domain 5 (- plot-width 5 ))
162+ cy (scale-value (double y-val) y-domain (- plot-height 5 ) 5 )]
163+ (when (and cx cy)
164+ [:circle {:r 2 , :cx cx, :cy cy, :fill " lightblue" :stroke " blue" :stroke-width 0.5 }])))))
165+ :line (let [rows (tc/rows data :as-maps )
166+ xys (mapv (juxt #(get % x) #(get % y)) rows)
167+ scaled-xys (for [[x-val y-val] xys]
168+ (when (and x-val y-val x-domain y-domain)
169+ [(scale-value (double x-val) x-domain 5 (- plot-width 5 ))
170+ (scale-value (double y-val) y-domain (- plot-height 5 ) 5 )]))
171+ valid-xys (remove nil? scaled-xys)]
172+ (when (seq valid-xys)
173+ [:path {:d (str " M " (str/join " ,"
174+ (first valid-xys))
175+ " L " (str/join " "
176+ (map #(str/join " ," %)
177+ (rest valid-xys))))
178+ :stroke " lightblue"
179+ :fill " none"
180+ :stroke-width 0.5 }]))
181+ :identity (let [values (remove nil? (data x))
182+ unique-vals (distinct values)
183+ n (count unique-vals)
184+ point-spacing (/ plot-width n)]
185+ (for [[i val] (map-indexed vector unique-vals)]
186+ [:circle {:r 1.5
187+ :cx (* (+ i 0.5 ) point-spacing)
188+ :cy (/ plot-height 2 )
189+ :fill " lightgray" }]))))))
111190
112191(defn plot-distribution [ds column geom]
113192 ^:kind/hiccup
@@ -122,25 +201,88 @@ penguins
122201
123202(plot-distribution penguins :bill-length-mm [:histogram ])
124203
204+ ; ; ## Geometry Selection
205+ ; ;
206+ ; ; Determines what type of chart works best for different data patterns
207+
(defn select-geometry-single
  "Select the visualization geometry for a single column.

  Returns one of:
  - `:identity`  - the column passes `is-identity-column?`; this check takes
                   precedence over the column's type.
  - `:histogram` - quantitative or temporal column.
  - `:bar`       - categorical column, or any other classification."
  [col]
  ;; The previous version also bound the column's cardinality and non-missing
  ;; count here but never used them; each was an extra pass over the column.
  (if (is-identity-column? col)
    :identity
    (case (column-general-type col)
      (:quantitative :temporal) :histogram
      :bar)))
224+
(defn select-geometry-pair
  "Select the visualization geometry for a pair of columns.

  Rules, first match wins:
  - the two columns are equal          -> whatever `select-geometry-single` picks
  - quantitative x quantitative        -> `:point`
  - temporal x quantitative (any order)-> `:line`
  - anything else                      -> `:bar`"
  [col-a col-b]
  (let [type-pair [(column-general-type col-a)
                   (column-general-type col-b)]]
    (cond
      ;; NOTE(review): `=` here compares the columns by value, so two distinct
      ;; columns with equal contents would also take this branch - confirm
      ;; that is intended.
      (= col-a col-b)
      (select-geometry-single col-a)

      (= type-pair [:quantitative :quantitative])
      :point

      (contains? #{[:temporal :quantitative] [:quantitative :temporal]} type-pair)
      :line

      ;; Categorical pairs and every remaining combination share the bar chart.
      :else
      :bar)))
242+
243+ ; ; ## Pair test
244+
(defn plot-pair
  "Render one SVG (as hiccup) plotting `column-a` against `column-b` of `ds`.

  The geometry is chosen by `select-geometry-pair` from the two columns, and
  the drawing itself is delegated to `plot-basic`."
  [ds column-a column-b]
  (let [geom     (select-geometry-pair (ds column-a) (ds column-b))
        graphic  [:graphic {:data     ds
                            :mappings {:x column-a, :y column-b}
                            :geometry [geom]}]
        view-box (str/join " " [0 0 plot-width plot-height])]
    ^:kind/hiccup
    [:svg {:width   100
           :viewBox view-box
           :xmlns   " http://www.w3.org/2000/svg"
           :style   {:border " solid 1px gray"}}
     [:g {:stroke " gray", :fill " none"}
      (plot-basic graphic)]]))
257+
258+ (plot-pair penguins :bill-length-mm :bill-depth-mm )
259+
260+
125261; ; ## Single Column Summaries
126262; ;
127263; ; The summarize function automatically selects the right visualization type:
128-
129- ; ; - Numeric columns → histogram (shows distribution shape )
264+ ; ; - Quantitative columns → histogram (shows distribution shape)
265+ ; ; - Temporal columns → histogram (shows frequency distribution )
130266; ; - Categorical columns → bar chart (shows frequencies)
267+ ; ; - Identity columns → sparse plot (shows all unique values)
131268
(defn summarize
  "Single-column visualization of `column` in `ds`.

  The geometry is picked by `select-geometry-single` and handed to
  `plot-distribution` as a one-element vector."
  [ds column]
  (->> (ds column)
       select-geometry-single
       vector
       (plot-distribution ds column)))
136275
137276; ; Companion function: provides numeric summaries alongside visualizations
138- ; ; Shows count, mean, standard deviation, min/max for numeric data
139- ; ; Shows count and unique values for categorical data
;; Shows count, mean, standard deviation, min/max for quantitative data
;; Shows count and cardinality for every other column type (categorical, temporal, identity)
140279
141- (defn get-summary-stats [ds column]
142- (let [col (ds column)]
143- (if (is-numeric-type? col)
280+ (defn get-summary-stats
281+ " Generate summary statistics appropriate to the column's type."
282+ [ds column]
283+ (let [col (ds column)
284+ general-type (column-general-type col)]
285+ (if (= :quantitative general-type)
144286 (let [stats (tcc/descriptive-statistics col)]
145287 (format " n: %d, μ: %.2f, σ: %.2f, min: %.2f, max: %.2f"
146288 (:n-elems stats)
@@ -149,8 +291,8 @@ penguins
149291 (:min stats)
150292 (:max stats)))
151293 (let [values (tcc/drop-missing col)
152- counts ( frequencies values )]
153- (str " n: " (count values) " , unique : " ( count counts) )))))
294+ card ( cardinality col )]
295+ (str " n: " (count values) " , card : " card )))))
154296
155297; ; ## Summary Table: All Columns at a Glance
156298; ;
@@ -162,20 +304,91 @@ penguins
162304 (doall (for [column-name (tc/column-names ds)]
163305 [column-name (summarize ds column-name) (get-summary-stats ds column-name)]))))
164306
(defn visual-summary-grid
  "CSS-grid layout (hiccup): one cell per column of `ds`, each stacking the
  column name, its `summarize` plot and its `get-summary-stats` text."
  [ds]
  (letfn [(cell [column-name]
            [:div {:style {:border " 1px solid #ddd"
                           :padding " 10px"
                           :text-align " center"}}
             [:h4 {:style {:margin " 0 0 10px 0"
                           :font-size " 14px"}}
              (name column-name)]
             (summarize ds column-name)
             [:div {:style {:margin-top " 10px"
                            :font-size " 12px"
                            :color " #666"}}
              (get-summary-stats ds column-name)]])]
    ^:kind/hiccup
    [:div {:style {:display " grid"
                   :grid-template-columns " repeat(auto-fit, minmax(150px, 1fr))"
                   :gap " 10px"
                   :padding " 10px"}}
     ;; Kept as a realized seq (not a vector) so hiccup treats it as a list
     ;; of children rather than as a single element.
     (doall (map cell (tc/column-names ds)))]))
326+
(defn visual-summary-cards
  "Card layout (hiccup) using Bootstrap-style class names: one card per
  column of `ds`, with the column name in the header, the `summarize` plot in
  the body and the `get-summary-stats` text in the footer."
  [ds]
  (letfn [(card [column-name]
            [:div {:class " col-md-4 col-lg-3 mb-3"}
             [:div {:class " card h-100"}
              [:div {:class " card-header"}
               [:h5 {:class " card-title mb-0"} (name column-name)]]
              [:div {:class " card-body text-center d-flex flex-column justify-content-center"}
               (summarize ds column-name)]
              [:div {:class " card-footer mt-auto"}
               [:small {:class " text-muted"}
                (get-summary-stats ds column-name)]]]])]
    ^:kind/hiccup
    [:div {:class " container-fluid"}
     [:div {:class " row"}
      ;; Realized seq of cards; a vector here would be read as one element.
      (doall (map card (tc/column-names ds)))]]))
343+
(defn visual-summary-rows
  "Row layout (hiccup): one bordered row per column of `ds`, with the column
  name and `get-summary-stats` text on the left and the `summarize` plot on
  the right."
  [ds]
  (letfn [(label-pane [column-name]
            [:div {:style {:flex " 1 1 auto"}}
             [:h4 {:style {:margin " 0"}} (name column-name)]
             [:div {:style {:margin-top " 5px"
                            :font-size " 12px"
                            :color " #666"}}
              (get-summary-stats ds column-name)]])
          (plot-pane [column-name]
            [:div {:style {:flex " 0 0 auto"
                           :text-align " right"}}
             (summarize ds column-name)])
          (row [column-name]
            [:div {:style {:border " 1px solid #ddd"
                           :margin-bottom " 20px"
                           :padding " 15px"
                           :border-radius " 5px"}}
             [:div {:style {:display " flex"
                            :align-items " center"
                            :gap " 20px"}}
              (label-pane column-name)
              (plot-pane column-name)]])]
    ^:kind/hiccup
    [:div {:style {:max-width " 800px"
                   :margin " 0 auto"}}
     ;; Realized seq of rows; a vector here would be read as one element.
     (doall (map row (tc/column-names ds)))]))
367+
165368(visual-summary penguins)
166369
370+ (visual-summary-grid penguins)
371+
372+ (visual-summary-cards penguins)
373+
374+ (visual-summary-rows penguins)
375+
167376; ; ## Matrix View: All Column Combinations
168377; ;
169378; ; The next step: instead of showing each column separately,
170379; ; what if we show how every column relates to every other column?
171380; ; This is the idea behind the scatterplot matrix.
172381; ;
173382; ; The matrix automatically chooses the right chart for each combination:
383+ ; ; - Quantitative × Quantitative → scatter plot (reveal correlations)
384+ ; ; - Temporal × Quantitative → line chart (show time series)
385+ ; ; - Categorical × Anything → bar chart (show distribution by category)
;; - Single column (diagonal) → histogram, bar chart, or identity strip, chosen by type and cardinality
174387
175- ; ; - Numeric × Numeric → scatter plot (reveal relationships)
176- ; ; - Otherwise → bar chart (show distribution differences)
177-
178- ( defn matrix [ds]
388+ ( defn matrix
389+ " Create a scatterplot-matrix-style view of all column combinations.
390+ Each cell uses an appropriate visualization based on the column types. "
391+ [ds]
179392 (let [column-names (tc/column-names ds)
180393 c (count column-names)]
181394 ^:kind/hiccup
@@ -188,15 +401,12 @@ penguins
188401 [b-idx b] (map-indexed vector column-names)]
189402 (let [col-a (ds a)
190403 col-b (ds b)
191- a-numeric? (is-numeric-type? col-a)
192- b-numeric? (is-numeric-type? col-b)]
404+ geom (select-geometry-pair col-a col-b)]
193405 [:g {:transform (str " translate(" (* a-idx plot-width) " ," (* b-idx plot-height) " )" )}
194406 [:rect {:x 0 :y 0 :width plot-width :height plot-height
195407 :fill " none" :stroke " gray" :stroke-width 1 }]
196408 (plot-basic [:graphic {:data ds
197409 :mappings {:x a :y b}
198- :geometry (cond
199- (and a-numeric? b-numeric?) [:point ]
200- :else [:bar ])}])]))]]))
410+ :geometry [geom]}])]))]]))
201411
202412(matrix penguins)
0 commit comments