Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions mimic-iii/concepts_postgres/pivot/pivoted_bg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
-- =====================================================================
-- PostgreSQL version of BigQuery pivoted-bg.sql (MIMIC-III)
-- Purpose: Pivot blood gas / chemistry values from labevents, and assign
-- them to an icustay_id using fuzzy ICU boundaries.
--
-- Expected schemas (recommended for PR):
-- mimiciii_clinical: raw MIMIC-III tables (icustays, labevents, admissions)
-- mimiciii_derived : derived concepts output schema (where this table lives)
--
-- Dependencies:
-- - None beyond the raw tables listed above. All date arithmetic in this
-- script uses native PostgreSQL timestamp +/- INTERVAL expressions, so
-- postgres-functions.sql (DATETIME_ADD/SUB/DIFF) does not need to be loaded.
--
-- Output table name (suggested):
-- mimiciii_derived.pivoted_bg
-- =====================================================================

-- Drop any previous build so the script can be re-run from scratch.
DROP TABLE IF EXISTS mimiciii_derived.pivoted_bg;

CREATE TABLE mimiciii_derived.pivoted_bg AS
-- i: one row per ICU stay, annotated with the same patient's neighbouring
-- stays (ordered by intime): the previous stay's outtime and the next stay's
-- intime. Both are NULL when no such neighbour exists.
WITH i AS
(
    SELECT
        icu.subject_id
        , icu.icustay_id
        , icu.intime
        , icu.outtime
        , LAG(icu.outtime) OVER stays_by_patient AS outtime_lag
        , LEAD(icu.intime) OVER stays_by_patient AS intime_lead
    FROM mimiciii_clinical.icustays AS icu
    WINDOW stays_by_patient AS (PARTITION BY icu.subject_id ORDER BY icu.intime)
)
-- iid_assign: the "fuzzy" window [data_start, data_end) used to attribute a
-- patient's lab draws to one of their ICU stays.
--   * No neighbouring stay within 24 hours: pad the stay by 12 hours.
--   * Neighbouring stay within 24 hours: split the gap between the two stays
--     at its midpoint.
-- Both edges use the same un-rounded midpoint expression (interval / 2), so
-- the earlier stay's data_end coincides with the later stay's data_start.
-- Rounding the two edges to different units (hours vs. minutes) would let
-- adjacent windows overlap and assign one lab to two icustay_ids.
, iid_assign AS
(
    SELECT
        i.subject_id
        , i.icustay_id
        , CASE
            WHEN i.outtime_lag IS NOT NULL
             AND i.outtime_lag > (i.intime - INTERVAL '24' HOUR)
                -- previous stay ended < 24h ago: start halfway through the gap
                THEN i.intime - ((i.intime - i.outtime_lag) / 2)
            ELSE i.intime - INTERVAL '12' HOUR
          END AS data_start
        , CASE
            WHEN i.intime_lead IS NOT NULL
             AND i.intime_lead < (i.outtime + INTERVAL '24' HOUR)
                -- next stay begins < 24h later: end halfway through the gap
                THEN i.outtime + ((i.intime_lead - i.outtime) / 2)
            ELSE i.outtime + INTERVAL '12' HOUR
          END AS data_end
    FROM i
)
-- pvt: one row per selected labevents measurement, with the itemid mapped to
-- a text label and the numeric value passed through basic sanity checks.
-- itemids 50807 and 51545 are selected by the WHERE clause but have no label
-- branch, so their rows carry a NULL label.
, pvt AS
(
    SELECT
        le.hadm_id
        , CASE le.itemid
            WHEN 50800 THEN 'SPECIMEN'
            WHEN 50801 THEN 'AADO2'
            WHEN 50802 THEN 'BASEEXCESS'
            WHEN 50803 THEN 'BICARBONATE'
            WHEN 50804 THEN 'TOTALCO2'
            WHEN 50805 THEN 'CARBOXYHEMOGLOBIN'
            WHEN 50806 THEN 'CHLORIDE'
            WHEN 50808 THEN 'CALCIUM'
            WHEN 50809 THEN 'GLUCOSE'
            WHEN 50810 THEN 'HEMATOCRIT'
            WHEN 50811 THEN 'HEMOGLOBIN'
            WHEN 50812 THEN 'INTUBATED'
            WHEN 50813 THEN 'LACTATE'
            WHEN 50814 THEN 'METHEMOGLOBIN'
            WHEN 50815 THEN 'O2FLOW'
            WHEN 50816 THEN 'FIO2'
            WHEN 50817 THEN 'SO2'
            WHEN 50818 THEN 'PCO2'
            WHEN 50819 THEN 'PEEP'
            WHEN 50820 THEN 'PH'
            WHEN 50821 THEN 'PO2'
            WHEN 50822 THEN 'POTASSIUM'
            WHEN 50823 THEN 'REQUIREDO2'
            WHEN 50824 THEN 'SODIUM'
            WHEN 50825 THEN 'TEMPERATURE'
            WHEN 50826 THEN 'TIDALVOLUME'
            WHEN 50827 THEN 'VENTILATIONRATE'
            WHEN 50828 THEN 'VENTILATOR'
            ELSE NULL
          END AS label
        , le.charttime
        , le.value
        -- Sanity checks: out-of-range values become NULL. A NULL valuenum
        -- fails every comparison below and falls through to ELSE, staying NULL.
        , CASE
            -- Non-positive values are discarded, except base excess (50802),
            -- which is a signed quantity and is legitimately negative.
            WHEN le.valuenum <= 0 AND le.itemid <> 50802 THEN NULL
            WHEN le.itemid = 50810 AND le.valuenum > 100 THEN NULL -- hematocrit
            WHEN le.itemid = 50816 AND le.valuenum < 20 THEN NULL -- fio2 lower bound
            WHEN le.itemid = 50816 AND le.valuenum > 100 THEN NULL -- fio2 upper bound
            WHEN le.itemid = 50817 AND le.valuenum > 100 THEN NULL -- o2 sat
            WHEN le.itemid = 50815 AND le.valuenum > 70 THEN NULL -- o2 flow
            WHEN le.itemid = 50821 AND le.valuenum > 800 THEN NULL -- po2
            ELSE le.valuenum
          END AS valuenum
    FROM mimiciii_clinical.labevents le
    WHERE le.itemid IN
    (
        50800, 50801, 50802, 50803, 50804, 50805, 50806, 50807, 50808, 50809
        , 50810, 50811, 50812, 50813, 50814, 50815, 50816, 50817, 50818, 50819
        , 50820, 50821, 50822, 50823, 50824, 50825, 50826, 50827, 50828
        , 51545
    )
)
-- grp: pivot the labelled measurements into one row per (hadm_id, charttime).
-- Numeric labels are averaged (MAX for ventilationrate / ventilator); the
-- specimen text is taken with MAX. Groups containing two or more SPECIMEN
-- rows are discarded by the HAVING clause.
, grp AS
(
    SELECT
        pvt.hadm_id
        , pvt.charttime
        , MAX(pvt.value)    FILTER (WHERE pvt.label = 'SPECIMEN')          AS specimen
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'AADO2')             AS aado2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'BASEEXCESS')        AS baseexcess
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'BICARBONATE')       AS bicarbonate
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'TOTALCO2')          AS totalco2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'CARBOXYHEMOGLOBIN') AS carboxyhemoglobin
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'CHLORIDE')          AS chloride
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'CALCIUM')           AS calcium
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'GLUCOSE')           AS glucose
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'HEMATOCRIT')        AS hematocrit
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'HEMOGLOBIN')        AS hemoglobin
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'INTUBATED')         AS intubated
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'LACTATE')           AS lactate
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'METHEMOGLOBIN')     AS methemoglobin
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'O2FLOW')            AS o2flow
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'FIO2')              AS fio2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'SO2')               AS so2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'PCO2')              AS pco2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'PEEP')              AS peep
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'PH')                AS ph
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'PO2')               AS po2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'POTASSIUM')         AS potassium
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'REQUIREDO2')        AS requiredo2
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'SODIUM')            AS sodium
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'TEMPERATURE')       AS temperature
        , AVG(pvt.valuenum) FILTER (WHERE pvt.label = 'TIDALVOLUME')       AS tidalvolume
        , MAX(pvt.valuenum) FILTER (WHERE pvt.label = 'VENTILATIONRATE')   AS ventilationrate
        , MAX(pvt.valuenum) FILTER (WHERE pvt.label = 'VENTILATOR')        AS ventilator
        -- number of SPECIMEN rows recorded at this (hadm_id, charttime)
        , COUNT(*) FILTER (WHERE pvt.label = 'SPECIMEN')                   AS specimen_ct
    FROM pvt
    GROUP BY pvt.hadm_id, pvt.charttime
    HAVING COUNT(*) FILTER (WHERE pvt.label = 'SPECIMEN') < 2
)
-- Final projection: attach an icustay_id to every pivoted row.
-- admissions supplies the subject_id for each hadm_id (rows whose hadm_id has
-- no admissions match are dropped by the inner join). The stay window is
-- LEFT-joined, so rows outside every window are kept with a NULL icustay_id.
SELECT
    win.icustay_id
    , grp.hadm_id
    , grp.charttime
    , grp.specimen
    , grp.aado2
    , grp.baseexcess
    , grp.bicarbonate
    , grp.totalco2
    , grp.carboxyhemoglobin
    , grp.chloride
    , grp.calcium
    , grp.glucose
    , grp.hematocrit
    , grp.hemoglobin
    , grp.intubated
    , grp.lactate
    , grp.methemoglobin
    , grp.o2flow
    , grp.fio2
    , grp.so2
    , grp.pco2
    , grp.peep
    , grp.ph
    , grp.po2
    , grp.potassium
    , grp.requiredo2
    , grp.sodium
    , grp.temperature
    , grp.tidalvolume
    , grp.ventilationrate
    , grp.ventilator
FROM grp
INNER JOIN mimiciii_clinical.admissions AS a
    ON a.hadm_id = grp.hadm_id
LEFT JOIN iid_assign AS win
    ON win.subject_id = a.subject_id
    -- half-open window: data_start inclusive, data_end exclusive
    AND win.data_start <= grp.charttime
    AND win.data_end > grp.charttime
ORDER BY grp.hadm_id, grp.charttime;

-- Suggested indexes (optional, but helps downstream joins a lot):
-- CREATE INDEX IF NOT EXISTS idx_pivoted_bg_icustay_charttime
-- ON mimiciii_derived.pivoted_bg (icustay_id, charttime);
-- CREATE INDEX IF NOT EXISTS idx_pivoted_bg_hadm_charttime
-- ON mimiciii_derived.pivoted_bg (hadm_id, charttime);
174 changes: 174 additions & 0 deletions mimic-iii/concepts_postgres/pivot/pivoted_bg_art.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
-- =====================================================================
-- PostgreSQL version of BigQuery pivoted-bg-art.sql (MIMIC-III)
-- Requires: mimiciii_derived.pivoted_bg
--
-- Output: mimiciii_derived.pivoted_bg_art
-- =====================================================================

-- Drop any previous build so the script can be re-run from scratch.
DROP TABLE IF EXISTS mimiciii_derived.pivoted_bg_art;

CREATE TABLE mimiciii_derived.pivoted_bg_art AS
-- stg_spo2: charted SpO2 collapsed to one value per (hadm_id, charttime) by
-- averaging. Only values in (0, 100] with a non-NULL charttime are kept.
WITH stg_spo2 AS
(
    SELECT
        ce.hadm_id
        , ce.charttime
        , AVG(ce.valuenum) AS spo2
    FROM mimiciii_clinical.chartevents AS ce
    WHERE ce.charttime IS NOT NULL
      AND ce.itemid IN (646, 220277) -- SpO2
      AND ce.valuenum > 0
      AND ce.valuenum <= 100
    GROUP BY ce.hadm_id, ce.charttime
)
-- stg_fio2: charted FiO2 normalised to a percentage, one value (the MAX) per
-- (hadm_id, charttime).
--   * 223835: (0, 1] is treated as a fraction and scaled by 100; (1, 21) is
--     discarded; [21, 100] is kept as-is.
--   * 3420 / 3422: kept as-is.
--   * 190: only (0.20, 1) is accepted, scaled by 100.
-- The outer range filter is inclusive of 100 so that a charted FiO2 of
-- exactly 100 reaches the CASE branches that accept it; with a strict
-- "< 100" those rows were removed before the CASE was evaluated.
, stg_fio2 AS
(
    SELECT
        hadm_id
        , charttime
        , MAX(
            CASE
                WHEN itemid = 223835 THEN
                    CASE
                        WHEN valuenum > 0 AND valuenum <= 1 THEN valuenum * 100
                        WHEN valuenum > 1 AND valuenum < 21 THEN NULL
                        WHEN valuenum >= 21 AND valuenum <= 100 THEN valuenum
                        ELSE NULL
                    END
                WHEN itemid IN (3420, 3422) THEN
                    valuenum
                WHEN itemid = 190 AND valuenum > 0.20 AND valuenum < 1 THEN
                    valuenum * 100
                ELSE NULL
            END
          ) AS fio2_chartevents
    FROM mimiciii_clinical.chartevents
    WHERE itemid IN (3420, 190, 223835, 3422)
      AND valuenum > 0 AND valuenum <= 100
      AND charttime IS NOT NULL
      -- exclude rows marked as error (if column exists in your import)
      AND (error IS NULL OR error <> 1)
    GROUP BY hadm_id, charttime
)
-- stg2: blood gases that have a pO2, each paired with every SpO2 charted for
-- the same hadm_id in the 2 hours up to and including the blood gas time.
-- lastrowspo2 = 1 marks the most recent SpO2 (or the single unmatched row,
-- where spo2 is NULL).
, stg2 AS
(
    SELECT
        bg.*
        , ROW_NUMBER() OVER (
            PARTITION BY bg.hadm_id, bg.charttime
            ORDER BY s1.charttime DESC NULLS LAST
          ) AS lastrowspo2
        , s1.spo2
    FROM mimiciii_derived.pivoted_bg AS bg
    LEFT JOIN stg_spo2 AS s1
        ON s1.hadm_id = bg.hadm_id
        -- closed interval [bg.charttime - 2h, bg.charttime]
        AND s1.charttime >= (bg.charttime - INTERVAL '2' HOUR)
        AND s1.charttime <= bg.charttime
    WHERE bg.po2 IS NOT NULL
)
-- stg3: keeps the latest-SpO2 row from stg2, pairs it with every charted FiO2
-- for the same hadm_id in the 4 hours up to and including the blood gas time
-- (lastrowfio2 = 1 marks the most recent), and computes specimen_prob.
, stg3 AS
(
    SELECT
        bg.*
        , ROW_NUMBER() OVER (
            PARTITION BY bg.hadm_id, bg.charttime
            ORDER BY s2.charttime DESC NULLS LAST
          ) AS lastrowfio2
        , s2.fio2_chartevents

        -- Logistic regression probability (same coefficients as BigQuery).
        -- Each COALESCE falls back to a constant term when the input is NULL.
        , 1.0 / (1.0 + EXP(-(
              -0.02544
            + 0.04598 * bg.po2
            + COALESCE(-0.15356 * bg.spo2            , -0.15356 * 97.49420 + 0.13429)
            + COALESCE( 0.00621 * s2.fio2_chartevents,  0.00621 * 51.49550 - 0.24958)
            + COALESCE( 0.10559 * bg.hemoglobin      ,  0.10559 * 10.32307 + 0.05954)
            + COALESCE( 0.13251 * bg.so2             ,  0.13251 * 93.66539 - 0.23172)
            + COALESCE(-0.01511 * bg.pco2            , -0.01511 * 42.08866 - 0.01630)
            + COALESCE( 0.01480 * bg.fio2            ,  0.01480 * 63.97836 - 0.31142)
            + COALESCE(-0.00200 * bg.aado2           , -0.00200 * 442.21186 - 0.01328)
            + COALESCE(-0.03220 * bg.bicarbonate     , -0.03220 * 22.96894 - 0.06535)
            + COALESCE( 0.05384 * bg.totalco2        ,  0.05384 * 24.72632 - 0.01405)
            + COALESCE( 0.08202 * bg.lactate         ,  0.08202 * 3.06436 + 0.06038)
            + COALESCE( 0.10956 * bg.ph              ,  0.10956 * 7.36233 - 0.00617)
            + COALESCE( 0.00848 * bg.o2flow          ,  0.00848 * 7.59362 - 0.35803)
          ))) AS specimen_prob
    FROM stg2 AS bg
    LEFT JOIN stg_fio2 AS s2
        ON s2.hadm_id = bg.hadm_id
        -- closed interval [bg.charttime - 4h, bg.charttime]
        AND s2.charttime >= (bg.charttime - INTERVAL '4' HOUR)
        AND s2.charttime <= bg.charttime
        AND s2.fio2_chartevents > 0
    WHERE bg.lastrowspo2 = 1
)
-- Final projection: one row per blood gas (latest FiO2 match only) that is
-- either labelled 'ART' or has specimen_prob above 0.75.
-- f.fio2_used is the lab FiO2 when present, otherwise the charted FiO2.
SELECT
    stg3.hadm_id
    , stg3.icustay_id
    , stg3.charttime
    , stg3.specimen

    -- recorded specimen when available, otherwise 'ART' if predicted
    , CASE
        WHEN stg3.specimen IS NOT NULL THEN stg3.specimen
        WHEN stg3.specimen_prob > 0.75 THEN 'ART'
      END AS specimen_pred
    , stg3.specimen_prob

    -- oxygen related parameters
    , stg3.so2
    , stg3.spo2
    , stg3.po2
    , stg3.pco2
    , stg3.fio2_chartevents
    , stg3.fio2
    , stg3.aado2

    -- NOTE(review): 760, 47 and 0.8 are presumably barometric pressure,
    -- water vapour pressure (mmHg) and the respiratory quotient - confirm.
    , CASE
        WHEN stg3.po2 IS NOT NULL
         AND stg3.pco2 IS NOT NULL
         AND f.fio2_used IS NOT NULL
            THEN (f.fio2_used / 100.0) * (760 - 47) - (stg3.pco2 / 0.8) - stg3.po2
      END AS aado2_calc

    -- pO2 divided by FiO2 expressed as a fraction (FiO2 is stored as a percent)
    , CASE
        WHEN stg3.po2 IS NOT NULL
         AND f.fio2_used IS NOT NULL
            THEN 100.0 * stg3.po2 / f.fio2_used
      END AS pao2fio2ratio

    -- acid-base parameters
    , stg3.ph
    , stg3.baseexcess
    , stg3.bicarbonate
    , stg3.totalco2

    -- blood count parameters
    , stg3.hematocrit
    , stg3.hemoglobin
    , stg3.carboxyhemoglobin
    , stg3.methemoglobin

    -- chemistry
    , stg3.chloride
    , stg3.calcium
    , stg3.temperature
    , stg3.potassium
    , stg3.sodium
    , stg3.lactate
    , stg3.glucose

    -- ventilation / misc
    , stg3.intubated
    , stg3.tidalvolume
    , stg3.ventilationrate
    , stg3.ventilator
    , stg3.peep
    , stg3.o2flow
    , stg3.requiredo2
FROM stg3
CROSS JOIN LATERAL
(
    -- exactly one row per stg3 row; only names the repeated expression
    SELECT COALESCE(stg3.fio2, stg3.fio2_chartevents) AS fio2_used
) AS f
WHERE stg3.lastrowfio2 = 1
  AND (stg3.specimen = 'ART' OR stg3.specimen_prob > 0.75)
ORDER BY stg3.hadm_id, stg3.charttime;

-- Suggested indexes (optional but strongly recommended)
-- CREATE INDEX IF NOT EXISTS idx_pivoted_bg_art_icustay_charttime
-- ON mimiciii_derived.pivoted_bg_art (icustay_id, charttime);
-- CREATE INDEX IF NOT EXISTS idx_pivoted_bg_art_hadm_charttime
-- ON mimiciii_derived.pivoted_bg_art (hadm_id, charttime);
Loading