diff --git a/Model/lib/wdk/model/records/datasetRecords.xml b/Model/lib/wdk/model/records/datasetRecords.xml index 4157bfb0..65342c8b 100644 --- a/Model/lib/wdk/model/records/datasetRecords.xml +++ b/Model/lib/wdk/model/records/datasetRecords.xml @@ -179,7 +179,12 @@ --> - + + + + + + @@ -418,7 +423,22 @@ + + + + + + + + + + + + +
@@ -999,11 +1019,11 @@ Based on evolutionarily informed expectations of gene content of near-universal coalesce(dsp.display_category, dsp.category) as newcategory FROM ApidbTuning.DatasetPresenter dsp WHERE project_id = '@PROJECT_ID@' - -- UNION - -- SELECT DISTINCT - -- CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, - -- null as newcategory - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT DISTINCT + CONCAT('EDAUD_', d.user_dataset_id) AS dataset_id, + NULL AS newcategory + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets AS d ]]> @@ -1016,7 +1036,7 @@ Based on evolutionarily informed expectations of gene content of near-universal SELECT DISTINCT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, null as newcategory - FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -1042,9 +1062,9 @@ Based on evolutionarily informed expectations of gene content of near-universal UNION select DISTINCT null as dataset_sha1_digest, - CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, + CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, d.name as dataset_display_name - from @VDI_CONTROL_SCHEMA@.availableuserdatasets d + from @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -1064,6 +1084,19 @@ Based on evolutionarily informed expectations of gene content of near-universal + + + + + + + + @@ -1496,9 +1529,9 @@ Based on evolutionarily informed expectations of gene content of near-universal LEFT JOIN apidbtuning.organismattributes oa ON dsp.name = oa.internal_abbrev || '_primary_genome_RSRC' WHERE dsp.project_id = '@PROJECT_ID@' AND dsp.category != 'Link outs' - -- UNION - -- SELECT DISTINCT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, d.name as display_name - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT DISTINCT CONCAT('EDAUD_', d.user_dataset_id) AS dataset_id, d.name AS display_name + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets AS d ]]> @@ -1527,9 +1560,9 @@ Based on 
evolutionarily informed expectations of gene content of near-universal FROM apidbtuning.datasetpresenter dsp LEFT JOIN apidbtuning.organismattributes oa ON dsp.name = oa.internal_abbrev || '_primary_genome_RSRC' WHERE dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, null as megabase_pairs, null as genecount - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) AS dataset_id, NULL AS megabase_pairs, NULL AS genecount + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets AS d ]]> @@ -1583,10 +1616,10 @@ Based on evolutionarily informed expectations of gene content of near-universal GROUP BY dataset_presenter_id ) dp ON dsp.dataset_presenter_id = dp.dataset_presenter_id WHERE dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, - -- CASE WHEN is_public = 1 THEN 'yes' ELSE 'no' END as is_public - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) AS dataset_id, + CASE WHEN d.accessibility <> 'private' THEN 'yes' ELSE 'no' END AS is_public + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets AS d ]]> @@ -1651,9 +1684,9 @@ Based on evolutionarily informed expectations of gene content of near-universal FROM apidbtuning.datasetpresenter dsp LEFT JOIN all_versions_filtered av ON dsp.dataset_presenter_id = av.dataset_presenter_id WHERE dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, null as genome_version, null as annotation_version, null as functional_annotation_version - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) AS dataset_id, NULL AS genome_version, NULL AS annotation_version, NULL AS functional_annotation_version + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets AS d ]]> @@ -1730,24 +1763,23 @@ Based on evolutionarily informed expectations of gene 
content of near-universal FROM apidbTuning.EupathBuildDates WHERE project = '@PROJECT_ID@' ) introduced ON dsp.build_number_introduced = introduced.build_number::numeric - -- TODO: add this back - -- UNION ALL - -- SELECT CONCAT('EDAUD_', d.dataset_id) as dataset_id - -- , null as dataset_name - -- , null as dataset_name_pattern - -- , null as version - -- , d.name as short_display_name - -- , d.description - -- , d.summary::TEXT as summary - -- , null as protocol - -- , null as usage - -- , null as type - -- , null as is_species_scope - -- , null as build_number_introduced - -- , null as eupath_release - -- , null as pmids - -- , null as pmids_download - --FROM @VDI_CONTROL_SCHEMA@.dataset_meta d -- NOTE: We're reaching into VDI control and selecting from dataset_meta here. This is because availabledatasets has duplicate rows and cannot be selected "distinct"ly since we're selecting clobs. + UNION ALL /* VDI user datasets. The view is keyed by user_dataset_id (as in every other query here) and, per the note removed above, can repeat a dataset, hence DISTINCT. NOTE(review): assumes description is a DISTINCT-comparable type such as text; confirm. */ + SELECT DISTINCT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id + , null as dataset_name + , null as dataset_name_pattern + , null as version + , d.name as short_display_name + , d.description + , d.summary::TEXT as summary + , null as protocol + , null as usage + , null as type + , null as is_species_scope + , null as build_number_introduced + , to_char(d.release_date, 'YYYY-MON-DD') as eupath_release + , null as pmids + , null as pmids_download + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -1888,7 +1920,7 @@ Based on evolutionarily informed expectations of gene content of near-universal , null as eupath_release , null as pmids , null as pmids_download - FROM @VDI_CONTROL_SCHEMA@.dataset_meta d -- NOTE: We're reaching into VDI control and selecting from dataset_meta here. This is because availabledatasets has duplicate rows and cannot be selected "distinct"ly since we're selecting clobs. 
+ FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -1896,13 +1928,13 @@ Based on evolutionarily informed expectations of gene content of near-universal - + @@ -1911,13 +1943,13 @@ Based on evolutionarily informed expectations of gene content of near-universal @@ -1935,12 +1967,12 @@ Based on evolutionarily informed expectations of gene content of near-universal dsp.release_policy from apidbTuning.DatasetPresenter dsp where dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id - -- , null as caveat - -- , null as acknowledgement - -- , null as release_policy - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id + , null as caveat + , null as acknowledgement + , null as release_policy + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -1978,20 +2010,14 @@ Based on evolutionarily informed expectations of gene content of near-universal AND dsc.is_primary_contact = true AND dsp.project_id = '@PROJECT_ID@' UNION --- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id --- , dc.name as contact --- , affiliation as institution --- , email --- , short_attribution --- from @VDI_CONTROL_SCHEMA@.availableuserdatasets d, @VDI_CONTROL_SCHEMA@.dataset_contact dc --- where dc.is_primary = 1 --- and dc.dataset_id = d.user_dataset_id SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id - , null as contact - , null as institution - , null as email - , null as short_attribution - FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + , dc.name as contact + , affiliation as institution + , email + , short_attribution + from @VDI_CONTROL_SCHEMA@.availableuserdatasets d, @VDI_CONTROL_SCHEMA@.dataset_contact dc + where dc.is_primary = 1 + and dc.dataset_id = d.user_dataset_id ]]> @@ -2027,13 +2053,15 @@ Based on evolutionarily informed expectations of gene content of near-universal LEFT JOIN ApidbTuning.DatasetContact dsc ON dsp.dataset_presenter_id = 
dsc.dataset_presenter_id WHERE dsc.is_primary_contact = true AND dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id - -- , null as contact - -- , null as institution - -- , null as email - -- , null as short_attribution - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id + , dc.name as contact + , affiliation as institution + , email + , short_attribution + from @VDI_CONTROL_SCHEMA@.availableuserdatasets d, @VDI_CONTROL_SCHEMA@.dataset_contact dc + where dc.is_primary = 1 + and dc.dataset_id = d.user_dataset_id ]]> @@ -2056,9 +2084,9 @@ Based on evolutionarily informed expectations of gene content of near-universal FROM apidbtuning.datasetPresenter dsp LEFT JOIN apollo_dataset ad ON dsp.dataset_presenter_id = ad.dataset_presenter_id WHERE dsp.project_id = '@PROJECT_ID@' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, 'N/A' as in_apollo - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, 'N/A' as in_apollo + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -2119,15 +2147,26 @@ Based on evolutionarily informed expectations of gene content of near-universal FROM ( SELECT DISTINCT dsnt.dataset_presenter_id as dataset_id , CASE when tn.organism_name is null then null - ELSE REGEXP_REPLACE('Plasmodium vivax P01','^(\w*)\s(\w*)\s(\.*)','\1 \2<\I>')||'' + ELSE REGEXP_REPLACE(tn.organism_name,'^(\w*)\s(\w*)\s(\.*)','\1 \2<\I>') || '' END as org FROM apidbtuning.datasetnametaxon dsnt LEFT JOIN apidbtuning.organismattributes tn ON dsnt.taxon_id = tn.component_taxon_id ) t GROUP BY dataset_id --UNION --SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, null as organism_prefix --FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT dataset_id, + regexp_replace( + substr(string_agg(org,'
' order by org), 1, 4000) , '
[A-Za-z\. <>&]+$', '
...') AS organism_prefix + FROM ( /* one row per (user dataset, organism name); DISTINCT so repeated view rows do not repeat an organism in the aggregate */ + SELECT DISTINCT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, + oa.organism_name as org + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + INNER JOIN @VDI_CONTROL_SCHEMA@.dataset_organism dorg /* alias is not "do": DO is a reserved word in PostgreSQL */ + ON dorg.dataset_id = d.user_dataset_id + INNER JOIN apidbtuning.organismattributes oa + ON oa.internal_abbrev = dorg.organism_abbrev + ) t + GROUP BY dataset_id ]]> @@ -2159,9 +2198,9 @@ Based on evolutionarily informed expectations of gene content of near-universal -- lower(substr(ENCODE(DIGEST(replace(name, '_ebi_', '_'), 'SHA1'),'hex'), 0, 10)) -- as old_dataset_id -- FROM apidbtuning.datasetpresenter where name like '%_ebi_%' - -- UNION - -- SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, CONCAT('EDAUD_', d.user_dataset_id) as old_dataset_id - -- FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d + UNION + SELECT CONCAT('EDAUD_', d.user_dataset_id) as dataset_id, CONCAT('EDAUD_', d.user_dataset_id) as old_dataset_id + FROM @VDI_CONTROL_SCHEMA@.availableuserdatasets d ]]> @@ -2184,6 +2223,29 @@ Based on evolutionarily informed expectations of gene content of near-universal + + + + + + + + + >'studyDesign' AS study_design, + json->>'disease' AS disease, + json->>'country' AS country, + json->>'years' AS years, + json->>'ages' AS ages, + json->>'sample_type' AS sample_type + FROM @VDI_CONTROL_SCHEMA@.dataset_properties dp, @VDI_CONTROL_SCHEMA@.AvailableUserDatasets aud + where dp.dataset_id = aud.user_dataset_id + ]]> + + + @@ -2354,6 +2416,12 @@ Based on evolutionarily informed expectations of gene content of near-universal FROM ApidbTuning.DatasetPresenter dsp, ApidbTuning.DatasetContact dsc WHERE dsp.dataset_presenter_id = dsc.dataset_presenter_id ORDER BY dsc.is_primary_contact desc, dsc.dataset_contact_id asc + UNION /* NOTE(review): PostgreSQL accepts ORDER BY only after the last branch of a UNION (or inside a parenthesized branch), so the ORDER BY directly above this UNION will not parse. Moving it needs the full SELECT list, which is outside this hunk; confirm and fix. */ + SELECT d.name as dataset_name, CONCAT('EDAUD_', d.user_dataset_id) as dataset_id + , dc.name as contact_name + , affiliation + from @VDI_CONTROL_SCHEMA@.availableuserdatasets d, @VDI_CONTROL_SCHEMA@.dataset_contact dc + where dc.dataset_id = d.user_dataset_id ]]>