1 | 1 | {
2 | 2 | "cells": [
3 | | - {
4 | | - "cell_type": "code",
5 | | - "execution_count": null,
6 | | - "metadata": {},
7 | | - "outputs": [],
8 | | - "source": [
9 | | - "from pronto import Ontology\n",
10 | | - "go = Ontology(\"go.obo\")\n",
11 | | - "go"
12 | | - ]
13 | | - },
14 | | - {
15 | | - "cell_type": "code",
16 | | - "execution_count": null,
17 | | - "metadata": {},
18 | | - "outputs": [],
19 | | - "source": [
20 | | - "with open(\"ms.json\", \"wb\") as f:\n",
21 | | - "    go.dump(f, format=\"json\")"
22 | | - ]
23 | | - },
24 | 3 | {
25 | 4 | "cell_type": "code",
26 | 5 | "execution_count": null,
27 | 6 | "metadata": {},
28 | 7 | "outputs": [],
29 | 8 | "source": [
30 | 9 | "import json\n",
31 | | - "with open(\"ms.json\", \"r\") as f:\n",
32 | | - "    go = json.load(f)"
33 | | - ]
34 | | - },
35 | | - {
36 | | - "cell_type": "code",
37 | | - "execution_count": null,
38 | | - "metadata": {},
39 | | - "outputs": [],
40 | | - "source": [
41 | | - "go[\"graphs\"][0].keys()"
42 | | - ]
43 | | - },
44 | | - {
45 | | - "cell_type": "code",
46 | | - "execution_count": null,
47 | | - "metadata": {},
48 | | - "outputs": [],
49 | | - "source": [
50 | | - "go[\"graphs\"][0][\"nodes\"][0]"
51 | | - ]
52 | | - },
53 | | - {
54 | | - "cell_type": "code",
55 | | - "execution_count": null,
56 | | - "metadata": {},
57 | | - "outputs": [],
58 | | - "source": [
59 | | - "edge_dict: dict = {}\n",
60 | | - "for relationship in go[\"graphs\"][0][\"edges\"]:\n",
61 | | - "    parent_list = edge_dict.get(relationship[\"sub\"].split(\"/\")[-1], [])\n",
62 | | - "    parent_list.append((relationship[\"obj\"].split(\"/\")[-1], relationship[\"pred\"]))\n",
63 | | - "    edge_dict[relationship[\"sub\"].split(\"/\")[-1]] = parent_list"
64 | | - ]
65 | | - },
66 | | - {
67 | | - "cell_type": "code",
68 | | - "execution_count": null,
69 | | - "metadata": {},
70 | | - "outputs": [],
71 | | - "source": [
72 | | - "edge_dict"
73 | | - ]
74 | | - },
75 | | - {
76 | | - "cell_type": "code",
77 | | - "execution_count": null,
78 | | - "metadata": {},
79 | | - "outputs": [],
80 | | - "source": [
81 | | - "for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
82 | | - "    if go_term[\"type\"] != \"CLASS\":\n",
83 | | - "        print(go_term)"
| 10 | + "import random\n", |
| 11 | + "from pronto import Ontology, Definition, Term\n",
| 12 | + "\n", |
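| | + "# Round-trips the vocabulary between its jsTree-style JSON node list\n",
| | + "# (id/text/icon/data/parent) and a pronto Ontology, so it can be edited as OBO.\n",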
| 13 | + "class ImpatientVocab:\n",
| 14 | + " def __init__(self) -> None:\n", |
| 15 | + " self.used_colors: list[str] = []\n", |
| 16 | + " self.impatient_json: list[dict] = []\n", |
| 17 | + "        self.impatient_onto: Ontology | None = None\n",
| 18 | + "        self.list_of_terms: list[Term] = []\n",
| 19 | + "\n", |
| 20 | + " def load_json(self, path: str) -> list[dict]:\n", |
| 21 | + "        with open(path, \"r\") as f:\n",
| | + "            self.impatient_json = json.load(f)\n",
| 22 | + " return self.impatient_json\n", |
| 23 | + " \n", |
| 24 | + " def load_ontology(self, path: str) -> Ontology:\n", |
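| | + "        # pronto autodetects the format: OBO, OBO Graphs JSON, or OWL (RDF/XML).\n",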
| 25 | + " self.impatient_onto = Ontology(path)\n", |
| 26 | + " return self.impatient_onto\n", |
| 27 | + " \n", |
| 28 | + " def json_to_onto(self) -> Ontology:\n", |
| 29 | + " self.impatient_onto = Ontology()\n", |
| 30 | + " for term in self.impatient_json:\n", |
| 31 | + " added_term = self.impatient_onto.create_term(term[\"id\"].replace(\"_\", \":\"))\n", |
| 32 | + " added_term.name = term[\"text\"]\n", |
| 33 | + " for syn in term[\"data\"][\"synonymes\"].split(\",\"):\n", |
| 34 | + "                if syn.strip() != \"\":\n",
| 35 | + "                    added_term.add_synonym(syn.strip(), scope=\"EXACT\")\n",
| 36 | + " if term[\"data\"][\"description\"] != \"\":\n", |
| 37 | + " added_term.definition = Definition(term[\"data\"][\"description\"])\n", |
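| | + "            # Assumes parents are listed before their children, so the\n",
| | + "            # parent term already exists in the ontology being built.\n",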
| 38 | + " if term[\"parent\"] != \"#\":\n", |
| 39 | + " added_term.superclasses().add(self.impatient_onto[term[\"parent\"].replace(\"_\", \":\")])\n", |
| 40 | + " \n", |
| 41 | + " self.list_of_terms.append(added_term)\n", |
| 42 | + " return self.impatient_onto\n", |
| 43 | + " \n", |
| 44 | + " def onto_to_json(self) -> list[dict]:\n", |
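| | + "        # Inverse of json_to_onto: emit one jsTree-style node dict per term.\n",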
| 45 | + " self.impatient_json = []\n", |
| 46 | + "        for index, term in enumerate(self.impatient_onto.terms()):\n",
| 48 | + " relationships = []\n", |
| 49 | + " for rel in term.superclasses():\n", |
| 50 | + " relationships.append(rel.id)\n", |
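| | + "            # superclasses() yields the term itself first, then its\n",
| | + "            # ancestors; drop the first entry to keep only real parents.\n",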
| 51 | + " relationships.pop(0)\n", |
| 52 | + " self.impatient_json.append(\n", |
| 53 | + " {\n", |
| 54 | + " \"id\": term.id.replace(\"_\", \":\"),\n", |
| 55 | + " \"text\": term.name,\n", |
| 56 | + " \"icon\": True,\n", |
| 57 | + " \"data\": {\n", |
| 58 | + " \"description\": term.definition if term.definition is not None else \"\",\n", |
| 59 | + " \"synonymes\": \",\".join([syn.description for syn in term.synonyms]),\n", |
| 60 | + " \"phenotype_datamined\": \"\",\n", |
| 61 | + " \"gene_datamined\": \"\",\n", |
| 62 | + " \"alternative_language\": term.name,\n", |
| 63 | + " \"correlates_with\": \"\",\n", |
| 64 | + "                        \"image_annotation\": index == 0,\n",
| 65 | + " \"hex_color\": self._generate_hex_color(),\n", |
| 66 | + " \"hpo_datamined\": \"\",\n", |
| 67 | + " },\n", |
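| | + "                    # jsTree nodes carry a single parent, so any extra\n",
| | + "                    # superclasses are dropped here.\n",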
| 68 | + "                    \"parent\": relationships[0].replace(\"_\", \":\") if relationships else \"#\"\n",
| 69 | + " }\n", |
| 70 | + " )\n", |
| 72 | + " return self.impatient_json\n", |
| 73 | + " \n", |
| 74 | + "    def _generate_hex_color(self) -> str:\n",
| 75 | + " while True:\n", |
| 76 | + " # Generate a random hex color\n", |
| 77 | + " color = \"#{:06x}\".format(random.randint(0, 0xFFFFFF))\n", |
| 78 | + " # Check if the color has already been used\n", |
| 79 | + " if color not in self.used_colors:\n", |
| 80 | + " # Add the color to the list of used colors and return it\n", |
| 81 | + " self.used_colors.append(color)\n", |
| 82 | + " return color\n", |
| 83 | + " \n", |
| 84 | + " def dump_onto(self, path: str) -> None:\n", |
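| | + "        # OBO keeps the dump human-readable; pronto can also emit\n",
| | + "        # OBO Graphs JSON here via format=\"json\".\n",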
| 85 | + " with open(path, \"wb\") as f:\n", |
| 86 | + " self.impatient_onto.dump(f, format=\"obo\")\n", |
| 87 | + "\n", |
| 88 | + " def dump_json(self, path: str) -> None:\n", |
| 89 | + " with open(path, \"w\") as f:\n", |
| 90 | + " json.dump(self.impatient_json, f, indent=2)" |
84 | 91 | ] |
85 | 92 | }, |
86 | 93 | { |
87 | 94 | "cell_type": "code",
88 | 95 | "execution_count": null,
89 | 96 | "metadata": {}, |
90 | 97 | "outputs": [], |
91 | 98 | "source": [ |
92 | | - "names: list[str] = []\n", |
93 | | - "id: list[str] = []\n", |
94 | | - "desc: list[str] = []\n", |
95 | | - "synonymes: list[list[str]] = []\n", |
96 | | - "\n", |
97 | | - "for go_term in go[\"graphs\"][0][\"nodes\"]:\n", |
98 | | - " if go_term[\"type\"] == \"CLASS\":\n", |
99 | | - " id.append(go_term[\"id\"].split(\"/\")[-1])\n", |
100 | | - " names.append(go_term[\"lbl\"])\n", |
101 | | - " desc.append(go_term[\"meta\"][\"definition\"][\"val\"])\n", |
102 | | - " synonymes.append([syn[\"val\"] for syn in go_term[\"meta\"][\"synonyms\"]])" |
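| | + "# Demo: load the vocabulary JSON, build an Ontology, and dump it as OBO.\n",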
| 99 | + "my_onto = ImpatientVocab()\n", |
| 100 | + "my_onto.load_json(\"ontology.json.demo\")\n", |
| 101 | + "my_onto.json_to_onto()\n", |
| 102 | + "my_onto.dump_onto(\"ontology_imp.obo\")" |
103 | 103 | ] |
104 | 104 | }, |
105 | | - { |
106 | | - "cell_type": "code", |
107 | | - "execution_count": null, |
108 | | - "metadata": {}, |
109 | | - "outputs": [], |
110 | | - "source": [] |
111 | | - }, |
112 | 105 | { |
113 | 106 | "cell_type": "code", |
114 | 107 | "execution_count": null, |
115 | 108 | "metadata": {}, |
116 | 109 | "outputs": [], |
117 | 110 | "source": [ |
118 | | - "import jsonschema\n", |
119 | | - "from jsonschema import validate\n", |
120 | | - "\n", |
121 | | - "impatient_json: list[dict] = []\n", |
122 | | - "impatient_json_schema = {\n", |
123 | | - " \"type\": \"object\",\n", |
124 | | - " \"properties\": {\n", |
125 | | - " \"id\": {\"type\": \"string\"},\n", |
126 | | - " \"text\": {\"type\": \"string\"},\n", |
127 | | - " \"icon\": {\"type\": \"boolean\"},\n", |
128 | | - " \"data\": {\n", |
129 | | - " \"type\": \"object\",\n", |
130 | | - " \"properties\": {\n", |
131 | | - " \"description\": {\"type\": \"string\"},\n", |
132 | | - " \"synonymes\": {\"type\": \"string\"},\n", |
133 | | - " \"phenotype_datamined\": {\"type\": \"string\"},\n", |
134 | | - " \"gene_datamined\": {\"type\": \"string\"},\n", |
135 | | - " \"alternative_language\": {\"type\": \"string\"},\n", |
136 | | - " \"correlates_with\": {\"type\": \"string\"},\n", |
137 | | - " \"image_annotation\": {\"type\": \"boolean\"},\n", |
138 | | - " \"hex_color\": {\"type\": \"string\", \"pattern\": \"^#[0-9a-fA-F]{6}$\"},\n", |
139 | | - " \"hpo_datamined\": {\"type\": \"string\"},\n", |
140 | | - " },\n", |
141 | | - " \"required\": [\n", |
142 | | - " \"description\",\n", |
143 | | - " \"synonymes\",\n", |
144 | | - " \"phenotype_datamined\",\n", |
145 | | - " \"gene_datamined\",\n", |
146 | | - " \"alternative_language\",\n", |
147 | | - " \"correlates_with\",\n", |
148 | | - " \"image_annotation\",\n", |
149 | | - " \"hex_color\",\n", |
150 | | - " \"hpo_datamined\",\n", |
151 | | - " ],\n", |
152 | | - " },\n", |
153 | | - " \"parent\": {\"type\": \"string\"},\n", |
154 | | - " },\n", |
155 | | - " \"required\": [\"id\", \"text\", \"icon\", \"data\", \"parent\"],\n", |
156 | | - "}\n", |
157 | | - "\n", |
158 | | - "for index in range(len(id)):\n", |
159 | | - " impatient_json.append(\n", |
160 | | - " {\n", |
161 | | - " \"id\": id[index].replace(\"_\", \":\"),\n", |
162 | | - " \"text\": names[index],\n", |
163 | | - " \"icon\": True,\n", |
164 | | - " \"data\": {\n", |
165 | | - " \"description\": desc[index],\n", |
166 | | - " \"synonymes\": ','.join(synonymes[index]),\n", |
167 | | - " \"phenotype_datamined\": \"\",\n", |
168 | | - " \"gene_datamined\": \"\",\n", |
169 | | - " \"alternative_language\": names[index],\n", |
170 | | - " \"correlates_with\": \"\",\n", |
171 | | - " \"image_annotation\": True if index==0 else False,\n", |
172 | | - " \"hex_color\": \"#FFFFFF\",\n", |
173 | | - " \"hpo_datamined\": \"\",\n", |
174 | | - " },\n", |
175 | | - " \"parent\": \"#\",\n", |
176 | | - " }\n", |
177 | | - " )\n", |
178 | | - " \n", |
179 | | - "for child, parent in edge_dict.items():\n", |
180 | | - " try:\n", |
181 | | - " index_term = id.index(child)\n", |
182 | | - " except ValueError:\n", |
183 | | - " print(f\"Term {child} not found in the list of terms\")\n", |
184 | | - " continue\n", |
185 | | - "    # Only one parent, so we are losing information.\n",
186 | | - " impatient_json[index_term][\"parent\"] = parent[0][0].replace(\"_\", \":\")" |
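| | + "# Convert a GO slim distributed as OBO into the vocabulary JSON format.\n",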
| 111 | + "my_onto = ImpatientVocab()\n", |
| 112 | + "my_onto.load_ontology(\"goslim_agr.obo\")\n", |
| 113 | + "my_onto.onto_to_json()\n", |
| 114 | + "my_onto.dump_json(\"obo_to_json_GO.json\")" |
187 | 115 | ] |
188 | 116 | }, |
189 | 117 | { |
190 | 118 | "cell_type": "code",
191 | 119 | "execution_count": null,
192 | 120 | "metadata": {}, |
193 | 121 | "outputs": [], |
194 | 122 | "source": [ |
195 | | - "json.dump(impatient_json, open(\"impatient.json\", \"w\"))" |
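| | + "# Round-trip check: reload the OBO produced above and dump it back to JSON.\n",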
| 123 | + "my_onto = ImpatientVocab()\n", |
| 124 | + "my_onto.load_ontology(\"ontology_imp.obo\")\n", |
| 125 | + "my_onto.onto_to_json()\n", |
| 126 | + "my_onto.dump_json(\"obo_to_json_IMP.json\")" |
196 | 127 | ] |
197 | 128 | }, |
198 | 129 | { |
199 | 130 | "cell_type": "code",
200 | 131 | "execution_count": null,
201 | 132 | "metadata": {}, |
202 | 133 | "outputs": [], |
203 | 134 | "source": [ |
204 | | - "for idx, json_data in enumerate(impatient_json, start=1):\n", |
205 | | - " validate(instance=json_data, schema=impatient_json_schema)" |
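| | + "# pronto also parses OWL, so the full HPO converts the same way.\n",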
| 135 | + "my_onto = ImpatientVocab()\n", |
| 136 | + "my_onto.load_ontology(\"hp.owl\")\n", |
| 137 | + "my_onto.onto_to_json()\n", |
| 138 | + "my_onto.dump_json(\"obo_to_json_HPO.json\")" |
206 | 139 | ] |
207 | 140 | } |
208 | 141 | ], |