Skip to content

Commit e586a57

Browse files
authored
Merge branch 'master' into master
2 parents f2996b7 + a9d4314 commit e586a57

File tree

12 files changed

+1138
-736
lines changed

12 files changed

+1138
-736
lines changed

example1/3-FitModel.html

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12067,10 +12067,11 @@ <h2 id="Select-a-Classification-Method-(default-SVM)">Select a Classification Me
1206712067

1206812068

1206912069

12070-
<div id="6b3611d1-e0af-421e-b0f1-f0df1e9309d2"></div>
12070+
12071+
<div id="b13aa3da-a92c-4ae9-af15-c6d66b025b16"></div>
1207112072
<div class="output_subarea output_widget_view ">
1207212073
<script type="text/javascript">
12073-
var element = $('#6b3611d1-e0af-421e-b0f1-f0df1e9309d2');
12074+
var element = $('#b13aa3da-a92c-4ae9-af15-c6d66b025b16');
1207412075
</script>
1207512076
<script type="application/vnd.jupyter.widget-view+json">
1207612077
{"model_id": "ab03394bfe0b457ea53b160d07957fc3", "version_major": 2, "version_minor": 0}

example1/4-Predict.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11855,10 +11855,10 @@ <h2 id="Enter-a-Protein-Sequence-in-Text-Box">Enter a Protein Sequence in Text B
1185511855

1185611856

1185711857

11858-
<div id="e25fe3ca-d369-498b-84b3-ff010d88ef5f"></div>
11858+
<div id="09051c6c-3269-45c9-bde1-ad39c3085667"></div>
1185911859
<div class="output_subarea output_widget_view ">
1186011860
<script type="text/javascript">
11861-
var element = $('#e25fe3ca-d369-498b-84b3-ff010d88ef5f');
11861+
var element = $('#09051c6c-3269-45c9-bde1-ad39c3085667');
1186211862
</script>
1186311863
<script type="application/vnd.jupyter.widget-view+json">
1186411864
{"model_id": "9bef296ce825422592439bdcd9ad4e96", "version_major": 2, "version_minor": 0}

example2/0-Workflow.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11948,18 +11948,18 @@ <h2 id="Version-and-Hardware-Information">Version and Hardware Information<a cla
1194811948

1194911949

1195011950
<div class="output_subarea output_stream output_stdout output_text">
11951-
<pre>CPython 3.6.3
11952-
IPython 6.3.1
11951+
<pre>CPython 3.7.0
11952+
IPython 6.4.0
1195311953

1195411954
matplotlib 2.2.2
1195511955
pyvolve 0.8.8
1195611956
seaborn 0.9.0
1195711957
treesap n
1195811958
treeswift n
1195911959

11960-
compiler : GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)
11960+
compiler : Clang 9.1.0 (clang-902.0.39.2)
1196111961
system : Darwin
11962-
release : 17.5.0
11962+
release : 17.7.0
1196311963
machine : x86_64
1196411964
processor : i386
1196511965
CPU cores : 4

example2/0-Workflow.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,18 +146,18 @@
146146
"name": "stdout",
147147
"output_type": "stream",
148148
"text": [
149-
"CPython 3.6.3\n",
150-
"IPython 6.3.1\n",
149+
"CPython 3.7.0\n",
150+
"IPython 6.4.0\n",
151151
"\n",
152152
"matplotlib 2.2.2\n",
153153
"pyvolve 0.8.8\n",
154154
"seaborn 0.9.0\n",
155155
"treesap n\u0007\n",
156156
"treeswift n\u0007\n",
157157
"\n",
158-
"compiler : GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)\n",
158+
"compiler : Clang 9.1.0 (clang-902.0.39.2)\n",
159159
"system : Darwin\n",
160-
"release : 17.5.0\n",
160+
"release : 17.7.0\n",
161161
"machine : x86_64\n",
162162
"processor : i386\n",
163163
"CPU cores : 4\n",
@@ -198,7 +198,7 @@
198198
"name": "python",
199199
"nbconvert_exporter": "python",
200200
"pygments_lexer": "ipython3",
201-
"version": "3.6.3"
201+
"version": "3.7.0"
202202
}
203203
},
204204
"nbformat": 4,

example2/1-SimulateTree.html

Lines changed: 196 additions & 23 deletions
Large diffs are not rendered by default.

example2/1-SimulateTree.ipynb

Lines changed: 129 additions & 23 deletions
Large diffs are not rendered by default.

example2/2-SimulateSequences.html

Lines changed: 127 additions & 88 deletions
Large diffs are not rendered by default.

example2/2-SimulateSequences.ipynb

Lines changed: 107 additions & 87 deletions
Large diffs are not rendered by default.

example2/helper.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
'''
3+
Helper functions for Example 2: "Simulate Phylogenetic Trees and Sequences"
4+
'''
5+
from math import log
6+
7+
def read_FASTA(filename):
    '''
    Given the name of a FASTA file, parse the contents of the FASTA file and return a dictionary in which keys are identifiers and values are sequences.

    Each identifier is the full header line without the leading '>'. Blank lines are skipped, surrounding whitespace is stripped from every line, and multi-line sequences are concatenated. Sequence lines that appear before the first header are ignored.

    Raises AssertionError if a record has no sequence, if an identifier is repeated, or if the file contains no records.
    '''
    seqs = {}
    name = None
    chunks = []  # pieces of the current record's sequence; joined once per record
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            if stripped[0] == '>':
                if name is not None:
                    # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
                    if not chunks:
                        raise AssertionError("Malformed FASTA")
                    seqs[name] = ''.join(chunks)
                name = stripped[1:]
                if name in seqs:
                    raise AssertionError("Duplicate sequence ID: %s" % name)
                chunks = []
            else:
                chunks.append(stripped)
    # The final record is only terminated by end-of-file, so store it here.
    if name is None or not chunks:
        raise AssertionError("Malformed FASTA")
    seqs[name] = ''.join(chunks)
    return seqs
32+
33+
def distance_matrix_to_list(dm):
    '''
    Convert an n-by-n pairwise distance matrix (dictionary of dictionaries) into a list of n(n-1)/2 pairwise distances. Sorts keys in ascending order.

    For sorted keys k_0 < k_1 < ... the result is ordered as dm[k_0][k_1], dm[k_0][k_2], ..., dm[k_1][k_2], ...; only entries dm[a][b] with a before b in sorted order are read. An empty or single-entry matrix yields an empty list.
    '''
    keys = sorted(dm.keys())
    # Pair each key with every key that follows it in sorted order.
    return [dm[u][v] for i, u in enumerate(keys) for v in keys[i+1:]]
39+
40+
def hamming(u,v):
    '''
    Given a pair of sequences u and v with equal lengths, compute the Hamming distance between u and v (as a proportion, not a count).

    Returns a float in [0, 1]: the number of positions at which u and v differ divided by their length. Two empty sequences have no differing positions, so their distance is 0.0.

    Raises ValueError if u and v have different lengths.
    '''
    # Reject unequal lengths up front: otherwise a longer v would be silently truncated.
    if len(u) != len(v):
        raise ValueError("Sequences must have equal lengths (got %d and %d)" % (len(u), len(v)))
    if len(u) == 0:
        return 0.0  # avoids dividing by zero
    return sum(a != b for a, b in zip(u, v))/float(len(u))
45+
46+
def compute_hamming_distance_matrix(sequences):
    '''
    Given a dictionary of n sequences in which keys are identifiers and values are sequences, return an n-by-n pairwise distance matrix (dictionary of dictionaries) of Hamming distances.

    Every ordered pair is evaluated, including each sequence against itself, so matrix[u][v] is present for all identifiers u and v.
    '''
    matrix = {}
    for row_id in sequences:
        row_seq = sequences[row_id]
        # One row: distance from row_id's sequence to every sequence.
        matrix[row_id] = {col_id: hamming(row_seq, sequences[col_id]) for col_id in sequences}
    return matrix
51+
52+
def jc69_correction(h):
    '''
    Given a Hamming distance h, compute the corresponding JC69-corrected phylogenetic distance.

    Computes -3/4 * ln(1 - 4h/3). A Hamming distance of 0 maps to exactly 0.0.

    Raises ValueError if 1 - 4h/3 is not positive (i.e. h >= 0.75), where the logarithm is undefined.
    '''
    if h == 0:
        return 0.0  # the formula would otherwise evaluate to -0.0
    arg = 1-(4*h/3)
    # log() would raise a bare "math domain error" here; keep the exception type but say why.
    if arg <= 0:
        raise ValueError("JC69 correction is undefined for Hamming distance >= 0.75 (got %s)" % h)
    return -3*log(arg)/4
57+
58+
def compute_jc69_corrected_distance_matrix(sequences):
    '''
    Given a dictionary of n sequences in which keys are identifiers and values are sequences, return an n-by-n pairwise distance matrix (dictionary of dictionaries) of JC69-corrected distances.

    The Hamming distance matrix is computed first, then each entry is passed through jc69_correction.
    '''
    hamming_matrix = compute_hamming_distance_matrix(sequences)
    corrected = {}
    for row_id in sequences:
        corrected[row_id] = {col_id: jc69_correction(hamming_matrix[row_id][col_id]) for col_id in sequences}
    return corrected
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
(((((((((((((((((((((((((((((((((((((((((((((99:0.0009837555885622296,98:0.0009837555885622296):0.012617995483941213,97:0.013601751072503443):0.01623194298567593,96:0.029833694058179372):0.006384384996249104,95:0.036218079054428476):0.001220073876505301,94:0.03743815293093378):0.019259239364383174,93:0.05669739229531695):0.0033714278457196434,92:0.060068820141036594):0.0020807292250444864,91:0.06214954936608108):0.00222461683497438,90:0.06437416620105546):0.002189466516701377,89:0.06656363271775684):0.03071457345210521,88:0.09727820616986205):0.0007657169468536784,87:0.09804392311671573):0.00307679784272169,86:0.10112072095943742):0.0033629292811728773,85:0.10448365024061029):0.01697244091923139,84:0.12145609115984168):0.005540723047795071,83:0.12699681420763675):0.008343383437540208,(82:0.0033637364602677744,81:0.0033637364602677744):0.1319764611849092):0.004898373954862811,(80:0.004031584790756104,79:0.004031584790756104):0.13620698680928367):0.011301155493334769,78:0.15153972709337454):0.0017657579105570809,77:0.15330548500393162):0.018298433637650435,76:0.17160391864158206):0.006318982832540704,75:0.17792290147412276):0.0018638034855667507,74:0.17978670495968951):0.0018284214586361536,73:0.18161512641832567):0.005416497788585661,72:0.18703162420691133):0.009267212160401689,71:0.19629883636731302):0.003229093489707374,70:0.1995279298570204):0.0014381859925296503,69:0.20096611584955004):0.00510062128909039,68:0.20606673713864043):0.0023196419109919597,67:0.2083863790496324):0.008186953842170913,66:0.2165733328918033):0.01966098795289062,65:0.23623432084469392):0.023093058624789636,64:0.2593273794694836):0.005416476731909689,(63:0.16872462647290276,((((((((((((62:0.013852158252180002,61:0.013852158252180002):0.015163672761747016,60:0.029015831013927018):0.011505252654723519,59:0.04052108366865054):4.1591481607983205e-05,58:0.04056267515025852):0.03282946738728776,57:0.07339214253754628):0.0017731699648132482,56:0.07516531250235953):0.01928157747403847,55:0.09444688
9976398):0.00678562932714355,54:0.10123251930354155):0.02449751556072144,53:0.12573003486426299):0.005815044565791905,52:0.1315450794300549):0.0030034531174717993,51:0.1345485325475267):0.01719363937150023,50:0.15174217191902692):0.016982454553875842):0.09601922972849049):0.007519046600015733,49:0.27226290280140897):0.0034530674134169853,(48:0.10640771037170249,(((((((((((((((((((((((47:0.0029250834750005894,46:0.0029250834750005894):0.0034355058569602304,45:0.00636058933196082):0.005440920788434167,44:0.011801510120394987):0.0007455197766266797,43:0.012547029897021666):0.001936381772736584,42:0.01448341166975825):0.015778063100820916,41:0.030261474770579166):0.0020704573613534727,40:0.03233193213193264):0.0012498280768054593,39:0.0335817602087381):0.004461021199604787,38:0.038042781408342885):0.00547685216693955,37:0.043519633575282435):0.0003616322744869338,36:0.04388126584976937):0.016992209382973023,35:0.06087347523274239):0.0004641055176082265,34:0.06133758075035062):0.005234768226126452,33:0.06657234897647707):0.007802722190871247,(32:0.03589049554668988,((((31:0.004374064132201427,30:0.004374064132201427):0.007809567397482786,29:0.012183631529684213):0.010285314389337052,28:0.022468945919021266):0.011331447459255684,27:0.03380039337827695):0.002090102168412933):0.038484575620658434):0.0004776542433548858,26:0.0748527254107032):0.0038950163626137457,25:0.07874774177331695):0.0016380131584897706,24:0.08038575493180672):0.00899443821535012,(23:0.0557023804511706,(((((22:0.00019410788092022457,21:0.00019410788092022457):0.006114619455001469,20:0.006308727335921693):0.016803595994933085,19:0.023112323330854778):0.009883422563005395,18:0.03299574589386017):0.0033475817365964944,17:0.03634332763045667):0.01935905282071393):0.03367781269598624):0.003647142194341868,16:0.09302733534149871):0.002991362644266138,15:0.09601869798576484):0.0019123888200362482,14:0.09793108680580109):0.003041806380459977,13:0.10097289318626107):0.005434817185441421):0.16930825984312348):0.
022187307818901936,12:0.2979032780337279):0.016552930012233555,11:0.31445620804596147):0.004471811486065208,10:0.31892801953202665):0.004268322054257791,9:0.32319634158628446):0.0020627512451838123,8:0.32525909283146826):0.03432632789287199,7:0.35958542072434024):0.01858015943421981,(6:0.03417100561937009,(((5:0.01912907732847202,4:0.01912907732847202):0.0011476658873309509,3:0.020276743215802973):0.013695657037984421,2:0.033972400253787394):0.0001986053655826936):0.34399457453918997):0.0019973768060964053,1:0.38016295696465646):0.0015534782801391016,0:0.3817164352447956):0.003900630947679524;
1+
((((((((((((((((((((((((((((((((((((((((99:0.038956578646457796,98:0.038956578646457796):0.01394086745283496,97:0.052897446099292755):0.0007638849809186166,96:0.05366133108021137):0.01090409022611194,95:0.06456542130632331):0.009032412666693745,94:0.07359783397301706):0.005577754711768079,93:0.07917558868478514):0.004311714608989381,92:0.08348730329377452):0.0007652112691891033,91:0.08425251456296362):0.013528780370385907,90:0.09778129493334953):0.005968708937741929,89:0.10375000387109146):0.004507484726092914,88:0.10825748859718437):0.018192910305074028,87:0.1264503989022584):0.00020646497835352662,86:0.12665686388061193):0.0037062729616518797,85:0.1303631368422638):0.004172411386502317,84:0.13453554822876612):0.011951062538782198,(83:0.12272567554970937,(((((((((((((82:0.0011922280576215916,81:0.0011922280576215916):0.0025933850657823276,80:0.003785613123403919):0.01299620700374099,79:0.01678182012714491):0.004995895345123791,78:0.0217777154722687):0.02224617953885144,77:0.04402389501112014):0.011803713780393799,76:0.05582760879151394):0.015909596376683488,75:0.07173720516819743):0.0003048929547763146,74:0.07204209812297374):0.0037605304280601826,73:0.07580262855103392):0.006184551120769488,72:0.08198717967180341):0.01974555104953224,71:0.10173273072133565):0.0026256482705022677,70:0.10435837899183792):0.016792131315634617,69:0.12115051030747254):0.0015751652422368312):0.023760935217838952):0.00031591868340979934,68:0.14680252945095812):0.02662605575004659,67:0.1734285852010047):0.013049177836361642,(66:0.055233823611872124,(((((((((65:0.00209991267913201,64:0.00209991267913201):0.0014791410645934033,63:0.0035790537437254133):0.0038351141122738297,62:0.007414167855999243):0.004598933508489722,61:0.012013101364488965):0.018763180730012485,60:0.03077628209450145):0.0012907897400005752,59:0.032067071834502026):0.0020102339275271386,58:0.034077305762029164):0.017922529060985404,57:0.05199983482301457):0.0004556850979191007,56:0.05245551992093367):0.002778303690938455)
:0.13124393942549423):0.0016607131603759984,55:0.18813847619774235):0.014570699273186327,(54:0.05546211242569421,(((((((53:0.015322301729098431,52:0.015322301729098431):0.006502242112338685,51:0.021824543841437116):1.5695404477211738e-06,50:0.021826113381884837):0.00013788990074692364,49:0.02196400328263176):0.013234008468670622,48:0.03519801175130238):0.010734298128924347,47:0.04593230988022673):0.000755448799230729,46:0.04668775867945746):0.008774353746236752):0.14724706304523447):0.0008411838209703704,45:0.20355035929189905):0.006482980440282671,44:0.21003333973218172):0.016867058960988085,43:0.2269003986931698):0.01366502488802776,42:0.24056542358119756):0.00980369863390615,41:0.2503691222151037):0.0012056056273427596,40:0.2515747278424465):0.01258084951988589,39:0.26415557736233236):0.025098775141502216,38:0.2892543525038346):0.02583722596685306,37:0.31509157847068764):0.003410547889054885,36:0.3185021263597425):0.007522712105463958,35:0.32602483846520647):0.00036890688374553726,34:0.32639374534895205):0.00486021082869989,(33:0.19169351556775716,((((((((((((((((((((32:0.0047601520319067725,31:0.0047601520319067725):0.004372468841329691,30:0.009132620873236463):0.01639772437202419,29:0.025530345245260655):0.0019352521222903984,28:0.027465597367551053):0.019331973396637725,27:0.04679757076418878):0.01572945019512112,26:0.0625270209593099):0.031201756826761684,25:0.09372877778607158):0.004636938986474715,24:0.0983657167725463):0.00019580922289774705,23:0.09856152599544404):0.002491603037301726,22:0.10105312903274577):0.007251343354677919,21:0.10830447238742369):0.003855611389514735,20:0.11216008377693842):0.015012550900493749,19:0.12717263467743217):0.007999833964583314,18:0.1351724686420155):0.0025777313142931746,(17:0.04105149816790821,(((((16:0.002558400857104959,15:0.002558400857104959):0.008019479461173151,14:0.01057788031827811):0.00140278975407232,13:0.01198067007235043):0.0005730434570699328,12:0.012553713529420363):0.01987616578287943,11:0.032429879312299
79):0.008621618855608415):0.09669870178840045):0.0024866679957714077,10:0.14023686795208007):0.0016596913062111307,9:0.1418965592582912):0.013149997550159442,8:0.15504655680845064):0.016913509987838166,7:0.1719600667962888):0.013965608522324008,6:0.18592567531861282):0.00576784024914434):0.13956044060989475):0.009034236991771388,5:0.34028819316942327):0.0007619471962584945,4:0.3410501403656818):0.016633260241499415,3:0.3576834006071812):0.009522223250375973,2:0.3672056238575572):0.0010102281770183984,1:0.3682158520345756):7.251470109967406e-05,0:0.3682883667356753):0.031060887508719184;

0 commit comments

Comments
 (0)