Skip to content

Commit d42c7db

Browse files
authored
Merge pull request #39 from jupyter-guide/phylogenetics-fix
Significantly updated notebooks for example 2
2 parents 14c7f5c + 7edc9e5 commit d42c7db

File tree

10 files changed

+1106
-730
lines changed

10 files changed

+1106
-730
lines changed

example1/3-FitModel.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12067,10 +12067,10 @@ <h2 id="Select-a-Classification-Method-(default-SVM)">Select a Classification Me
1206712067

1206812068

1206912069

12070-
<div id="603ff06b-a4df-4091-b524-3acb21a775a2"></div>
12070+
<div id="f05043d6-ccd2-48f2-9e69-0c9264813300"></div>
1207112071
<div class="output_subarea output_widget_view ">
1207212072
<script type="text/javascript">
12073-
var element = $('#603ff06b-a4df-4091-b524-3acb21a775a2');
12073+
var element = $('#f05043d6-ccd2-48f2-9e69-0c9264813300');
1207412074
</script>
1207512075
<script type="application/vnd.jupyter.widget-view+json">
1207612076
{"model_id": "3c60c6fc2644436ebf8b9b092e6f8612", "version_major": 2, "version_minor": 0}

example1/4-Predict.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11855,10 +11855,10 @@ <h2 id="Enter-a-Protein-Sequence-in-Text-Box">Enter a Protein Sequence in Text B
1185511855

1185611856

1185711857

11858-
<div id="5ae3b825-8c7b-49d5-8799-28d0a7342f60"></div>
11858+
<div id="7c042306-37c1-49ad-b482-1bae4dc7d691"></div>
1185911859
<div class="output_subarea output_widget_view ">
1186011860
<script type="text/javascript">
11861-
var element = $('#5ae3b825-8c7b-49d5-8799-28d0a7342f60');
11861+
var element = $('#7c042306-37c1-49ad-b482-1bae4dc7d691');
1186211862
</script>
1186311863
<script type="application/vnd.jupyter.widget-view+json">
1186411864
{"model_id": "e070e5e45045460e91c7c2b703f2e994", "version_major": 2, "version_minor": 0}

example2/0-Workflow.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@
198198
"name": "python",
199199
"nbconvert_exporter": "python",
200200
"pygments_lexer": "ipython3",
201-
"version": "3.6.3"
201+
"version": "3.7.0"
202202
}
203203
},
204204
"nbformat": 4,

example2/1-SimulateTree.html

Lines changed: 196 additions & 24 deletions
Large diffs are not rendered by default.

example2/1-SimulateTree.ipynb

Lines changed: 129 additions & 23 deletions
Large diffs are not rendered by default.

example2/2-SimulateSequences.html

Lines changed: 115 additions & 90 deletions
Large diffs are not rendered by default.

example2/2-SimulateSequences.ipynb

Lines changed: 97 additions & 87 deletions
Large diffs are not rendered by default.

example2/helper.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
'''
3+
Helper functions for Example 2: "Simulate Phylogenetic Trees and Sequences"
4+
'''
5+
from itertools import combinations
from math import log
6+
7+
def read_FASTA(filename):
    '''
    Given the name of a FASTA file, parse the contents of the FASTA file and return a dictionary in which keys are identifiers and values are sequences.

    Each line is stripped of surrounding whitespace and blank lines are skipped. A line starting with '>' begins a new record whose identifier is everything after the '>'. All following non-header lines are concatenated to form that record's sequence.

    Raises AssertionError("Malformed FASTA") if a record has no sequence data or the file contains no records, and AssertionError("Duplicate sequence ID: ...") if a header repeats the identifier of an already-stored record.
    '''
    seqs = {}
    name = None
    parts = []  # pieces of the current record's sequence; joined once per record
    # 'with' guarantees the file is closed even when a malformed record aborts parsing
    with open(filename) as f:
        for line in f:
            l = line.strip()
            if not l:
                continue
            if l[0] == '>':
                if name is not None:
                    # Raised explicitly (rather than via 'assert') so that validation
                    # still runs under 'python -O'; the exception type is unchanged.
                    if not parts:
                        raise AssertionError("Malformed FASTA")
                    seqs[name] = ''.join(parts)
                name = l[1:]
                if name in seqs:
                    raise AssertionError("Duplicate sequence ID: %s" % name)
                parts = []
            else:
                parts.append(l)
    if name is None or not parts:
        raise AssertionError("Malformed FASTA")
    seqs[name] = ''.join(parts)
    return seqs
32+
33+
def distance_matrix_to_list(dm):
    '''
    Convert an n-by-n pairwise distance matrix (dictionary of dictionaries) into a list of n(n-1)/2 pairwise distances. Sorts keys in ascending order.

    For every pair of keys (u, v) with u before v in sorted order, the value dm[u][v] is emitted; dm[v][u] and the diagonal are never read. Returns an empty list when dm has fewer than two keys.
    '''
    keys = sorted(dm)
    # combinations() yields (keys[i], keys[j]) for i < j in the same order as a nested index loop
    return [dm[u][v] for u, v in combinations(keys, 2)]
39+
40+
def hamming(u,v):
    '''
    Given a pair of sequences u and v with equal lengths, compute the Hamming distance between u and v (as a proportion, not a count).

    Raises ValueError if u and v differ in length, instead of silently ignoring the tail of the longer sequence. Two empty sequences raise ZeroDivisionError, because the proportion is undefined when there are no positions to compare.
    '''
    if len(u) != len(v):
        raise ValueError("Sequences must have equal lengths (got %d and %d)" % (len(u), len(v)))
    mismatches = sum(a != b for a, b in zip(u, v))
    return mismatches/float(len(u))
45+
46+
def compute_hamming_distance_matrix(sequences):
    '''
    Build the full pairwise Hamming distance matrix for a dictionary of sequences.

    `sequences` maps identifiers to sequences. The result is a dictionary of dictionaries in which result[u][v] is hamming(sequences[u], sequences[v]) for every ordered pair of identifiers, including u == v.
    '''
    matrix = {}
    for row_id in sequences:
        row = {}
        for col_id in sequences:
            row[col_id] = hamming(sequences[row_id], sequences[col_id])
        matrix[row_id] = row
    return matrix
51+
52+
def jc69_correction(h):
    '''
    Given a Hamming distance h, compute the corresponding JC69-corrected phylogenetic distance.

    The correction is -3/4 * ln(1 - 4h/3), which is only defined for h < 0.75. Raises ValueError with a descriptive message when h >= 0.75, where the logarithm's argument would be zero or negative. Returns exactly 0.0 for h == 0, so identical sequences do not show up as "-0.0".
    '''
    if h >= 0.75:
        raise ValueError("JC69 correction is undefined for Hamming distance >= 0.75 (got %s)" % h)
    if h == 0:
        # -3*log(1)/4 evaluates to -0.0; return a clean positive zero instead
        return 0.0
    return -3*log(1-(4*h/3))/4
57+
58+
def compute_jc69_corrected_distance_matrix(sequences):
    '''
    Build the full pairwise matrix of JC69-corrected distances for a dictionary of sequences.

    `sequences` maps identifiers to sequences. The Hamming distance matrix is computed first, then jc69_correction is applied to every entry. The result is a dictionary of dictionaries keyed by identifier in both dimensions.
    '''
    raw = compute_hamming_distance_matrix(sequences)
    corrected = {}
    for row_id in sequences:
        raw_row = raw[row_id]
        corrected[row_id] = {col_id: jc69_correction(raw_row[col_id]) for col_id in sequences}
    return corrected
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
(((((((((((((((((((((((((((((((((((((((((((((99:0.0009837555885622296,98:0.0009837555885622296):0.012617995483941213,97:0.013601751072503443):0.01623194298567593,96:0.029833694058179372):0.006384384996249104,95:0.036218079054428476):0.001220073876505301,94:0.03743815293093378):0.019259239364383174,93:0.05669739229531695):0.0033714278457196434,92:0.060068820141036594):0.0020807292250444864,91:0.06214954936608108):0.00222461683497438,90:0.06437416620105546):0.002189466516701377,89:0.06656363271775684):0.03071457345210521,88:0.09727820616986205):0.0007657169468536784,87:0.09804392311671573):0.00307679784272169,86:0.10112072095943742):0.0033629292811728773,85:0.10448365024061029):0.01697244091923139,84:0.12145609115984168):0.005540723047795071,83:0.12699681420763675):0.008343383437540208,(82:0.0033637364602677744,81:0.0033637364602677744):0.1319764611849092):0.004898373954862811,(80:0.004031584790756104,79:0.004031584790756104):0.13620698680928367):0.011301155493334769,78:0.15153972709337454):0.0017657579105570809,77:0.15330548500393162):0.018298433637650435,76:0.17160391864158206):0.006318982832540704,75:0.17792290147412276):0.0018638034855667507,74:0.17978670495968951):0.0018284214586361536,73:0.18161512641832567):0.005416497788585661,72:0.18703162420691133):0.009267212160401689,71:0.19629883636731302):0.003229093489707374,70:0.1995279298570204):0.0014381859925296503,69:0.20096611584955004):0.00510062128909039,68:0.20606673713864043):0.0023196419109919597,67:0.2083863790496324):0.008186953842170913,66:0.2165733328918033):0.01966098795289062,65:0.23623432084469392):0.023093058624789636,64:0.2593273794694836):0.005416476731909689,(63:0.16872462647290276,((((((((((((62:0.013852158252180002,61:0.013852158252180002):0.015163672761747016,60:0.029015831013927018):0.011505252654723519,59:0.04052108366865054):4.1591481607983205e-05,58:0.04056267515025852):0.03282946738728776,57:0.07339214253754628):0.0017731699648132482,56:0.07516531250235953):0.01928157747403847,55:0.09444688
9976398):0.00678562932714355,54:0.10123251930354155):0.02449751556072144,53:0.12573003486426299):0.005815044565791905,52:0.1315450794300549):0.0030034531174717993,51:0.1345485325475267):0.01719363937150023,50:0.15174217191902692):0.016982454553875842):0.09601922972849049):0.007519046600015733,49:0.27226290280140897):0.0034530674134169853,(48:0.10640771037170249,(((((((((((((((((((((((47:0.0029250834750005894,46:0.0029250834750005894):0.0034355058569602304,45:0.00636058933196082):0.005440920788434167,44:0.011801510120394987):0.0007455197766266797,43:0.012547029897021666):0.001936381772736584,42:0.01448341166975825):0.015778063100820916,41:0.030261474770579166):0.0020704573613534727,40:0.03233193213193264):0.0012498280768054593,39:0.0335817602087381):0.004461021199604787,38:0.038042781408342885):0.00547685216693955,37:0.043519633575282435):0.0003616322744869338,36:0.04388126584976937):0.016992209382973023,35:0.06087347523274239):0.0004641055176082265,34:0.06133758075035062):0.005234768226126452,33:0.06657234897647707):0.007802722190871247,(32:0.03589049554668988,((((31:0.004374064132201427,30:0.004374064132201427):0.007809567397482786,29:0.012183631529684213):0.010285314389337052,28:0.022468945919021266):0.011331447459255684,27:0.03380039337827695):0.002090102168412933):0.038484575620658434):0.0004776542433548858,26:0.0748527254107032):0.0038950163626137457,25:0.07874774177331695):0.0016380131584897706,24:0.08038575493180672):0.00899443821535012,(23:0.0557023804511706,(((((22:0.00019410788092022457,21:0.00019410788092022457):0.006114619455001469,20:0.006308727335921693):0.016803595994933085,19:0.023112323330854778):0.009883422563005395,18:0.03299574589386017):0.0033475817365964944,17:0.03634332763045667):0.01935905282071393):0.03367781269598624):0.003647142194341868,16:0.09302733534149871):0.002991362644266138,15:0.09601869798576484):0.0019123888200362482,14:0.09793108680580109):0.003041806380459977,13:0.10097289318626107):0.005434817185441421):0.16930825984312348):0.
022187307818901936,12:0.2979032780337279):0.016552930012233555,11:0.31445620804596147):0.004471811486065208,10:0.31892801953202665):0.004268322054257791,9:0.32319634158628446):0.0020627512451838123,8:0.32525909283146826):0.03432632789287199,7:0.35958542072434024):0.01858015943421981,(6:0.03417100561937009,(((5:0.01912907732847202,4:0.01912907732847202):0.0011476658873309509,3:0.020276743215802973):0.013695657037984421,2:0.033972400253787394):0.0001986053655826936):0.34399457453918997):0.0019973768060964053,1:0.38016295696465646):0.0015534782801391016,0:0.3817164352447956):0.003900630947679524;
1+
((((((((((((((((((((((((((((((99:0.0011961269293227494,98:0.0011961269293227494):0.006053257124301492,97:0.0072493840536242415):0.007072300810170129,96:0.01432168486379437):0.004865027215367856,95:0.019186712079162227):0.00870118851843349,94:0.027887900597595716):0.015202074427745155,93:0.04308997502534087):0.011375048996689452,92:0.05446502402203032):0.019588811422732305,91:0.07405383544476263):0.0001559712090700438,90:0.07420980665383267):0.009200403236523241,89:0.08341020989035591):0.008186584125174201,88:0.09159679401553011):0.009564062092791203,87:0.10116085610832132):0.0022904263723414198,86:0.10345128248066274):0.010115432468064012,85:0.11356671494872675):0.012000778446708438,84:0.1255674933954352):0.0021137770980621806,83:0.12768127049349737):0.00974725738452667,82:0.13742852787802404):0.007360578396286038,81:0.14478910627431008):0.00984897276864155,80:0.15463807904295163):0.0013227301243575185,79:0.15596080916730914):0.014070026754646386,78:0.17003083592195553):0.0034731873857477003,77:0.17350402330770323):0.0008750281017308181,76:0.17437905140943405):0.011413431912480301,75:0.18579248332191434):0.016352171050014422,(74:0.054704452810094534,((((((((73:0.004378900020032939,72:0.004378900020032939):0.002912794033210264,71:0.007291694053243203):8.294728063829027e-05,70:0.007374641333881493):0.006696436736095601,69:0.014071078069977094):0.0010269184953954624,68:0.015097996565372557):0.01129146646266141,67:0.026389463028033966):0.013119567482487549,66:0.039509030510521514):0.012717889240015434,65:0.05222691975053695):0.002477533059557585):0.14744020156183424):0.01926528106989045,64:0.22140993544181922):0.0035992310694699867,(63:0.20826086376126884,(((((((((((((((((((62:0.010225528430748448,61:0.010225528430748448):0.012124830901899225,60:0.022350359332647673):0.010634655592253905,(59:0.01836265795328429,(((58:0.012798315791717668,57:0.012798315791717668):0.001143263864058719,56:0.013941579655776387):0.0006513075658990397,55:0.014592887221675427):0.00376977073160
88624):0.01462235697161729):0.023051729860410902,54:0.05603674478531248):0.004613016231214573,53:0.060649761016527054):0.009475290346033782,52:0.07012505136256084):0.015529145931696103,51:0.08565419729425694):0.03238712437576452,(50:0.0939990645554519,(((((((((((((49:0.007694863451471301,48:0.007694863451471301):0.006116466751465793,47:0.013811330202937094):0.012914618491869256,46:0.02672594869480635):0.004848105911205408,45:0.03157405460601176):0.005745984193745751,44:0.03732003879975751):0.004800464229214235,43:0.042120503028971745):0.02576535509332986,42:0.0678858581223016):0.004837245325973238,41:0.07272310344827485):0.0036816493427568597,40:0.0764047527910317):3.796663400967848e-05,39:0.07644271942504138):0.0018869224096947634,38:0.07832964183473615):0.008742818059352647,37:0.0870724598940888):0.0021911034208522973,36:0.08926356331494109):0.004735501240510809):0.02404225711456956):0.007342441100261138,(35:0.09638969753421364,(((((((((((((((34:0.0007165631241218828,33:0.0007165631241218828):0.0007837199992689614,32:0.0015002831233908442):0.0058675817934971675,31:0.007367864916888012):0.0010572434466685765,30:0.008425108363556588):0.009229045866893076,29:0.017654154230449665):0.0007677342862057179,28:0.018421888516655383):0.000910089205764214,27:0.019331977722419597):0.0025383078953237215,26:0.021870285617743318):0.009124073659376758,25:0.030994359277120076):0.03270660886015181,24:0.06370096813727189):0.001526441379502197,23:0.06522740951677408):0.01577988374884895,22:0.08100729326562303):0.004559935809367421,21:0.08556722907499045):0.003989758215971195,20:0.08955698729096165):0.0015739939117153934,19:0.09113098120267704):0.0052587163315366):0.028994065236068955):0.012068525525458423,18:0.13745228829574102):0.00035332122399114874,17:0.13780560951973217):0.008830267146082385,16:0.14663587666581457):0.002103465705629373,15:0.14873934237144393):0.0039132765326004015,14:0.15265261890404433):0.0025443785482816422,13:0.15519699745232596):0.022109230273715558,12:0.17730
622772604154):0.009025557973672702,11:0.18633178569971423):0.0043174923913942564,10:0.1906492780911085):0.00868381368934875,9:0.19933309178045724):0.008927771980811593):0.01674830275002038):0.007607306540993813,(8:0.03606500491958481,(((((7:0.011908825234297316,6:0.011908825234297316):0.005182237863106287,5:0.017091063097403603):0.008524464170207052,4:0.025615527267610655):0.003976573879307721,3:0.029592101146918376):0.003185795360330451,2:0.03277789650724883):0.0032871084123359817):0.19655146813269822):0.001502914615148225,1:0.23411938766743123):0.0069382105339692995,0:0.24105759820140055):0.01739335597355307;

0 commit comments

Comments
 (0)