77#
88# 2016-03-22: created by Anthony Reilly, The Cambridge Crystallographic Data Centre
99# 2016-12-06: updated by Anthony Reilly, The Cambridge Crystallographic Data Centre
10- #
10+ # 2023-01-19: edited by Jonas Nyman for instances with large clusters
1111
1212"""Packing_Similarity_Dendrogram.py - Construct a dendrogram for an input set of structures based on packing-similarity
1313analysis
@@ -98,7 +98,7 @@ def merge_clusters(c1, c2, level):
9898 return new_cluster
9999
100100
101- def plot_dendrogram (cluster_list , n_ps_mols , filename ):
101+ def plot_dendrogram (cluster_list , n_ps_mols , filename , pad_length ):
102102 """
103103 Function for producing a dendrogram from an input cluster hierarchy
104104 """
@@ -187,43 +187,25 @@ def plot_tree(cluster, x_start_positions, y_start_positions, hs):
187187
188188 # Setup tree plotting by getting each terminal's height
189189 heights , count = assign_y_positions (cluster_list [0 ], 0 , {})
190-
191190 # Set start of the tree - middle of the plot and 1,1
192191 xpositions = [1 , 1 ]
193192 ypositions = [1 , get_midpoint (cluster_list [0 ], heights )]
194-
195193 # Plot tree
196194 plot_tree (cluster_list [0 ], xpositions , ypositions , heights )
197195
198- # Plot formatting
199- ax = plt .axes ()
200- ax .set_frame_on (False )
201- ax .axes .get_yaxis ().set_visible (False )
202- ax .spines ['top' ].set_visible (False )
203- ax .spines ['bottom' ].set_visible (False )
204- ax .spines ['left' ].set_visible (False )
205- ax .spines ['right' ].set_visible (False )
206-
207- levels = range (n_ps_mols , - 1 , - 1 )
208- highlighted_levels = range (n_ps_mols , - 1 , - 1 )
209- # highlighted_levels.append(0)
210-
211- for level in highlighted_levels :
212- plt .plot ([level , level ], [0 , count ], "--" , linewidth = 0.5 , color = "Gray" , zorder = 1 )
213-
214196 # Pad the plot to have enough space for structure indices
215- plt .xlim (- 1 , n_ps_mols + 5 )
197+ plt .xlim (- 1 , n_ps_mols + pad_length )
216198 plt .ylim (0 , count + 1 )
217- ax .set_xticks (levels )
218- ax . tick_params ( axis = 'x' , bottom = 'off' , top = 'off' )
199+ plt . xticks ( np . arange ( 1 , n_ps_mols + 2 , 2 )) # IJS 06/09/22 addition to replace ax.set_xticks(levels)
200+
219201 plt .xlabel ('Packing Similarity / ' + str (n_ps_mols ) + ' Molecules' , fontsize = 'large' )
220202 # Save output
221203 plt .savefig (filename + "_packing_similarity_tree.png" , dpi = 1000 , bbox_inches = 'tight' )
222204 print ("Packing tree diagram saved to " + filename + "_packing_similarity_tree.png" )
223205
224206
225207def main (input_file , matrix_file , n_ps_mols , output_ps_results , conf_threshold , ps_angles , ps_distances , strip ,
226- n_struct , allow_mol_diff , cluster_mode ):
208+ n_struct , allow_mol_diff , cluster_mode , pad_length ):
227209 # Initialise Packing Similarity
228210 ps = PackingSimilarity ()
229211 ps .settings .ignore_hydrogen_positions = True
@@ -244,10 +226,9 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
244226 ps .settings .packing_shell_size = n_ps_mols
245227 ps .settings .angle_tolerance = ps_angles
246228 ps .settings .distance_tolerance = ps_distances
247-
248229 refcodes = []
249230
250- input_name = input_file . rsplit ("." )[0 ]
231+ input_name = os . path . basename ( input_file ). split ("." )[0 ]
251232 print ("--------------------------------------------------------" )
252233
253234 if not matrix_file :
@@ -281,13 +262,11 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
281262 if not os .path .exists (overlay_folder ):
282263 os .makedirs (overlay_folder )
283264
284- for i in range (0 , structure_size ):
285- refcodes .append (str (structure_reader [i ].identifier ))
286-
287265 for i in range (0 , structure_size ):
288266 entry_i = structure_reader [i ]
289267 crystal_i = entry_i .crystal
290- refcodes .append (str (structure_reader [i ].identifier ))
268+ refcodes .append (str (i + 1 ))
269+
291270
292271 for j in range (i , structure_size ):
293272 if i == j :
@@ -395,9 +374,15 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
395374 y = np .arange (0 , structure_size + 1 , 1 )
396375
397376 plot = plt .pcolor (x , y , matrix , cmap = plt .get_cmap ('rainbow' , (n_ps_mols - 1 )), vmin = 1 , vmax = n_ps_mols )
398- plt .xticks (np .arange (0 , structure_size + 1 , 5 ) - 0.5 , np .arange (0 , structure_size + 1 , 5 ))
399- plt .yticks (np .arange (0 , structure_size + 1 , 5 ) - 0.5 , np .arange (0 , structure_size + 1 , 5 ))
400- cb = plt .colorbar (plot , ticks = range (1 , 16 ))
377+ if structure_size < 10 :
378+ plt .xticks (np .arange (0 , structure_size + 1 ) - 0.5 , np .arange (0 , structure_size + 1 ))
379+ plt .yticks (np .arange (0 , structure_size + 1 ) - 0.5 , np .arange (0 , structure_size + 1 ))
380+ else :
381+ plt .xticks (np .arange (0 , structure_size + 1 , 5 ) - 0.5 , np .arange (0 , structure_size + 1 , 5 ))
382+ plt .yticks (np .arange (0 , structure_size + 1 , 5 ) - 0.5 , np .arange (0 , structure_size + 1 , 5 ))
383+ cb = plt .colorbar (plot , ticks = range (1 , n_ps_mols + 1 ))
384+
385+
401386
402387 plt .xlim (0 , structure_size )
403388 plt .ylim (0 , structure_size )
@@ -407,11 +392,11 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
407392 cb .set_label ('Packing Similarity /' + str (n_ps_mols ) + ' Molecules' , fontsize = 'x-large' )
408393 plt .savefig (input_name + "_heat_map.png" , dpi = 300 )
409394 print ("Packing similarity heat map saved to " + input_name + "_heat_map.png" )
395+ ax .clear ()
410396 plt .close ()
411397
412398 # Plot a dendrogram
413- plot_dendrogram (cluster_list , n_ps_mols , input_name )
414-
399+ plot_dendrogram (cluster_list , n_ps_mols , input_name , pad_length )
415400 print ("--------------------------------------------------------" )
416401
417402 sys .exit ()
@@ -445,6 +430,8 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
445430 help = "Tolerance for angles (in degrees) used by packing similarity." )
446431 parser .add_argument ('-dt' , '--dist_tol' , type = float , default = 0.25 , metavar = "0.25" ,
447432 help = "Fractional tolerance for distances (0.0 - 1.0) used by packing similarity." )
433+ parser .add_argument ('-pd' , '--pad_length' , type = float , default = 5.0 , metavar = "0.25" ,
434+ help = "padding on right of the plot for listing identifiers" ) # IJS 06/09/22 addition, as I have an example with many refcodes that overspills the plot
448435 args = parser .parse_args ()
449436 if not os .path .isfile (args .input_file ):
450437 parser .error ('%s not found.' % args .input_file )
@@ -454,4 +441,4 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
454441
455442 main (args .input_file , args .matrix , args .n_molecules , args .o , args .conf_tol , args .angle_tol ,
456443 args .dist_tol , args .strip , args .n_structures , args .allow_molecular_differences ,
457- args .clustering_type )
444+ args .clustering_type , args . pad_length )
0 commit comments