Skip to content

Commit a58d99b

Browse files
authored
Merge pull request #13 from ccdc-opensource/matplotlib_fix
Matplotlib fix
2 parents b1c0669 + b4c8456 commit a58d99b

File tree

2 files changed

+29
-37
lines changed

2 files changed

+29
-37
lines changed

scripts/packing_similarity_dendrogram/ReadMe.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,11 @@ similarity.
7272
-dt 0.25, --dist_tol 0.25
7373
Fractional tolerance for distances (0.0 - 1.0) used by
7474
packing similarity.
75+
-pd 5, --pad_length 5
76+
Pad the right-hand side of the plot to make room for structure names.
7577
```
7678

77-
## Basic usage (in a command prompt):
79+
## Basic usage (in a command prompt)
7880

7981
```cmd
8082
python Packing_Similarity_Dendrogram.py input_file
@@ -137,6 +139,9 @@ clusters at the worst or lowest level connecting them. Figure 2 shows the same d
137139
which gives more definite clustering but will also hide some similarities due to the hierarchical nature of the
138140
clustering.
139141

142+
If there are dangling nodes without an associated identifier, it is likely that there are multiple structures given the same name
143+
in the supplied file (for example, if the file was compiled by searching multiple databases).
144+
140145
![Figure 1](dendogram_figure_1.png)
141146
**Figure 1** : Example dendrogram based on a database of carbamazepine solid forms using the default single-linkage
142147
clustering.

scripts/packing_similarity_dendrogram/packing_similarity_dendogram.py

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#
88
# 2016-03-22: created by Anthony Reilly, The Cambridge Crystallographic Data Centre
99
# 2016-12-06: updated by Anthony Reilly, The Cambridge Crystallographic Data Centre
10-
#
10+
# 2023-01-19: edited by Jonas Nyman for instances with large clusters
1111

1212
"""Packing_Similarity_Dendrogram.py - Construct a dendrogram for an input set of structures based on packing-similarity
1313
analysis
@@ -98,7 +98,7 @@ def merge_clusters(c1, c2, level):
9898
return new_cluster
9999

100100

101-
def plot_dendrogram(cluster_list, n_ps_mols, filename):
101+
def plot_dendrogram(cluster_list, n_ps_mols, filename, pad_length):
102102
"""
103103
Function for producing a dendrogram from an input cluster hierarchy
104104
"""
@@ -187,43 +187,25 @@ def plot_tree(cluster, x_start_positions, y_start_positions, hs):
187187

188188
# Setup tree plotting by getting each terminal's height
189189
heights, count = assign_y_positions(cluster_list[0], 0, {})
190-
191190
# Set start of the tree - middle of the plot and 1,1
192191
xpositions = [1, 1]
193192
ypositions = [1, get_midpoint(cluster_list[0], heights)]
194-
195193
# Plot tree
196194
plot_tree(cluster_list[0], xpositions, ypositions, heights)
197195

198-
# Plot formatting
199-
ax = plt.axes()
200-
ax.set_frame_on(False)
201-
ax.axes.get_yaxis().set_visible(False)
202-
ax.spines['top'].set_visible(False)
203-
ax.spines['bottom'].set_visible(False)
204-
ax.spines['left'].set_visible(False)
205-
ax.spines['right'].set_visible(False)
206-
207-
levels = range(n_ps_mols, -1, -1)
208-
highlighted_levels = range(n_ps_mols, -1, -1)
209-
# highlighted_levels.append(0)
210-
211-
for level in highlighted_levels:
212-
plt.plot([level, level], [0, count], "--", linewidth=0.5, color="Gray", zorder=1)
213-
214196
# Pad the plot to have enough space for structure indices
215-
plt.xlim(-1, n_ps_mols + 5)
197+
plt.xlim(-1, n_ps_mols + pad_length)
216198
plt.ylim(0, count + 1)
217-
ax.set_xticks(levels)
218-
ax.tick_params(axis='x', bottom='off', top='off')
199+
plt.xticks(np.arange(1,n_ps_mols+2,2)) # IJS 06/09/22 addition to replace ax.set_xticks(levels)
200+
219201
plt.xlabel('Packing Similarity / ' + str(n_ps_mols) + ' Molecules', fontsize='large')
220202
# Save output
221203
plt.savefig(filename + "_packing_similarity_tree.png", dpi=1000, bbox_inches='tight')
222204
print("Packing tree diagram saved to " + filename + "_packing_similarity_tree.png")
223205

224206

225207
def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold, ps_angles, ps_distances, strip,
226-
n_struct, allow_mol_diff, cluster_mode):
208+
n_struct, allow_mol_diff, cluster_mode, pad_length):
227209
# Initialise Packing Similarity
228210
ps = PackingSimilarity()
229211
ps.settings.ignore_hydrogen_positions = True
@@ -244,10 +226,9 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
244226
ps.settings.packing_shell_size = n_ps_mols
245227
ps.settings.angle_tolerance = ps_angles
246228
ps.settings.distance_tolerance = ps_distances
247-
248229
refcodes = []
249230

250-
input_name = input_file.rsplit(".")[0]
231+
input_name = os.path.basename(input_file).split(".")[0]
251232
print("--------------------------------------------------------")
252233

253234
if not matrix_file:
@@ -281,13 +262,11 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
281262
if not os.path.exists(overlay_folder):
282263
os.makedirs(overlay_folder)
283264

284-
for i in range(0, structure_size):
285-
refcodes.append(str(structure_reader[i].identifier))
286-
287265
for i in range(0, structure_size):
288266
entry_i = structure_reader[i]
289267
crystal_i = entry_i.crystal
290-
refcodes.append(str(structure_reader[i].identifier))
268+
refcodes.append(str(i+1))
269+
291270

292271
for j in range(i, structure_size):
293272
if i == j:
@@ -395,9 +374,15 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
395374
y = np.arange(0, structure_size + 1, 1)
396375

397376
plot = plt.pcolor(x, y, matrix, cmap=plt.get_cmap('rainbow', (n_ps_mols - 1)), vmin=1, vmax=n_ps_mols)
398-
plt.xticks(np.arange(0, structure_size + 1, 5) - 0.5, np.arange(0, structure_size + 1, 5))
399-
plt.yticks(np.arange(0, structure_size + 1, 5) - 0.5, np.arange(0, structure_size + 1, 5))
400-
cb = plt.colorbar(plot, ticks=range(1, 16))
377+
if structure_size < 10:
378+
plt.xticks(np.arange(0, structure_size + 1) - 0.5, np.arange(0, structure_size + 1))
379+
plt.yticks(np.arange(0, structure_size + 1) - 0.5, np.arange(0, structure_size + 1))
380+
else:
381+
plt.xticks(np.arange(0, structure_size + 1, 5) - 0.5, np.arange(0, structure_size + 1, 5))
382+
plt.yticks(np.arange(0, structure_size + 1, 5) - 0.5, np.arange(0, structure_size + 1, 5))
383+
cb = plt.colorbar(plot, ticks=range(1, n_ps_mols+1))
384+
385+
401386

402387
plt.xlim(0, structure_size)
403388
plt.ylim(0, structure_size)
@@ -407,11 +392,11 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
407392
cb.set_label('Packing Similarity /' + str(n_ps_mols) + ' Molecules', fontsize='x-large')
408393
plt.savefig(input_name + "_heat_map.png", dpi=300)
409394
print("Packing similarity heat map saved to " + input_name + "_heat_map.png")
395+
ax.clear()
410396
plt.close()
411397

412398
# Plot a dendrogram
413-
plot_dendrogram(cluster_list, n_ps_mols, input_name)
414-
399+
plot_dendrogram(cluster_list, n_ps_mols, input_name, pad_length)
415400
print("--------------------------------------------------------")
416401

417402
sys.exit()
@@ -445,6 +430,8 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
445430
help="Tolerance for angles (in degrees) used by packing similarity.")
446431
parser.add_argument('-dt', '--dist_tol', type=float, default=0.25, metavar="0.25",
447432
help="Fractional tolerance for distances (0.0 - 1.0) used by packing similarity.")
433+
parser.add_argument('-pd', '--pad_length', type=float, default=5.0, metavar="0.25",
434+
help="padding on right of the plot for listing identifiers") # IJS 06/09/22 addition, as I have an example with many refcodes that overspill the plot
448435
args = parser.parse_args()
449436
if not os.path.isfile(args.input_file):
450437
parser.error('%s not found.' % args.input_file)
@@ -454,4 +441,4 @@ def main(input_file, matrix_file, n_ps_mols, output_ps_results, conf_threshold,
454441

455442
main(args.input_file, args.matrix, args.n_molecules, args.o, args.conf_tol, args.angle_tol,
456443
args.dist_tol, args.strip, args.n_structures, args.allow_molecular_differences,
457-
args.clustering_type)
444+
args.clustering_type, args.pad_length)

0 commit comments

Comments
 (0)