-
Notifications
You must be signed in to change notification settings - Fork 1
/
load_results_into_PyMOL.pml
126 lines (95 loc) · 4.34 KB
/
load_results_into_PyMOL.pml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# -*- coding: utf-8 -*-
''' This script loads the results into a PyMOL session by
i) loading the structures in the results folder (*.cif.gz)
ii) annotating the ligand selections (by parsing the ligands.csv file in the results)
Note: The script should be loaded from the default root directory of the result files through PyMOL. A PyMOL installation is required.
'''
import glob
import os
import sys
# Start python session
python
# Specify ligands file
ligands_file = 'ligands.csv'

# Find query file & struct.
# query_file   : path of the query structure (the last glob match wins)
# query_object : first 10 characters of the query file name
# query_struct : characters 6-9 of the file name, i.e. the four characters
#                that follow the 'query_' prefix (presumably the PDB code)
query_file = None
query_object = None
query_struct = None
for filename in glob.glob('structure_files/query_*'):
    query_file = filename
if query_file is None:
    # Nothing matched: abort, as the original error handler intended.
    # (The former 'except Exception():' named an instance rather than a
    # class, so it could never actually reach sys.exit.)
    print('Query structure file [structure_files/query_*] not found')
    sys.exit(1)
# Slice the file NAME, not the path: the first ten characters of the path
# are always 'structure_', which is never a useful identifier.
query_name = os.path.basename(query_file)
query_object = query_name[:10]
query_struct = query_name[6:10]
# Put ligands into list (bulk): one entry per data row of the ligands file,
# header row excluded. The list stays empty if the file is missing.
ligands_list = list()
if os.path.exists(ligands_file):
    with open(ligands_file, 'r') as ligs_in:
        ligs_header = ligs_in.readline()  # first row is the column header
        for line in ligs_in:
            # Strip only the line terminator. Dropping the last character
            # unconditionally would truncate the final row when the file
            # does not end with a newline.
            entry = line.rstrip('\r\n')
            # Skip blank rows so that later parsing never sees an empty entry
            if entry.strip():
                ligands_list.append(entry)
else:
    print('Ligands file [ligands.csv] not found')

# Get all structure filenames in the results folder
all_files = glob.glob('structure_files/*.cif.gz')
# Put ligands into dict (struct:positions)
# ligands_dict      : first CSV field (struct+chain id) -> ligand positions
# ligands_dict_bulk : first 4 characters of that id (PDB code) -> ligand
#                     positions of all its chains
# Both dicts always exist, so they are safe to query even without ligands.
ligands_dict = dict()
ligands_dict_bulk = dict()
if ligands_list:
    for lig_chain in ligands_list:
        fields = lig_chain.split(',')
        if len(fields) < 2:
            # Row without a positions column (e.g. blank line): skip it
            # instead of aborting the whole script with an IndexError
            continue
        structchain = fields[0]
        positions = fields[1].split('-')  # positions are '-' separated
        # This groups all ligands under same PDB code, not per chain
        ligands_dict_bulk.setdefault(structchain[:4], []).extend(positions)
        ligands_dict.setdefault(structchain, []).extend(positions)
    # Sort the positions of every entry and remove duplicate values
    for grouped in (ligands_dict_bulk, ligands_dict):
        for struct in grouped:
            grouped[struct] = sorted(set(grouped[struct]))
elif os.path.exists(ligands_file):
    # The file is present but held no usable rows
    print('Ligands file [ligands.csv] does not contain any ligands')
# Load all structures into PyMOL session and create one named selection per
# annotated ligand:
#   query objects  -> '<resname>_<chain><index>_query'
#   result objects -> '<resname>_<chain><index>_<result id>-<query id>'
for structure_path in all_files:
    cmd.load(structure_path)
    if not ligands_list:
        # Ligands file missing or empty: only load the structures
        continue
    # File name without the 7-character '.cif.gz' extension; assumed to be
    # the name PyMOL gives the loaded object -- TODO confirm
    object_name = os.path.basename(structure_path)[:-7]
    name_fields = object_name.split('_')
    if object_name.startswith('query'):  # Query
        if len(name_fields) < 2:
            continue  # no structure id in the name, nothing to annotate
        # The query gets the ligands of every chain of its structure
        lig_positions = ligands_dict_bulk.get(name_fields[1])
        sel_suffix = '_query'
    else:  # Result
        if len(name_fields) < 5:
            continue  # name lacks the result/query ids used below
        structchain_result = name_fields[1]
        structchain_query = name_fields[4]
        lig_positions = ligands_dict.get(structchain_result)
        sel_suffix = '_' + structchain_result + '-' + structchain_query
    if not lig_positions:
        continue  # no ligands recorded for this object
    for lig_position in lig_positions:
        # A position is '<index>_<chain>_<resname>'
        position_fields = lig_position.split('_')
        if len(position_fields) < 3:
            continue  # malformed position: skip rather than abort loading
        index, chain, resname = position_fields[:3]
        # Restructure ligand selection name
        lig_sel = resname + '_' + chain + index
        # Create selection
        cmd.select(lig_sel + sel_suffix, object_name + ' and chain ' + chain + ' and resi ' + index + ' and resn ' + resname)
cmd.deselect()
cmd.orient()
# End python session
python end