#!~/.pyenv/versions/anaconda3-2019.03/envs/my-rdkit-env/bin/python # -- coding: utf-8 -- #source activate my-rdkit-env #source deactivate from __future__ import print_function from sys import argv from math import log from rdkit import Chem import numpy as np from rdkit.Chem import AllChem import re from rdkit.Chem import MACCSkeys from random import randint from random import sample from copy import deepcopy import os import subprocess # mol2_atom_pat=re.compile("^\s+[0-9]+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+([^\s\.]+)") # mol2_atom_pat2=re.compile("^(\s+[0-9]+\s+\S+)\s+\S+\s+\S+\s+\S+(.+)$") import sys #sys.path.append("/hom/masuda/Drug_Search_Python") dire=os.path.abspath(__file__).split("/") dire.pop(-1) dire="/".join(dire) sys.path.append(dire) import func_mol2 as func # import bond_rotate_mol #start function if len(argv) != 11: print("Usage: "+argv[0]+" [target mol] [PDB ligand PDB file list] [protein PDB] [num_cycle] [num_fit] [output_dir] [k1] [k2] [k3] [k4]") exit() protein=func.read_ATOM(argv[3]) #amino=sum([protein[2][i+1].split()[4:] for i in range(int(int(protein[2][1].split()[3])/13+1))],[])##6/18 logP=func.logP(dire+"/logP.list")##6/18##最初[置き換え] 最後[追加] 検討 #print(logP) #print(len(logP)) for res in protein[0]: for atom in protein[1][res]: #print("res[:3],atom[0]=",res[:3],atom[1]) if atom[1]=="OXT": atom.append(-0.2893) continue atom.append(logP[res[:3]][atom[1]]) # print("protein=",protein) if Chem.MolFromMolFile(argv[1])==None: print("This is not molfile") exit() mol0=func.read_mol(argv[1]) #f=open(argv[2],"r") #filename=f.read().split("\n") #f.close() #filename.pop(-1) filename=open(argv[2]).readlines() mol1=[] protein_pdb=[] for i in filename: #print("i=",i) if Chem.MolFromPDBFile(i[:-1]) == None: continue if Chem.MolFromPDBFile(i[:-1]).GetNumAtoms()==0: continue mol1.append(func.read_HETATM(i[:-1])) func.reduce_pdb(protein,mol1,15.0) #within 10.0A #print(protein[1]) j=0 l=[] for res in protein[0]: for el,at,x,y,z,logP in protein[1][res]: li="ATOM "+'{:6d}'.format(j+1) li+=" "+'{: <4s}'.format(at) li+=res+" " li+='{:8.3f}'.format(x) li+='{:8.3f}'.format(y) li+='{:8.3f}'.format(z) li+=" 1.0" li+='{:8.3f}'.format(logP) l.append(li) j+=1 pid = os.getpid() dev_shm="/dev/shm/"+str(pid) reduced=dev_shm+"_reduced.pdb" f=open(reduced,"w") for i in l: f.write(i+"\n") f.close() pos0=func.get_pos_mol(mol0[1]) pos1=[] for i in mol1: pos1.append(func.get_pos_mol(i[1])) fp0=MACCSkeys.FingerprintMol(mol0[1]) fp1=[] for i in mol1: fp1.append(MACCSkeys.GenMACCSKeys(i[1])) # print("fp0=",fp0,"fp1=",fp1) fp=[] len_mol1=len(mol1) for i in range(len_mol1): fp_n=(np.array(fp0) & np.array(fp1[i])).tolist() fp.append(fp_n[1:]) smarts=[i[1][0] for i in list(MACCSkeys.smartsPatts.items())] share=[[smarts[i] for i, x in enumerate(fp[j]) if x == 1]for j in range(len_mol1)] idx0,idx1={},{} for i in range(len_mol1): for j in share[i]: if j=="?": continue if not j in idx0: idx0[j]=[] idx1[j]=[] idx0j=mol0[1].GetSubstructMatches(Chem.MolFromSmarts(j),uniquify=False) for k in idx0j: tmp=[0]+list(k) if not tmp in idx0[j]: idx0[j].append(tmp) idx1j=mol1[i][1].GetSubstructMatches(Chem.MolFromSmarts(j),uniquify=False) for k in idx1j: tmp=[i]+list(k) if not tmp in idx1[j]: idx1[j].append([i]+list(k)) #max1,max2=[],[] #for i in idx1.keys(): # max1.append(len(idx1[i])) # max2.append(len(idx1[i][0])) #print(len(idx1),max(max1),max(max2)) #exit() skeys=idx1.keys() ligall=eval("np.vstack(("+",".join(["pos1["+str(i)+"]" for i in range(len_mol1)])+"))") #ligand all MAX=np.max(ligall,axis=0) MIN=np.min(ligall,axis=0) protein_atom=[] for i in protein[1].values(): protein_atom.extend(i) MAX=np.maximum(MAX,np.max([i[2:-1] for i in protein_atom],axis=0)) MIN=np.minimum(MIN,np.min([i[2:-1] for i in protein_atom],axis=0)) kizami=0.2 r = [ ["N",0.71,2.14], ["O",0.64,1.70], ["C",0.75,2.14], #C,O,N diff ["CL",1.00,2.32], #2019/10/10 cl->CL ["H",0.32,1.60], ["S",1.04,2.27], ["P",1.09,2.47], ["F",0.60,2.57], ["LI",1.30,2.69], ["BE",0.99,2.38], ["B",0.84,2.23], ["NA",1.60,2.99], ["MG",1.40,2.79], ["AL",1.24,2.63], ["SI",1.14,2.53], ["I",1.36,2.67], ["BR",1.17,2.47], ["SE",1.18,2.57], ["MO",1.46,2.85], ["FE",1.24,2.63], ["TE",1.37,2.76], ["ZN",1.20,2.59], ["CU",1.22,2.61], ["CO",1.18,2.57], ["RU",1.36,2.75], ["AS",1.20,2.59], ["SN",1.40,2.79], ["D",0.32,1.60], ["K",2.00,3.39], ["OS",1.41,2.80] #[1.70*X/0.75,3.08+(3.08-2.75)/(1.70-1.45)*(X-1.70)] ] #atomic and ionic radii of elements by Chemistry data booklet 2016 #add 1.38 o={} for i,k in enumerate(r): o[k[0]]=i #width = np.max([i[1:] for i in r]) #M,m=np.max(ligall,axis=0),np.min(ligall,axis=0) #if min(min(MAX-M),max(m-MIN)) < width: # width=min(min(MAX-M),max(m-MIN)) width = np.max([i[1:] for i in r])+0.1 #余裕をもたせた原子半径の最大値 + #ligand out protein #width=max(np.max(pos0,axis=0)-np.min(pos0,axis=0))*2 x,y,z=map(int,np.ceil((MAX - MIN + width*2)/kizami)) #0.2=kizami #print("x,y,z=(",x,",",y,",",z,")",sep="") #2020/07/12 print("x,y,z=(",x,",",y,",",z,")",sep="") #2020/07/12 #print(x,y,z) #2020/07/12 ##if x > 550 or y > 550 or z > 550: ## kizami=0.3 # width = np.max([i[1:] for i in r]) # if min(min(MAX-M),max(m-MIN)) < -width: # width=min(min(MAX-M),max(m-MIN)) ## x,y,z=map(int,np.ceil((MAX - MIN + width*2)/kizami)) #0.3=kizami N=1 k1,k2,k3,k4=float(argv[7]),float(argv[8]),float(argv[9]),float(argv[10]) #1.0,1.0 searchp=re.search("(.+).mol$",argv[1]) output_dir=searchp.groups()[0] #print(output_dir) #if not os.path.exists(output_dir): # os.makedirs(output_dir) #print("start:write") #pid = os.getpid() #dev_shm="/dev/shm/"+str(pid) f=open(dev_shm,"w") for i in mol0[2]: f.write(i+"\n") f.write(str(len(mol0[0]))+"\n") for i in mol0[0]: f.write(str(o[i[0].upper()])+" "+str(i[1])+" "+str(i[2])+" "+str(i[3])+" "+str(i[4])+"\n") #print("start:mol1") f.write(str(len_mol1)+"\n") for i in mol1: f.write(str(len(i[0]))+"\n") for j in i[0]: #print("j=",j) j[0]=o[j[0]] f.write(str(j[0])+" "+str(j[1])+" "+str(j[2])+" "+str(j[3])+"\n") f.write(str(len(idx0))+"\n") num_idx0=[] num_idx0l=[] for i in skeys: num_idx0.append(len(idx0[i])) num_idx0l.append(len(idx0[i][0])) #print("start:idx0") for i in num_idx0: f.write(str(i)+" ") f.write("\n") for i in num_idx0l: f.write(str(i)+" ") f.write("\n") for i in skeys: for j in idx0[i]: for k in range(len(j)): f.write(str(j[k])+" ") f.write("\n") #print("start:idx1") num_idx1=[] num_idx1l=[] for i in skeys: num_idx1.append(len(idx1[i])) num_idx1l.append(len(idx1[i][0])) for i in num_idx1: f.write(str(i)+" ") f.write("\n") for i in num_idx1l: f.write(str(i)+" ") f.write("\n") for i in skeys: for j in idx1[i]: for k in range(len(j)): f.write(str(j[k])+" ") f.write("\n") #min width kizami k2# f.write("m="+str(MIN[0])+" "+str(MIN[1])+" "+str(MIN[2])+" width="+str(width)+" kizami="+str(kizami)+" k1="+str(k1)+" k2="+str(k2)+" k3="+str(k3)+" k4="+str(k4)+"\n") f.write(str(len(r))+"\n") for i in r: f.write(str(i[1])+" "+str(i[2])+"\n") f.write(str(x)+" "+str(y)+" "+str(z)+"\n") for i in protein_atom: i[0]=o[i[0]] f.write(str(len(protein_atom))+"\n") for i in protein_atom: f.write(str(i[0])+" "+str(i[2])+" "+str(i[3])+" "+str(i[4])+" "+str(i[5])+"\n") f.close() func.wait_memory(70,30) mono=dire+"/main_mol2_monolith" subprocess.call([mono,dev_shm,argv[4],argv[5],argv[6],reduced]) #subprocess.call(["/home/masuda//main_mol2_monolith",dev_shm,argv[4],argv[5],argv[6],reduced]) m1,m2=dev_shm+".mol",dev_shm+".mol2" #print(reduced,dev_shm,m1,m2) subprocess.call(["rm",reduced,dev_shm,m1,m2])