#include #include #include #include #include #include /* DockRMSD: an open-source tool for atom mapping and RMSD calculation of symmetric molecules through graph isomorphism Written by Eric Bell v1.0 written 5/2/2019 latest update (v1.1) written 8/26/2019 To compile, use: gcc DockRMSD.c -o DockRMSD -lm -O3 */ const int MAXBONDS=6; //Maximum number of bonds allowable on a single atom const int MAXLINELENGTH=150; //Maximum length (in characters) of a line in a mol2 file const double MAXMAPCOUNT=0; //Maximum amount of possible mappings before symmetry heuristic is used const int MAXDEPTH=2; const char* header="######################################################################\n" "# DockRMSD (v1.1): docking pose distance calculation #\n" "# #\n" "# If you use DockRMSD in your work, please cite: #\n" "# #\n" "# Bell, E.W., Zhang, Y. DockRMSD: an open-source tool for atom #\n" "# mapping and RMSD calculation of symmetric molecules through graph #\n" "# isomorphism. Journal of Cheminformatics, 11:40 (2019). #\n" "######################################################################\n"; int grabAtomCount(FILE* mol2,int hflag); int arrayIdentity(char** arr1,char** arr2, int arrlen); int inArray(int n, int* arr, int arrlen); void readMol2(char** atoms, double** coords, char*** bonds, int* nums, FILE* mol2, int atomcount, int hflag); int generalizeBonds(char*** bonds, int atomcount); char** buildTree(int depth, int index, char** atoms, char*** bonds, char* prestring, int prevind, int atomcount); double searchAssigns(int atomcount, int** allcands, int candcounts[], int* assign, char*** tempbond, char*** querybond, double** querycoord, double** tempcoord, int* bestassign); void assignAtoms(char** tempatom, char*** tempbond, char** queryatom, char*** querybond, double** querycoord, double** tempcoord, int* querynums, int* tempnums, int atomcount, int simpleflag); int validateBonds(int* atomassign, int proposedatom, int assignpos, char*** querybond, char*** tempbond, double querydists[], int atomcount); int main(int argc, char* argv[]){ //Check if all inputs are given and valid if (argc<3 || argc>6){ printf("Program must have a query and template mol2 filename, and at most 3 options\n"); return 1; } FILE* query=fopen(argv[1],"r"); FILE* template=fopen(argv[2],"r"); if (query==NULL){ printf("Query file %s not found!\n",argv[1]); return 1; } if (template==NULL){ printf("Template file %s not found!\n",argv[2]); return 1; } //Runtime flag processing int hflag=0; int corrflag=0; int simpleflag=0; int i; for (i=3;i 1 && line[strlen(line)-2]=='\r'){ //For windows line endings line[strlen(line)-2]='\n'; line[strlen(line)-1]='\0'; } if (!strcmp(line,"@ATOM\n")){ countflag=1; continue; } if (!strcmp(line,"@BOND\n")){ countflag=0; break; } if (countflag && strlen(line)>1){ char* token=strtok(line," \t"); int i; for (i=0;i<5;i++){ token=strtok(NULL," \t"); } if (hflag || strcmp(token,"H")){ atomcount++; } } } if(ferror(mol2)){ printf("Error %d while reading in file.\n",ferror(mol2)); } rewind(mol2); //resets the file pointer for use in other functions return atomcount; } //Fills atoms, coords, and bonds with information contained within a mol2 file void readMol2(char** atoms, double** coords, char*** bonds, int* nums, FILE* mol2, int atomcount, int hflag){ char line[MAXLINELENGTH]; int atomnums[atomcount]; //Keeps track of all non-H atom numbers for bond reading int i=0; int sectionflag=0; //Value is 1 when reading atoms, 2 when reading bonds, 0 before atoms, >2 after bonds while(fgets(line,MAXLINELENGTH,mol2)!=NULL){ if (strlen(line) < 2){ //Skip empty lines continue; } if (strlen(line) > 1 && line[strlen(line)-2]=='\r'){ //Handling windows line endings line[strlen(line)-2]='\n'; line[strlen(line)-1]='\0'; } if (!strcmp(line,"@ATOM\n") || (sectionflag && line[0]=='@')){ sectionflag++; } else if (sectionflag==1){ //Reading in atoms and coordinates double coord[3]; int j=0; char* parts=strtok(line," \t"); int atomnum=atoi(parts); parts=strtok(NULL," \t"); for (j=0;j<3;j++){ parts=strtok(NULL," \t"); coord[j]=atof(parts); } parts=strtok(NULL," \t"); if (hflag || strcmp("H",parts)){ char* element=strtok(parts,"."); strcpy(*(atoms+i),element); atomnums[i]=atomnum; for (j=0;j<3;j++){ *(*(coords+i)+j)=coord[j]; } i++; } } else if (sectionflag==2){ //Reading in bonding network char* parts=strtok(line," \t"); parts=strtok(NULL," \t"); int from=inArray(atoi(parts),atomnums,atomcount)-1; parts=strtok(NULL," \t"); int to=inArray(atoi(parts),atomnums,atomcount)-1; parts=strtok(NULL," \t"); parts=strtok(parts,"\n"); if (from >= 0 && to >= 0){ strcpy(*(*(bonds+to)+from),parts); strcpy(*(*(bonds+from)+to),parts); } } } for(i=0;i*(*(dists+index)+j+1)){ int tempind=*(*(allcands+index)+j); double tempdist=*(*(dists+index)+j); *(*(allcands+index)+j)=*(*(allcands+index)+j+1); *(*(dists+index)+j)=*(*(dists+index)+j+1); *(*(allcands+index)+j+1)=tempind; *(*(dists+index)+j+1)=tempdist; } } } } /* for (i=0;i0 && histinds[index]==candcounts[history[index]]){ histinds[index]=0; *(assign+history[index])=-1; for (i=0;i bestTotal){ //Dead end elimination check break; } if (!inArray(*(*(allcands+history[index])+i),assign,atomcount) && validateBonds(assign,*(*(allcands+history[index])+i),history[index],querybond,tempbond,querydists,atomcount)){ //Feasibility check foundflag=1; *(assign+history[index])=*(*(allcands+history[index])+i); histinds[index]=i+1; runningTotal+=*(*(dists+history[index])+i); index++; break; } } if (!foundflag){ //This occurs if none of the remaining possibilities can be mapped if (index==0){ break; } else { histinds[index]=0; *(assign+history[index])=-1; for (i=0;i=0 && !strcmp(*(*(tempbond+proposedatom)+assignatom),"")){ return 0; } } return 1; } //Returns the lowest RMSD of all possible mappings for query atoms with template indices given the two molecules' bonding network void assignAtoms(char** tempatom, char*** tempbond, char** queryatom, char*** querybond, double** querycoord, double** tempcoord, int* querynums, int* tempnums, int atomcount, int simpleflag){ int** allcands=malloc(sizeof(int*)*atomcount); //List of all atoms in the template that could feasibly be each query atom int candcounts[atomcount]; //Number of atoms in the template that could feasibly be each query atom int i; //Iterate through each query atom and determine which template atoms correspond to the query for(i=0;i Second file, * indicates correspondence is not one-to-one):\n"); for (i=0;i %s%3d ",*(queryatom+i),*(querynums+i),*(tempatom+*(bestassign+i)),*(tempnums+*(bestassign+i))); if (*(querynums+i)==*(tempnums+*(bestassign+i))){ printf("\n"); } else { printf("*\n"); } } } free(assign); free(bestassign); }