% Bibliography database of journal articles on protein structure comparison,
% contact prediction, enzyme-ligand modeling and the CASP prediction-center
% infrastructure. Citation keys are numeric identifiers and must stay stable,
% because documents cite entries by these keys.
% Text outside of entries (such as these lines) is ignored by BibTeX.

% Journal macro shared by the three CASP infrastructure papers (keys 42, 43, 44).
@string{proteins = {Proteins: Structure, Function, and Bioinformatics}}

% Key 3: the DEDAL structure-alignment method. "pages" holds the article number.
@article{3,
  author   = {Daniluk, Pawe{\l} and Lesyng, Bogdan},
  title    = {A novel method to compare protein structures using local descriptors},
  journal  = {BMC Bioinformatics},
  volume   = {12},
  pages    = {344},
  year     = {2011},
  month    = aug,
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-12-344},
  keywords = {Algorithms, Animals, Bacterial Proteins, Carrier Proteins, Computational Biology, GTP Phosphohydrolases, Humans, Models, Molecular, Proteins, Saposins, Structural Homology, Protein},
  abstract = {BACKGROUND: Protein structure comparison is one of the most widely performed tasks in bioinformatics. However, currently used methods have problems with the so-called ``difficult similarities'', including considerable shifts and distortions of structure, sequential swaps and circular permutations. There is a demand for efficient and automated systems capable of overcoming these difficulties, which may lead to the discovery of previously unknown structural relationships. RESULTS: We present a novel method for protein structure comparison based on the formalism of local descriptors of protein structure - DEscriptor Defined Alignment (DEDAL). Local similarities identified by pairs of similar descriptors are extended into global structural alignments. We demonstrate the method's capability by aligning structures in difficult benchmark sets: curated alignments in the SISYPHUS database, as well as SISY and RIPC sets, including non-sequential and non-rigid-body alignments. On the most difficult RIPC set of sequence alignment pairs the method achieves an accuracy of 77\% (the second best method tested achieves 60\% accuracy). CONCLUSIONS: DEDAL is fast enough to be used in whole proteome applications, and by lowering the threshold of detectable structure similarity it may shed additional light on molecular evolution processes. It is well suited to improving automatic classification of structure domains, helping analyze protein fold space, or to improving protein classification schemes. DEDAL is available online at http://bioexploratorium.pl/EP/DEDAL.},
}

% Key 42: CASP8 infrastructure overview. Published in a supplement issue, which
% is recorded in "number" so that "volume" stays a plain integer.
@article{42,
  author   = {Kryshtafovych, Andriy and Krysko, Oleh and Daniluk, Pawe{\l} and Dmytriv, Zinoviy and Fidelis, Krzysztof},
  title    = {Protein structure prediction center in {CASP8}},
  journal  = proteins,
  volume   = {77},
  number   = {Suppl 9},
  pages    = {5--9},
  year     = {2009},
  issn     = {1097-0134},
  doi      = {10.1002/prot.22517},
  keywords = {Computational Biology, Databases, Protein, Models, Molecular, Protein Conformation, Proteins, Software},
  abstract = {We present an outline of the Critical Assessment of Protein Structure Prediction (CASP) infrastructure implemented at the University of California, Davis, Protein Structure Prediction Center. The infrastructure supports selection and validation of prediction targets, collection of predictions, standard evaluation of submitted predictions, and presentation of results. The Center also supports information exchange relating to CASP experiments and structure prediction in general. Technical aspects of conducting the CASP8 experiment and relevant statistics are also provided.},
}

% Key 4: HMM-based residue-residue contact prediction (FragHMMent).
% "day" is not a standard BibTeX field; it only preserves the publication day.
@article{4,
  author   = {Bj{\"o}rkholm, Patrik and Daniluk, Pawe{\l} and Kryshtafovych, Andriy and Fidelis, Krzysztof and Andersson, Robin and Hvidsten, Torgeir R},
  title    = {Using multi-data hidden {Markov} models trained on local neighborhoods of protein structure to predict residue-residue contacts},
  journal  = {Bioinformatics},
  volume   = {25},
  pages    = {1264--1270},
  year     = {2009},
  month    = may,
  day      = {15},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btp149},
  keywords = {Computational Biology, Databases, Protein, Markov Chains, Models, Molecular, Protein Folding, Protein Structure, Secondary, Proteins},
  abstract = {MOTIVATION: Correct prediction of residue-residue contacts in proteins that lack good templates with known structure would take ab initio protein structure prediction a large step forward. The lack of correct contacts, and in particular long-range contacts, is considered the main reason why these methods often fail. RESULTS: We propose a novel hidden Markov model (HMM)-based method for predicting residue-residue contacts from protein sequences using as training data homologous sequences, predicted secondary structure and a library of local neighborhoods (local descriptors of protein structure). The library consists of recurring structural entities incorporating short-, medium- and long-range interactions and is general enough to reassemble the cores of nearly all proteins in the PDB. The method is tested on an external test set of 606 domains with no significant sequence similarity to the training set as well as 151 domains with SCOP folds not present in the training set. Considering the top 0.2 x L predictions (L = sequence length), our HMMs obtained an accuracy of 22.8\% for long-range interactions in new fold targets, and an average accuracy of 28.6\% for long-, medium- and short-range contacts. This is a significant performance increase over currently available methods when comparing against results published in the literature. AVAILABILITY: http://predictioncenter.org/Services/FragHMMent/.},
}

% Key 5: enzyme-ligand binding affinity model built on local descriptors.
@article{5,
  author   = {Str{\"o}mbergsson, Helena and Daniluk, Pawe{\l} and Kryshtafovych, Andriy and Fidelis, Krzysztof and Wikberg, Jarl E S and Kleywegt, Gerard J and Hvidsten, Torgeir R},
  title    = {Interaction model based on local protein substructures generalizes to the entire structural enzyme-ligand space},
  journal  = {Journal of Chemical Information and Modeling},
  volume   = {48},
  pages    = {2278--2288},
  year     = {2008},
  month    = nov,
  issn     = {1549-9596},
  doi      = {10.1021/ci800200e},
  keywords = {Animals, Artificial Intelligence, Cluster Analysis, Computer Simulation, Databases, Protein, Drug Discovery, Enzymes, Informatics, Kinetics, Ligands, Models, Molecular, Molecular Structure, Oxidoreductases Acting on CH-CH Group Donors, Oxidoreductases Acting on CH-NH Group Donors, Plasmodium falciparum, Protein Conformation, Zea mays},
  abstract = {Chemogenomics is a new strategy in in silico drug discovery, where the ultimate goal is to understand molecular recognition for all molecules interacting with all proteins in the proteome. To study such cross interactions, methods that can generalize over proteins that vary greatly in sequence, structure, and function are needed. We present a general quantitative approach to protein-ligand binding affinity prediction that spans the entire structural enzyme-ligand space. The model was trained on a data set composed of all available enzymes cocrystallized with druglike ligands, taken from four publicly available interaction databases, for which a crystal structure is available. Each enzyme was characterized by a set of local descriptors of protein structure that describe the binding site of the cocrystallized ligand. The ligands in the training set were described by traditional QSAR descriptors. To evaluate the model, a comprehensive test set consisting of enzyme structures and ligands was manually curated. The test set contained enzyme-ligand complexes for which no crystal structures were available, and thus the binding modes were unknown. The test set enzymes were therefore characterized by matching their entire structures to the local descriptor library constructed from the training set. Both the training and the test set contained enzyme-ligand complexes from all major enzyme classes, and the enzymes spanned a large range of sequences and folds. The experimental binding affinities (p$K_i$) ranged from 0.5 to 11.9 (0.7-11.0 in the test set). The induced model predicted the binding affinities of the external test set enzyme-ligand complexes with an $r^2$ of 0.53 and an RMSEP of 1.5. This demonstrates that the use of local descriptors makes it possible to create rough predictive models that can generalize over a wide range of protein targets.},
}

% Key 43: Prediction Center tooling for CASP7 (supplement issue, see "number").
@article{43,
  author   = {Kryshtafovych, Andriy and Prlic, Andreas and Dmytriv, Zinoviy and Daniluk, Pawe{\l} and Milostan, Maciej and Eyrich, Volker and Hubbard, Tim and Fidelis, Krzysztof},
  title    = {New tools and expanded data analysis capabilities at the {Protein Structure Prediction Center}},
  journal  = proteins,
  volume   = {69},
  number   = {Suppl 8},
  pages    = {19--26},
  year     = {2007},
  issn     = {1097-0134},
  doi      = {10.1002/prot.21653},
  keywords = {Computational Biology, Internet, Models, Molecular, Protein Conformation, Protein Folding, Proteins, Software, Structure-Activity Relationship},
  abstract = {We outline the main tasks performed by the Protein Structure Prediction Center in support of the CASP7 experiment and provide a brief review of the major measures used in the automatic evaluation of predictions. We describe in more detail the software developed to facilitate analysis of modeling success over and beyond the available templates and the adopted Java-based tool enabling visualization of multiple structural superpositions between target and several models/templates. We also give an overview of the CASP infrastructure provided by the Center and discuss the organization of the results web pages available through http://predictioncenter.org.},
}

% Key 44: CASP6 data processing and evaluation (supplement issue, see "number").
@article{44,
  author   = {Kryshtafovych, Andriy and Milostan, Maciej and Szajkowski, Lukasz and Daniluk, Pawe{\l} and Fidelis, Krzysztof},
  title    = {{CASP6} data processing and automatic evaluation at the protein structure prediction center},
  journal  = proteins,
  volume   = {61},
  number   = {Suppl 7},
  pages    = {19--23},
  year     = {2005},
  issn     = {1097-0134},
  doi      = {10.1002/prot.20718},
  keywords = {Algorithms, Automation, Computational Biology, Crystallography, X-Ray, Internet, Magnetic Resonance Spectroscopy, Models, Molecular, Protein Conformation, Protein Folding, Protein Structure, Secondary, Protein Structure, Tertiary, Proteins, Proteomics, Software},
  abstract = {We present a short overview of the system governing data processing and automatic evaluation of predictions in CASP6, implemented at the Livermore Protein Structure Prediction Center. The system incorporates interrelated facilities for registering participants, collecting prediction targets from crystallographers and NMR spectroscopists and making them available to the CASP6 participants, accepting predictions and providing their preliminary evaluation, and finally, storing and visualizing results. We have automatically evaluated predictions submitted to CASP6 using criteria and methods developed over the successive CASP experiments. Also, we have tested a new evaluation technique based on non-rigid-body type superpositions. Approximately the same number of predictions has been submitted to CASP6 as to all previous CASPs combined, making navigation through and understanding of the data particularly challenging. To facilitate this, we have substantially modernized all data handling procedures, including implementation of a dedicated relational database. An overview of our redesigned website is also presented (http://predictioncenter.org/casp6/).},
}