Publications

Authors: Type:

2020

  • [DOI] Witting, M. & Böcker, S. (2020) Current status of retention time prediction in metabolite identification. IN J sep sci, 43.1746-1754.
    [Bibtex]
    @Article{witting20current,
    author = {Witting, Michael and B\"{o}cker, Sebastian},
    title = {Current status of retention time prediction in metabolite identification},
    journal = {J Sep Sci},
    volume = {43},
    number = {9--10},
    pages = {1746-1754},
    year = {2020},
    doi = {10.1002/jssc.202000060},
    abstract = {Metabolite identification is a crucial step in non-targeted metabolomics, but also represents one of its current bottlenecks. Accurate identifications are required for correct biological interpretation. To date, annotation and identification are usually based on the use of accurate mass search or tandem MS analysis, but neglect orthogonal information such as retention times obtained by chromatographic separation. While several tools are available for the analysis and prediction of tandem MS data, prediction of retention times for metabolite identification are not widespread. Here, we review the current state of retention time prediction in liquid chromatography-mass spectrometry-based metabolomics, with a focus on publications published after 2010.},
    doi = {10.1002/jssc.202000060},
    keywords = {jena; Liquid Chromatography-Mass Spectrometry; Metabolite identification; Metabolomics; Retention time prediction},
    url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/jssc.202000060},
    }
  • [DOI] Dührkop, K., Nothias, L. F., Fleischauer, M., Ludwig, M., Hoffmann, M. A., Rousu, J., Dorrestein, P. C. & Böcker, S. (2020) Classes for the masses: systematic classification of unknowns using fragmentation spectra. IN Biorxiv, ..
    [Bibtex]
    @Article{duehrkop20classes,
    author = {D{\"u}hrkop, Kai and Nothias, Louis Felix and Fleischauer, Markus and Ludwig, Marcus and Hoffmann, Martin A. and Rousu, Juho and Dorrestein, Pieter C. and B{\"o}cker, Sebastian},
    journal = {bioRxiv},
    title = {Classes for the masses: Systematic classification of unknowns using fragmentation spectra},
    year = {2020},
    month = jan,
    abstract = {Metabolomics experiments can employ non-targeted tandem mass spectrometry to detect hundreds to thousands of molecules in a biological sample. Structural annotation of molecules is typically carried out by searching their fragmentation spectra in spectral libraries or, recently, in structure databases. Annotations are limited to structures present in the library or database employed, prohibiting a thorough utilization of the experimental data. We present a computational tool for systematic compound class annotation: CANOPUS uses a deep neural network to predict 1,270 compound classes from fragmentation spectra, and explicitly targets compounds where neither spectral nor structural reference data are available. CANOPUS even predicts classes for which no MS/MS training data are available. We demonstrate the broad utility of CANOPUS by investigating the effect of the microbial colonization in the digestive system in mice, and through analysis of the chemodiversity of different Euphorbia plants; both uniquely revealing biological insights at the compound class level.},
    doi = {10.1101/2020.04.17.046672},
    keywords = {jena; MS; MS/MS; CANOPUS; compound classes;},
    url = {http://biorxiv.org/content/early/2020/04/18/2020.04.17.046672.abstract},
    }
  • [DOI] Nothias, L., Petras, D., Schmid, R., Dührkop, K., Rainer, J., Sarvepalli, A., Protsyuk, I., Ernst, M., Tsugawa, H., Fleischauer, M., Aicheler, F., Aksenov, A. A., Alka, O., Allard, P., Barsch, A., Cachet, X., Caraballo-Rodriguez, A. M., Da Silva, R. R., Dang, T., Garg, N., Gauglitz, J. M., Gurevich, A., Isaac, G., Jarmusch, A. K., Kameník, Z., Kang, K. B., Kessler, N., Koester, I., Korf, A., Le Gouellec, A., Ludwig, M., Martin H, C., McCall, L., McSayles, J., Meyer, S. W., Mohimani, H., Morsy, M., Moyne, O., Neumann, S., Neuweger, H., Nguyen, N. H., Nothias-Esposito, M., Paolini, J., Phelan, V. V., Pluskal, T., Quinn, R. A., Rogers, S., Shrestha, B., Tripathi, A., van der Hooft, J. J. J., Vargas, F., Weldon, K. C., Witting, M., Yang, H., Zhang, Z., Zubeil, F., Kohlbacher, O., Böcker, S., Alexandrov, T., Bandeira, N., Wang, M. & Dorrestein, P. C. (2020) Feature-based molecular networking in the GNPS analysis environment. IN Nat methods, 17.905-908.
    [Bibtex]
    @Article{nothias20feature-based,
    author = {Nothias, Louis-F\'{e}lix and Petras, Daniel and Schmid, Robin and D\"{u}hrkop, Kai and Rainer, Johannes and Sarvepalli, Abinesh and Protsyuk, Ivan and Ernst, Madeleine and Tsugawa, Hiroshi and Fleischauer, Markus and Aicheler, Fabian and Aksenov, Alexander A. and Alka, Oliver and Allard, Pierre-Marie and Barsch, Aiko and Cachet, Xavier and Caraballo-Rodriguez, Andres Mauricio and Da Silva, Ricardo R. and Dang, Tam and Garg, Neha and Gauglitz, Julia M. and Gurevich, Alexey and Isaac, Giorgis and Jarmusch, Alan K. and Kamen\'{\i}k, Zden\u{e}k and Kang, Kyo Bin and Kessler, Nikolas and Koester, Irina and Korf, Ansgar and Le Gouellec, Audrey and Ludwig, Marcus and Martin H, Christian and McCall, Laura-Isobel and McSayles, Jonathan and Meyer, Sven W. and Mohimani, Hosein and Morsy, Mustafa and Moyne, Oriane and Neumann, Steffen and Neuweger, Heiko and Nguyen, Ngoc Hung and Nothias-Esposito, Melissa and Paolini, Julien and Phelan, Vanessa V. and Pluskal, Tom\'a\v{s} and Quinn, Robert A. and Rogers, Simon and Shrestha, Bindesh and Tripathi, Anupriya and van der Hooft, Justin J. J. and Vargas, Fernando and Weldon, Kelly C. and Witting, Michael and Yang, Heejung and Zhang, Zheng and Zubeil, Florian and Kohlbacher, Oliver and B\"{o}cker, Sebastian and Alexandrov, Theodore and Bandeira, Nuno and Wang, Mingxun and Dorrestein, Pieter C.},
    journal = {Nat Methods},
    title = {Feature-based Molecular Networking in the {GNPS} Analysis Environment},
    year = {2020},
    number = {9},
    pages = {905-908},
    volume = {17},
    abstract = {Molecular networking has become a key method to visualize and annotate the chemical space in non-targeted mass spectrometry data. We present feature-based molecular networking (FBMN) as an analysis method in the Global Natural Products Social Molecular Networking (GNPS) infrastructure that builds on chromatographic feature detection and alignment tools. FBMN enables quantitative analysis and resolution of isomers, including from ion mobility spectrometry.},
    doi = {10.1038/s41592-020-0933-6},
    keywords = {jena; GNPS; FBMN;},
    pmid = {32839597},
    }
  • [DOI] Ludwig, M., Fleischauer, M., Dührkop, K., Hoffmann, M. A. & Böcker, S. (2020) De novo molecular formula annotation and structure elucidation using SIRIUS~4. IN Li, S. (Ed.), Computational methods and data analysis for metabolomics. New York, NY.
    [Bibtex]
    @InCollection{ludwig20denovo,
    author = {Ludwig, Marcus and Fleischauer, Markus and D{\"u}hrkop, Kai and Hoffmann, Martin A. and B{\"o}cker, Sebastian},
    editor = {Li, Shuzhao},
    volume = {2104},
    pages = {185--207},
    publisher = {Springer US},
    title = {De Novo Molecular Formula Annotation and Structure Elucidation Using {SIRIUS~4}},
    year = {2020},
    address = {New York, NY},
    isbn = {978-1-0716-0239-3},
    abstract = {SIRIUS 4 is the best-in-class computational tool for metabolite identification from high-resolution tandem mass spectrometry data. It offers de novo molecular formula annotation with outstanding accuracy. When searching fragmentation spectra in a structure database, it reaches over 70{\%} correct identifications. A predicted fingerprint, which indicates the presence or absence of thousands of molecular properties, helps to deduce information about the compound of interest even if it is not contained in any structure database. Here, we present best practices and describe how to leverage the full potential of SIRIUS 4, how to incorporate it into your own workflow, and how it adds value to the analysis of mass spectrometry data beyond spectral library search.},
    booktitle = {Computational Methods and Data Analysis for Metabolomics},
    doi = {10.1007/978-1-0716-0239-3_11},
    url = {https://doi.org/10.1007/978-1-0716-0239-3_11},
    keywords = {jena; SIRIUS; CSI:FingerID; MS; MS/MS;},
    }
  • [DOI] Samuel, S., Shadaydeh, M., Böcker, S., Brügmann, B., Bucher, S. F., Deckert, V., Denzler, J., Dittrich, P., von Eggeling, F., Güllmar, D., Guntinas-Lichius, O., König-Ries, B., Löffler, F., Maicher, L., Marz, M., Migliavacca, M., Reichenbach, J. R., Reichstein, M., Römermann, C. & Wittig, A. (2020) A virtual “`Werkstatt”‘ for digitization in the sciences. IN Research ideas and outcomes, 6.e54106.
    [Bibtex]
    @article{samuel20virtual,
    author = {Sheeba Samuel and Maha Shadaydeh and Sebastian B\"ocker and Bernd Br\"ugmann and Solveig Franziska Bucher and Volker Deckert and Joachim Denzler and Peter Dittrich and Ferdinand von Eggeling and Daniel G\"ullmar and Orlando Guntinas-Lichius and Birgitta K\"onig-Ries and Frank L\"offler and Lutz Maicher and Manja Marz and Mirco Migliavacca and J\"urgen R. Reichenbach and Markus Reichstein and Christine R\"omermann and Andrea Wittig},
    title = {A virtual "`{Werkstatt}"' for digitization in the sciences},
    journal = {Research Ideas and Outcomes},
    volume = {6},
    year = {2020},
    doi = {10.3897/rio.6.e54106},
    publisher = {Pensoft Publishers},
    abstract = {Data is central in almost all scientific disciplines nowadays. Furthermore, intelligent systems have developed rapidly in recent years, so that in many disciplines the expectation is emerging that with the help of intelligent systems, significant challenges can be overcome and science can be done in completely new ways. In order for this to succeed, however, first, fundamental research in computer science is still required, and, second, generic tools must be developed on which specialized solutions can be built. In this paper, we introduce a recently started collaborative project funded by the Carl Zeiss Foundation, a virtual manufactory for digitization in the sciences, the “Werkstatt”, which is being established at the Michael Stifel Center Jena (MSCJ) for data-driven and simulation science to address fundamental questions in computer science and applications. The Werkstatt focuses on three key areas, which include generic tools for machine learning, knowledge generation using machine learning processes, and semantic methods for the data life cycle, as well as the application of these topics in different disciplines. Core and pilot projects address the key aspects of the topics and form the basis for sustainable work in the Werkstatt.},
    pages = {e54106},
    URL = {https://doi.org/10.3897/rio.6.e54106},
    }
  • [DOI] Kaftan, F., Kulkarni, P., Knaden, M., Böcker, S., Hansson, B. S. & Svatoš, A. (2020) Drosophila melanogaster chemical ecology revisited: 2-D distribution maps of sex pheromones on whole virgin and mated flies by mass spectrometry imaging. IN Bmc zoology, 5.4.
    [Bibtex]
    @Article{kaftan20drosophila,
    author = {Kaftan, Filip and Kulkarni, Purva and Knaden, Markus and B\"ocker, Sebastian and Hansson, Bill S. and Svato{\v{s}}, Ale{\v{s}}},
    journal = {BMC Zoology},
    title = {Drosophila melanogaster chemical ecology revisited: {2-D} distribution maps of sex pheromones on whole virgin and mated flies by mass spectrometry imaging},
    year = {2020},
    number = {1},
    pages = {4},
    volume = {5},
    abstract = {Sexual behavior in Drosophila melanogaster flies is greatly influenced by chemical cues. In this study, a spatial distribution of female and male sex pheromones was investigated on the surface of virgin and mated six-day-old fruit flies. Surface analysis was performed using the technique of mass spectrometry imaging (MSI) matrix-assisted laser desorption/ionization – time of flight (MALDI-TOF) and confirmed by gas chromatography – mass spectrometry (GC-MS) analysis of hexane extracts prepared from dissected flies.},
    doi = {10.1186/s40850-020-00053-7},
    keywords = {jena; imaging MS; MSI;},
    owner = {Sebastian},
    refid = {Kaftan2020},
    timestamp = {2020.06.25},
    }
  • [DOI] Ludwig, M., Nothias, L., Dührkop, K., Koester, I., Fleischauer, M., Hoffmann, M. A., Petras, D., Vargas, F., Morsy, M., Aluwihare, L., Dorrestein, P. C. & Böcker, S. (2020) Database-independent molecular formula annotation using Gibbs sampling through ZODIAC. IN Nat mach intell, 2.629–641.
    [Bibtex]
    @Article{ludwig20database-independent,
    author = {Ludwig, Marcus and Nothias, Louis-F\'elix and D\"uhrkop, Kai and Koester, Irina and Fleischauer, Markus and Hoffmann, Martin A. and Petras, Daniel and Vargas, Fernando and Morsy, Mustafa and Aluwihare, Lihini and Dorrestein, Pieter C. and B\"ocker, Sebastian},
    journal = {Nat Mach Intell},
    title = {Database-independent molecular formula annotation using {Gibbs} sampling through {ZODIAC}},
    year = {2020},
    number = {10},
    pages = {629--641},
    volume = {2},
    abstract = {The confident high-throughput identification of small molecules is one of the most challenging tasks in mass spectrometry-based metabolomics. Annotating the molecular formula of a compound is the first step towards its structural elucidation. Yet even the annotation of molecular formulas remains highly challenging. This is particularly so for large compounds above 500 daltons, and for de novo annotations, for which we consider all chemically feasible formulas. Here we present ZODIAC, a network-based algorithm for the de novo annotation of molecular formulas. Uniquely, it enables fully automated and swift processing of complete experimental runs, providing high-quality, high-confidence molecular formula annotations. This allows us to annotate novel molecular formulas that are absent from even the largest public structure databases. Our method re-ranks molecular formula candidates by considering joint fragments and losses between fragmentation trees. We employ Bayesian statistics and Gibbs sampling. Thorough algorithm engineering ensures fast processing in practice. We evaluate ZODIAC on five datasets, producing results substantially (up to 16.5-fold) better than for several other methods, including SIRIUS, which is the state-of-the-art algorithm for molecular formula annotation at present. Finally, we report and verify several novel molecular formulas annotated by ZODIAC.},
    doi = {10.1038/s42256-020-00234-6},
    keywords = {jena; ZODIAC; SIRIUS;},
    owner = {Sebastian},
    timestamp = {2020.11.06},
    }
  • [DOI] Dührkop, K., Nothias, L., Fleischauer, M., Reher, R., Ludwig, M., Hoffmann, M. A., Petras, D., Gerwick, W. H., Rousu, J., Dorrestein, P. C. & Böcker, S. (2020) Systematic classification of unknown metabolites using high-resolution fragmentation mass spectra. IN Nature biotechnology, ..
    [Bibtex]
    @Article{duehrkop20systematic,
    author = {D{\"u}hrkop, Kai and Nothias, Louis-F{\'e}lix and Fleischauer, Markus and Reher, Raphael and Ludwig, Marcus and Hoffmann, Martin A. and Petras, Daniel and Gerwick, William H. and Rousu, Juho and Dorrestein, Pieter C. and B{\"o}cker, Sebastian},
    journal = {Nature Biotechnology},
    title = {Systematic classification of unknown metabolites using high-resolution fragmentation mass spectra},
    year = {2020},
    issn = {1546-1696},
    abstract = {Metabolomics using nontargeted tandem mass spectrometry can detect thousands of molecules in a biological sample. However, structural molecule annotation is limited to structures present in libraries or databases, restricting analysis and interpretation of experimental data. Here we describe CANOPUS (class assignment and ontology prediction using mass spectrometry), a computational tool for systematic compound class annotation. CANOPUS uses a deep neural network to predict 2,497?compound classes from fragmentation spectra, including all biologically relevant classes. CANOPUS explicitly targets compounds for which neither spectral nor structural reference data are available and predicts classes lacking tandem mass spectrometry training data. In evaluation using reference data, CANOPUS reached very high prediction performance (average accuracy of 99.7% in cross-validation) and outperformed four baseline methods. We demonstrate the broad utility of CANOPUS by investigating the effect of microbial colonization in the mouse digestive system, through analysis of the chemodiversity of different Euphorbia plants and regarding the discovery of a marine natural product, revealing biological insights at the compound class level.},
    doi = {10.1038/s41587-020-0740-8},
    keywords = {jena; MS; MS/MS; CANOPUS; compound classes;},
    refid = {Dührkop2020},
    url = {https://doi.org/10.1038/s41587-020-0740-8},
    }

2019

  • [DOI] Dührkop, K., Fleischauer, M., Ludwig, M., Aksenov, A. A., Melnik, A. V., Meusel, M., Dorrestein, P. C., Rousu, J. & Böcker, S. (2019) SIRIUS~4: a rapid tool for turning tandem mass spectra into metabolite structure information. IN Nat methods, 16.299-302.
    [Bibtex]
    @Article{duehrkop19sirius4,
    author = {Kai D\"uhrkop and Markus Fleischauer and Marcus Ludwig and Alexander A. Aksenov and Alexey V. Melnik and Marvin Meusel and Pieter C. Dorrestein and Juho Rousu and Sebastian B\"ocker},
    title = {{SIRIUS~4}: a rapid tool for turning tandem mass spectra into metabolite structure information},
    journal = {Nat Methods},
    year = {2019},
    volume = {16},
    number = {4},
    pages = {299-302},
    abstract = {Mass spectrometry is a predominant experimental technique in metabolomics and related fields, but metabolite structural elucidation remains highly challenging. We report SIRIUS 4 (https://bio.informatik.uni-jena.de/sirius/), which provides a fast computational approach for molecular structure identification. SIRIUS 4 integrates CSI:FingerID for searching in molecular structure databases. Using SIRIUS 4, we achieved identification rates of more than 70% on challenging metabolomics datasets.},
    doi = {10.1038/s41592-019-0344-8},
    keywords = {jena},
    owner = {Sebastian},
    timestamp = {2019-03-18},
    }
  • [DOI] Ludwig, M., Nothias, L., Dührkop, K., Koester, I., Fleischauer, M., Hoffmann, M. A., Petras, D., Vargas, F., Morsy, M., Aluwihare, L., Dorrestein, P. C. & Böcker, S. (2019) ZODIAC: database-independent molecular formula annotation using Gibbs sampling reveals unknown small molecules. IN Biorxiv, ..
    [Bibtex]
    @Article{ludwig19zodiac-preprint,
    author = {Ludwig, Marcus and Nothias, Louis-F{\'e}lix and D{\"u}hrkop, Kai and Koester, Irina and Fleischauer, Markus and Hoffmann, Martin A. and Petras, Daniel and Vargas, Fernando and Morsy, Mustafa and Aluwihare, Lihini and Dorrestein, Pieter C. and B{\"o}cker, Sebastian},
    journal = {bioRxiv},
    title = {{ZODIAC}: database-independent molecular formula annotation using {Gibbs} sampling reveals unknown small molecules},
    year = {2019},
    abstract = {The confident high-throughput identification of small molecules remains one of the most challenging tasks in mass spectrometry-based metabolomics. SIRIUS has become a powerful tool for the interpretation of tandem mass spectra, and shows outstanding performance for identifying the molecular formula of a query compound, being the first step of structure identification. Nevertheless, the identification of both molecular formulas for large compounds above 500 Daltons and novel molecular formulas remains highly challenging. Here, we present ZODIAC, a network-based algorithm for the de novo estimation of molecular formulas. ZODIAC reranks SIRIUS{\textquoteright} molecular formula candidates, combining fragmentation tree computation with Bayesian statistics using Gibbs sampling. Through careful algorithm engineering, ZODIAC{\textquoteright}s Gibbs sampling is very swift in practice. ZODIAC decreases incorrect annotations 16.2-fold on a challenging plant extract dataset with most compounds above 700 Dalton; we then show improvements on four additional, diverse datasets. Our analysis led to the discovery of compounds with novel molecular formulas such as C24H47BrNO8P which, as of today, is not present in any publicly available molecular structure databases.},
    doi = {10.1101/842740},
    publisher = {Cold Spring Harbor Laboratory},
    url = {https://www.biorxiv.org/content/early/2019/11/16/842740},
    keywords = {jena; MS; MS/MS; molecular formulas; ZODIAC},
    }

2018

  • [DOI] Kulkarni, P., Dost, M., Bulut, Ö. D., Welle, A., Böcker, S., Boland, W. & Svatoš, A. (2018) Secondary ion mass spectrometry imaging and multivariate data analysis reveal co-aggregation patterns of populus trichocarpa leaf surface compounds on a micrometer scale. IN The plant journal, 93.193-206.
    [Bibtex]
    @Article{kulkarni18secondary,
    author = {Kulkarni, Purva and Dost, Mina and Bulut, \"Ozg\"ul Demir and Welle, Alexander and B\"ocker, Sebastian and Boland, Wilhelm and Svato{\v{s}}, Ale{\v{s}}},
    title = {Secondary ion mass spectrometry imaging and multivariate data analysis reveal co-aggregation patterns of Populus trichocarpa leaf surface compounds on a micrometer scale},
    journal = {The Plant Journal},
    year = {2018},
    volume = {93},
    number = {1},
    pages = {193-206},
    abstract = {Spatially resolved analysis of a multitude of compound classes has become feasible with the rapid advancement in mass spectrometry imaging strategies. In this study, we present a protocol that combines high lateral resolution TOF-SIMS imaging with a multivariate data analysis (MVA) approach to probe the complex leaf surface chemistry of Populus trichocarpa. Here, epicuticular waxes (EWs) found on the adaxial leaf surface of Populus trichocarpa were blotted on silicon wafers and imaged using time-of-flight secondary ion mass spectrometry (TOF-SIMS) at 10 m and 1 m lateral resolution. Intense M+ and M- molecular ions were clearly visible, which made it possible to resolve the individual compound classes present in EWs. Series of long-chain aliphatic saturated alcohols (C21-C30), hydrocarbons (C25-C33) and wax esters (C44-C48) were clearly observed. These data correlated with the 7Li-chelation matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) analysis, which yielded mostly molecular adduct ions of the analyzed compounds.
    Subsequently, MVA was used to interrogate the TOF-SIMS dataset for identifying hidden patterns on the leafs surface based on its chemical profile. After the application of principal component analysis (PCA), a small number of principal components (PCs) were found to be sufficient to explain maximum variance in the data. To further confirm the contributions from pure components, a 5-factor multivariate curve resolution (MCR) model was applied. Two distinct patterns of small islets, here termed crystals were apparent from the resulting score plots. Based on PCA and MCR results, the crystals were found to be formed by C23 or C29 alcohols. Other less obvious patterns observed in the PCs revealed that the adaxial leaf surface is coated with a relatively homogenous layer of alcohols, hydrocarbons and wax esters.
    The ultra-high resolution TOF-SIMS imaging combined with MVA approach helped to highlight the diverse patterns underlying the leafs surface. Currently, the methods available to analyze the surface chemistry of waxes in conjunction with the spatial information related to the distribution of compounds are limited. This study uses tools that may provide important biological insights into the composition of the wax layer, how this layer is repaired after mechanical damage or insect feeding, and which transport mechanisms are involved in deploying wax constituents to specific regions on the leaf surface.},
    doi = {10.1111/tpj.13763},
    keywords = {jena; SIMS imaging},
    owner = {Purva},
    pmid = {29117637},
    timestamp = {2017-10-24},
    }
  • Dührkop, K., Lataretu, M. A., White, T. W. J. & Böcker, S. (2018) Heuristic algorithms for the Maximum Colorful Subtree problem .
    [Bibtex]
    @TechReport{duehrkop18heuristic-arxiv,
    author = {Kai D\"uhrkop and Marie Anne Lataretu and W. Timothy J. White and Sebastian B\"ocker},
    institution = {arXiv},
    title = {Heuristic algorithms for the {Maximum Colorful Subtree} problem},
    year = {2018},
    number = {arXiv:1801.07456},
    abstract = {In metabolomics, small molecules are structurally elucidated using tandem mass spectrometry (MS/MS); this resulted in the computational Maximum Colorful Subtree problem, which is NP-hard. Unfortunately, data from a single metabolite requires us to solve hundreds or thousands of instances of this problem; and in a single Liquid Chromatography MS/MS run, hundreds or thousands of metabolites are measured.
    Here, we comprehensively evaluate the performance of several heuristic algorithms for the problem against an exact algorithm. We put particular emphasis on whether a heuristic is able to rank candidates such that the correct solution is ranked highly. We propose this ``intermediate'' evaluation because evaluating the approximating quality of heuristics is misleading: Even a slightly suboptimal solution can be structurally very different from the true solution. On the other hand, we cannot structurally evaluate against the ground truth, as this is unknown. We find that particularly one of the heuristics consistently ranks the correct solution in a favorable position. Integrating the heuristic into the analysis pipeline results in a speedup of 10-fold or more, without sacrificing accuracy.},
    keywords = {jena; maximum colorful subtree problem; fragmentation trees; exact algorithms; fpt;},
    owner = {Sebastian},
    timestamp = {2018-01-10},
    url = {https://arxiv.org/abs/1801.07456},
    }
  • [DOI] Ludwig, M., Dührkop, K. & Böcker, S. (2018) Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints. IN Bioinformatics, 34.i333-i340.
    [Bibtex]
    @Article{ludwig18bayesian,
    author = {Marcus Ludwig and Kai D\"uhrkop and Sebastian B\"ocker},
    title = {Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints},
    journal = {Bioinformatics},
    year = {2018},
    volume = {34},
    number = {13},
    pages = {i333-i340},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2018).},
    abstract = {Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually rely on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Recently, we presented CSI:FingerID for searching in molecular structure databases using tandem mass spectrometry data. CSI:FingerID predicts a molecular fingerprint that encodes the structure of the query compound, then uses this to search a molecular structure database such as PubChem. Scoring of the predicted query fingerprint and deterministic target fingerprints is carried out assuming independence between the molecular properties constituting the fingerprint.
    We present a scoring that takes into account dependencies between molecular properties. As before, we predict posterior probabilities of molecular properties using machine learning. Dependencies between molecular properties are modeled as a Bayesian tree network; the tree structure is estimated on the fly from the instance data. For each edge, we also estimate the expected covariance between the two random variables. For fixed marginal probabilities, we then estimate conditional probabilities using the known covariance. Now, the corrected posterior probability of each candidate can be computed, and candidates are ranked by this score. Modeling dependencies improves identification rates of CSI:FingerID by 2.85 percentage points.},
    doi = {10.1093/bioinformatics/bty245},
    keywords = {MS; jena; fingerprints; CSI;},
    owner = {Sebastian},
    pmid = {29949965},
    timestamp = {2018.03.12},
    }
  • [DOI] Alexandrov, T., Böcker, S., Dorrestein, P. & Schymanski, E. (2018) Computational metabolomics: identification, interpretation, imaging (Dagstuhl Seminar 17491). IN Dagstuhl reports, 7.1–17.
    [Bibtex]
    @Article{alexandrov18computational,
    author = {Theodore Alexandrov and Sebastian B{\"o}cker and Pieter Dorrestein and Emma Schymanski},
    journal = {Dagstuhl Reports},
    title = {Computational Metabolomics: Identification, Interpretation, Imaging ({Dagstuhl} {Seminar} 17491)},
    year = {2018},
    issn = {2192-5283},
    number = {12},
    pages = {1--17},
    volume = {7},
    abstract = {Metabolites are key players in almost all biological processes, and play various functional roles providing energy, building blocks, signaling, communication, and defense. Metabolites serve as clinical biomarkers for detecting medical conditions such as cancer; small molecule drugs account for 90% of prescribed therapeutics. Complete understanding of biological systems requires detecting and interpreting the metabolome in time and space. Following in the steps of high-throughput sequencing, mass spectrometry (MS) has become established as a key analytical technique for large-scale studies of complex metabolite mixtures. MS-based experiments generate datasets of increasing complexity and size. The Dagstuhl Seminar on Computational Metabolomics brought together leading experts from the experimental (analytical chemistry and biology) and the computational (computer science and bioinformatics) side, to foster the exchange of expertise needed to advance computational metabolomics. The focus was on a dynamic schedule with overview talks followed by break-out sessions, selected by the participants, covering the whole experimental-computational continuum in mass spectrometry. Particular focus in this seminar was given to imaging mass spectrometry techniques that integrate a spacial component into the analysis, ranging in scale from single cells to organs and organisms.},
    address = {Dagstuhl, Germany},
    doi = {10.4230/DagRep.7.12.1},
    editor = {Theodore Alexandrov and Sebastian B{\"o}cker and Pieter Dorrestein and Emma Schymanski},
    keywords = {jena; Dagstuhl;},
    owner = {Sebastian},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    timestamp = {2018.04.09},
    url = {http://drops.dagstuhl.de/opus/volltexte/2018/8674},
    urn = {urn:nbn:de:0030-drops-86740},
    }
  • [DOI] Fleischauer, M. & Böcker, S. (2018) BCD Beam Search: considering suboptimal partial solutions in Bad Clade Deletion supertrees. IN Peerj, 6.e4987.
    [Bibtex]
    @Article{fleischauer18bcd,
    author = {Markus Fleischauer and Sebastian B\"ocker},
    journal = {PeerJ},
    title = {{BCD} {Beam} {Search}: Considering suboptimal partial solutions in {Bad} {Clade} {Deletion} supertrees},
    year = {2018},
    issn = {2167-8359},
    pages = {e4987},
    volume = {6},
    abstract = {Supertree methods enable the reconstruction of large phylogenies. The supertree problem
    can be formalized in different ways in order to cope with contradictory information in
    the input. Some supertree methods are based on encoding the input trees in a matrix;
    other methods try to find minimum cuts in some graph. Recently, we introduced {Bad} {Clade}
    {Deletion} (BCD) supertrees which combines the graph-based computation of minimum cuts
    with optimizing a global objective function on the matrix representation of the input
    trees. The BCD supertree method has guaranteed polynomial running time and is very swift
    in practice. The quality of reconstructed supertrees was superior to matrix
    representation with parsimony (MRP) and usually on par with SuperFine for simulated
    data; but particularly for biological data, quality of BCD supertrees could not keep up
    with SuperFine supertrees. Here, we present a beam search extension for the BCD
    algorithm that keeps alive a constant number of partial solutions in each top-down
    iteration phase. The guaranteed worst-case running time of the new algorithm is still
    polynomial in the size of the input. We present an exact and a randomized subroutine to
    generate suboptimal partial solutions. Both beam search approaches consistently improve
    supertree quality on all evaluated datasets when keeping 25 suboptimal solutions alive.
    Supertree quality of the BCD Beam Search algorithm is on par with MRP and SuperFine even
    for biological data. This is the best performance of a polynomial-time supertree
    algorithm reported so far.},
    doi = {10.7717/peerj.4987},
    keywords = {jena; supertrees; bcd supertrees; MRC; Matrix representation with parsimony; Split fit; Phylogeny; Phylogenetics; Supermatrix; Supertree; Divide-and-conquer; Minimum cut},
    owner = {Sebastian},
    url = {https://doi.org/10.7717/peerj.4987},
    }
  • [DOI] Bach, E., Szedmak, S., Brouard, C., Böcker, S. & Rousu, J. (2018) Liquid-chromatography retention order prediction for metabolite identification. IN Bioinformatics, 34.i875-i883.
    [Bibtex]
    @Article{bach18liquid-chromatography,
    author = {Eric Bach and Sandor Szedmak and C\'eline Brouard and Sebastian B\"ocker and Juho Rousu},
    title = {Liquid-Chromatography Retention Order Prediction for Metabolite Identification},
    journal = {Bioinformatics},
    year = {2018},
    volume = {34},
    number = {17},
    pages = {i875-i883},
    note = {Proc.\ of \emph{European Conference on Computational Biology} (ECCB 2018)},
    abstract = {Motivation: Liquid Chromatography (LC) followed by tandem Mass Spectrometry (MS/MS) is one of the predominant methods for metabolite identification. In recent years, machine learning has started to transform the analysis of tandem mass spectra and the identification of small molecules. In contrast, LC data is rarely used to improve metabolite identification, despite numerous published methods for retention time prediction using machine learning.
    Results: We present a machine learning method for predicting the retention order of molecules; that is, the order in which molecules elute from the LC column. Our method has important advantages over previous approaches: We show that retention order is much better conserved between instruments than retention time. To this end, our method can be trained using retention time measurements from different LC systems and configurations without tedious preprocessing, significantly increasing the amount of available training data. Our experiments demonstrate that retention order prediction is an effective way to learn retention behaviour of molecules from heterogeneous retention time data. Finally, we demonstrate how retention order prediction and MS/MS-based scores can be combined for more accurate metabolite identifications when analyzing a complete LC-MS/MS run.},
    doi = {10.1093/bioinformatics/bty590},
    keywords = {jena; retention time; RT; machine learning; ML; compound identification; mass spectrometry; MS;},
    owner = {Sebastian},
    timestamp = {2018.06.07},
    }
  • [DOI] Peters, K., Worrich, A., Weinhold, A., Alka, O., Balcke, G., Birkemeyer, C., Bruelheide, H., Calf, O. W., Dietz, S., Dührkop, K., Gaquerel, E., Heinig, U., Kücklich, M., Macel, M., Müller, C., Poeschl, Y., Pohnert, G., Ristok, C., Rodr’i{}guez, V. M., Ruttkies, C., Schuman, M., Schweiger, R., Shahaf, N., Steinbeck, C., Tortosa, M., Treutler, H., Ueberschaar, N., Velasco, P., Weiß{}, B. M., Widdig, A., Neumann, S. & {van Dam}, N. M. (2018) Current challenges in plant eco-metabolomics.. IN Int j mol sci, 19..
    [Bibtex]
    @Article{peters18current,
    author = {Peters, Kristian and Worrich, Anja and Weinhold, Alexander and Alka, Oliver and Balcke, Gerd and Birkemeyer, Claudia and Bruelheide, Helge and Calf, Onno W. and Dietz, Sophie and D\"uhrkop, Kai and Gaquerel, Emmanuel and Heinig, Uwe and K\"ucklich, Marlen and Macel, Mirka and M\"uller, Caroline and Poeschl, Yvonne and Pohnert, Georg and Ristok, Christian and Rodr\'\i{}guez, Victor M. and Ruttkies, Christoph and Schuman, Meredith and Schweiger, Rabea and Shahaf, Nir and Steinbeck, Christoph and Tortosa, Maria and Treutler, Hendrik and Ueberschaar, Nico and Velasco, Pablo and Wei\ss{}, Brigitte M. and Widdig, Anja and Neumann, Steffen and {van Dam}, Nicole M.},
    title = {Current challenges in plant eco-metabolomics.},
    journal = {Int J Mol Sci},
    year = {2018},
    volume = {19},
    number = {5},
    abstract = {The relatively new research discipline of Eco-Metabolomics is the application of metabolomics techniques to ecology with the aim to characterise biochemical interactions of organisms across different spatial and temporal scales. Metabolomics is an untargeted biochemical approach to measure many thousands of metabolites in different species, including plants and animals. Changes in metabolite concentrations can provide mechanistic evidence for biochemical processes that are relevant at ecological scales. These include physiological, phenotypic and morphological responses of plants and communities to environmental changes and also interactions with other organisms. Traditionally, research in biochemistry and ecology comes from two different directions and is performed at distinct spatiotemporal scales. Biochemical studies most often focus on intrinsic processes in individuals at physiological and cellular scales. Generally, they take a bottom-up approach scaling up cellular processes from spatiotemporally fine to coarser scales. Ecological studies usually focus on extrinsic processes acting upon organisms at population and community scales and typically study top-down and bottom-up processes in combination. Eco-Metabolomics is a transdisciplinary research discipline that links biochemistry and ecology and connects the distinct spatiotemporal scales. In this review, we focus on approaches to study chemical and biochemical interactions of plants at various ecological levels, mainly plant-organismal interactions, and discuss related examples from other domains. We present recent developments and highlight advancements in Eco-Metabolomics over the last decade from various angles. We further address the five key challenges: (1) complex experimental designs and large variation of metabolite profiles; (2) feature extraction; (3) metabolite identification; (4) statistical analyses; and (5) bioinformatics software tools and workflows. The presented solutions to these challenges will advance connecting the distinct spatiotemporal scales and bridging biochemistry and ecology.},
    doi = {10.3390/ijms19051385},
    keywords = {jena; review;},
    pmid = {29734799},
    }
  • [DOI] Dührkop, K., Lataretu, M. A., White, T. W. J. & Böcker, S. (2018) Heuristic algorithms for the maximum colorful subtree problem Dagstuhl, Germany, Schloss dagstuhl–leibniz-zentrum fuer informatik, 23:1–23:14.
    [Bibtex]
    @InProceedings{duehrkop18heuristic,
    author = {Kai D\"uhrkop and Marie A. Lataretu and W. Timothy J. White and Sebastian B\"ocker},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2018)},
    title = {Heuristic algorithms for the Maximum Colorful Subtree problem},
    year = {2018},
    address = {Dagstuhl, Germany},
    pages = {23:1--23:14},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    series = {Leibniz International Proceedings in Informatics (LIPIcs)},
    volume = {113},
    abstract = {In metabolomics, small molecules are structurally elucidated using tandem mass spectrometry (MS/MS); this computational task can be formulated as the Maximum
    Colorful Subtree problem, which is NP-hard. Unfortunately, data from a single metabolite requires us to solve hundreds or thousands of instances of this problem---and in a single Liquid Chromatography MS/MS run, hundreds or thousands of metabolites are measured.
    Here, we comprehensively evaluate the performance of several heuristic algorithms for the problem. Unfortunately, as is often the case in bioinformatics, the structure of the (chemically) true solution is not known to us; therefore we can only evaluate against the optimal solution of an instance. Evaluating the quality of a heuristic based on scores can be misleading: Even a slightly suboptimal solution can be structurally very different from the optimal solution, but it is the structure of a solution and not its score that is relevant for the downstream analysis. To this end, we propose a different evaluation setup: Given a set of candidate instances of
    which exactly one is known to be correct, the heuristic in question solves each instance to the best of its ability, producing a score for each instance, which is then used to rank the instances. We then evaluate whether the correct instance is ranked highly by the heuristic.
    We find that one particular heuristic consistently ranks the correct instance in a top position. We also find that the scores of the best heuristic solutions are very close to the optimal score; in contrast, the structure of the solutions can deviate significantly from the optimal structures. Integrating the heuristic allowed us to speed up computations in practice by a factor of 100-fold.},
    doi = {10.4230/LIPIcs.WABI.2018.23},
    isbn = {978-3-95977-082-8},
    issn = {1868-8969},
    keywords = {jena; Maximum Color Subtree problem; heuristics;},
    owner = {Sebastian},
    timestamp = {2018.06.10},
    url = {http://drops.dagstuhl.de/opus/volltexte/2018/9325},
    }
  • [DOI] Deutsch, E. W., Perez-Riverol, Y., Chalkley, R. J., Wilhelm, M., Tate, S., Sachsenberg, T., Walzer, M., Käll, L., Delanghe, B., Böcker, S., Schymanski, E. L., Wilmes, P., Dorfer, V., Kuster, B., Volders, P., Jehmlich, N., Vissers, J. P. C., Wolan, D. W., Wang, A. Y., Mendoza, L., Shofstahl, J., Dowsey, A. W., Griss, J., Salek, R. M., Neumann, S., Binz, P., Lam, H., Vizca{‘i}no, J. A., Bandeira, N. & Röst, H. (2018) Expanding the use of spectral libraries in proteomics. IN J proteome res, 17.4051-4060.
    [Bibtex]
    @Article{deutsch18expanding,
    author = {Deutsch, Eric W and Perez-Riverol, Yasset and Chalkley, Robert J and Wilhelm, Mathias and Tate, Stephen and Sachsenberg, Timo and Walzer, Mathias and K\"all, Lukas and Delanghe, Bernard and B\"ocker, Sebastian and Schymanski, Emma L and Wilmes, Paul and Dorfer, Viktoria and Kuster, Bernhard and Volders, Pieter-Jan and Jehmlich, Nico and Vissers, Johannes P C and Wolan, Dennis W and Wang, Ana Y and Mendoza, Luis and Shofstahl, Jim and Dowsey, Andrew W and Griss, Johannes and Salek, Reza M and Neumann, Steffen and Binz, Pierre-Alain and Lam, Henry and Vizca{\'\i}no, Juan Antonio and Bandeira, Nuno and R\"ost, Hannes},
    title = {Expanding the use of spectral libraries in proteomics},
    journal = {J Proteome Res},
    year = {2018},
    volume = {17},
    number = {12},
    pages = {4051-4060},
    abstract = {The 2017 Dagstuhl Seminar on Computational Proteomics provided an opportunity for a broad discussion on the current state and future directions of the generation and use of peptide tandem mass spectrometry spectral libraries. Their use in proteomics is growing slowly, but there are multiple challenges in the field that must be addressed to further increase the adoption of spectral libraries and related techniques. The primary bottlenecks are the paucity of high quality and comprehensive libraries and the general difficulty of adopting spectral library searching into existing workflows. There are several existing spectral library formats, but none capture a satisfactory level of metadata; therefore a logical next improvement is to design a more advanced, Proteomics Standards Initiative-approved spectral library format that can encode all of the desired metadata. The group discussed a series of metadata requirements organized into three designations of completeness or quality, tentatively dubbed bronze, silver, and gold. The metadata can be organized at four different levels of granularity: at the collection (library) level, at the individual entry (peptide ion) level, at the peak (fragment ion) level, and at the peak annotation level. Strategies for encoding mass modifications in a consistent manner and the requirement for encoding high-quality and commonly-seen but as-yet-unidentified spectra were discussed. The group also discussed related topics, including strategies for comparing two spectra, techniques for generating representative spectra for a library, approaches for selection of optimal signature ions for targeted workflows, and issues surrounding the merging of two or more libraries into one. We present here a review of this field and the challenges that the community must address in order to accelerate the adoption of spectral libraries in routine analysis of proteomics datasets.},
    doi = {10.1021/acs.jproteome.8b00485},
    keywords = {jena; Dagstuhl;},
    pmid = {30270626},
    pubstatus = {aheadofprint},
    revised = {2018-10-01},
    timestamp = {2018.10.15},
    }
  • Fleischauer, M. (2018) Fast and accurate supertrees: towards large scale phylogenies. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{fleischauer18fast,
    author = {Fleischauer, Markus},
    title = {Fast and accurate supertrees: towards large scale phylogenies},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2018},
    address = {Jena; Germany},
    file = {:2018/Fleischauer_FastAndAccurateSupertrees_PhdThesis_2018.pdf:PDF},
    keywords = {jena; phylogenetics; Bad Clade Deletion; supertrees;},
    url = {https://www.db-thueringen.de/receive/dbt_mods_00035164},
    }
  • Dührkop, K. (2018) Computational methods for small molecule identification. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{duehrkop18computational,
    author = {D\"uhrkop, Kai},
    title = {Computational Methods for Small Molecule Identification},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2018},
    address = {Jena; Germany},
    file = {:2018/Duehrkop_ComputationalMethodsSmallMoleculeIdentification_PhdThesis_2018.pdf:PDF},
    keywords = {jena; CSI:FingerID; MS; small molecules; isotope patterns; SIRIUS; fragmentation trees; FT;},
    }
  • Kulkarni, P. (2018) Computational methods for the analysis of mass spectrometry imaging data. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{kulkarni18computational,
    author = {Kulkarni, Purva},
    title = {Computational Methods for the Analysis of Mass Spectrometry Imaging Data},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2018},
    address = {Jena; Germany},
    file = {:2018/Kulkarni_ComputationalMethodsMassSpectrometryImaging_PhdThesis_2018.pdf:PDF},
    keywords = {jena; MS; small molecules; imaging; MSICorrect},
    }

2017

  • [DOI] Böcker, S. (2017) Searching molecular structure databases using tandem MS data: are we there yet?. IN Curr opin chem biol, 36.1-6.
    [Bibtex]
    @Article{boecker17searching,
    author = {Sebastian B\"ocker},
    title = {Searching molecular structure databases using tandem {MS} data: are we there yet?},
    journal = {Curr Opin Chem Biol},
    year = {2017},
    volume = {36},
    pages = {1-6},
    issn = {1367-5931},
    abstract = {Untargeted metabolomics experiments usually rely on tandem mass spectrometry (MS/MS) to identify the thousands of compounds in a complex sample. Spectral libraries used for identification are incomplete, and many metabolites remain unknown. There has been a recent development to replace spectral libraries by molecular structure databases when searching the MS/MS data of the unknown compound. Several tools have been developed for this task, including CFM-ID, MetFrag, MAGMa(+), FingerID and CSI:FingerID. These methods are already helpful for everyday metabolomics; with further advances, these methods can become indispensable tools for tomorrow's metabolomics. Here, I discuss several questions related to this task, such as: Why not wait for spectral libraries to grow sufficiently? Why evaluate methods outside their 'comfort zone'? Should we use prior information such as citation frequencies? And, ultimately: are we there yet?},
    doi = {10.1016/j.cbpa.2016.12.010},
    keywords = {jena; MS; review;},
    owner = {Sebastian},
    pmid = {28025165},
    timestamp = {2017.01.02},
    url = {https://authors.elsevier.com/a/1UF-u4sz6LvFfY},
    }
  • [DOI] Bartels, B., Kulkarni, P., Danz, N., Böcker, S., Saluz, H. P. & Svatoš, A. (2017) Mapping metabolites from rough terrain: laser ablation electrospray ionization on non-flat samples. IN Rsc adv, 7.9045-9050.
    [Bibtex]
    @Article{bartels17mapping,
    author = {Benjamin Bartels and Purva Kulkarni and Norbert Danz and Sebastian B\"ocker and Hans Peter Saluz and Ale\v{s} Svato\v{s}},
    title = {Mapping metabolites from rough terrain: laser ablation electrospray ionization on non-flat samples},
    journal = {RSC Adv},
    year = {2017},
    volume = {7},
    pages = {9045-9050},
    abstract = {Established laser-based ionization experiments require the surface of a sample to be as flat as possible to guarantee optimal laser focus. A laser ablation electrospray ionization (LAESI) source was custom-built to accommodate the topography of non-flat sample surfaces. Employing a confocal distance sensor, a height profile of the surface in question was recorded prior to the actual ionization experiment. The robustness of the system was evaluated by the metabolic profiling of radish (Raphanus sativus) leaves, chosen due to their pronounced surface features and known content of specialized metabolites. After the ionization experiments, light microscopy imaging was performed to evaluate ablation crater size and position. Reproducible ablation mark diameters of 69 � 7 micrometer in average have been achieved. Mass spectrometric imaging capability has been proven on R. sativus leaf samples as well.},
    doi = {10.1039/C6RA26854D},
    keywords = {jena; MS; mass spectrometry; imaging MS;},
    owner = {Sebastian},
    timestamp = {2017.01.28},
    url = {http://pubs.rsc.org/en/content/articlehtml/2017/ra/c6ra26854d},
    }
  • Brouard, C., Bach, E., Böcker, S. & Rousu, J. (2017) Magnitude-preserving ranking for structured outputs Pmlr, 407–422.
    [Bibtex]
    @InProceedings{brouard17magnitude-preserving,
    author = {C\'{e}line Brouard and Eric Bach and Sebastian B\"{o}cker and Juho Rousu},
    title = {Magnitude-Preserving Ranking for Structured Outputs},
    booktitle = {Proc. of Asian Conference on Machine Learning},
    year = {2017},
    volume = {77},
    series = {Proceedings of Machine Learning Research},
    pages = {407--422},
    publisher = {PMLR},
    abstract = {In this paper, we present a novel method for solving structured prediction problems, based on combining Input Output Kernel Regression (IOKR) with an extension of magnitude-preserving ranking to structured output spaces. In particular, we concentrate on the case where a set of candidate outputs has been given, and the associated pre-image problem calls for ranking the set of candidate outputs. Our method, called magnitude-preserving IOKR, both aims to produce a good approximation of the output feature vectors, and to preserve the magnitude differences of the output features in the candidate sets. For the case where the candidate set does not contain corresponding 'correct' inputs, we propose a method for approximating the inputs through application of IOKR in the reverse direction. We apply our method to two learning problems: cross-lingual document retrieval and metabolite identification. Experiments show that the proposed approach improves performance over IOKR, and in the latter application obtains the current state-of-the-art accuracy.},
    file = {brouard17a.pdf:http\://proceedings.mlr.press/v77/brouard17a/brouard17a.pdf:PDF},
    keywords = {jena; ML; MS; MS/MS; IOKR; CSI:FingerID;},
    url = {http://proceedings.mlr.press/v77/brouard17a.html},
    }
  • [DOI] Engler, M. S., Scheubert, K., Schubert, U. S. & Böcker, S. (2017) Exploring the limits of the geometric copolymerization model. IN Polymers, 9.101.
    [Bibtex]
    @Article{engler17exploring,
    author = {Engler, Martin S. and Scheubert, Kerstin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {Exploring the Limits of the Geometric Copolymerization Model},
    journal = {Polymers},
    year = {2017},
    volume = {9},
    pages = {101},
    abstract = {The Geometric copolymerization model is a recently introduced statistical Markov chain model. Here, we investigate its practicality. First, several approaches to identify the optimal model parameters from observed copolymer fingerprints are evaluated using Monte-Carlo simulated data. Directly optimizing the parameters is robust against noise but has impractically long running times. A compromise between robustness and running time is found by exploiting the relationship between monomer concentrations calculated by ordinary differential equations and the Geometric model. Second, we investigate the applicability of the model to copolymerizations beyond living polymerization and show that the model is useful for copolymerizations involving termination and depropagation reactions.},
    comment = {Eigentlich das "fitting MS1" paper, aber sehr sehr kurz},
    doi = {10.3390/polym9030101},
    keywords = {polymer ms; jena;},
    owner = {martin},
    timestamp = {2016.04.19},
    }
  • [DOI] Fleischauer, M. & Böcker, S. (2017) Bad Clade Deletion supertrees: a fast and accurate supertree algorithm. IN Mol biol evol, 34.2408-2421.
    [Bibtex]
    @Article{fleischauer17bad,
    author = {Markus Fleischauer and Sebastian B\"ocker},
    title = {{B}ad {C}lade {D}eletion Supertrees: A Fast and Accurate Supertree Algorithm},
    journal = {Mol Biol Evol},
    year = {2017},
    volume = {34},
    pages = {2408-2421},
    abstract = {Supertree methods merge a set of overlapping phylogenetic trees into a supertree containing all taxa of the input trees. The challenge in supertree reconstruction is the way of dealing with conflicting information in the input trees. Many different algorithms for different objective functions have been suggested to resolve these conflicts. In particular, there exist methods based on encoding the source trees in a matrix, where the supertree is constructed applying a local search heuristic to optimize the respective objective function. We present a novel heuristic supertree algorithm called Bad Clade Deletion (BCD) supertrees. It uses minimum cuts to delete a locally minimal number of columns from such a matrix representation so that it is compatible. This is the complement problem to Matrix Representation with Compatibility (Maximum Split Fit). Our algorithm has guaranteed polynomial worst-case running time and performs swiftly in practice. Different from local search heuristics, it guarantees to return the directed perfect phylogeny for the input matrix, corresponding to the parent tree of the input trees, if one exists. Comparing supertrees to model trees for simulated data, BCD shows a better accuracy (F1-score) than the state-of-the-art algorithms SuperFine (up to 3\%) and Matrix Representation with Parsimony (up to 7\%); at the same time, BCD is up to 7 times faster than SuperFine, and up to 600 times faster than Matrix Representation with Parsimony. Finally, using the BCD supertree as a starting tree for a combined Maximum Likelihood analysis using RAxML, we reach significantly improved accuracy (1\% higher F1-score) and running time (1.7-fold speedup).},
    doi = {10.1093/molbev/msx191},
    keywords = {jena; supertrees; bcd supertrees;},
    owner = {Sebastian},
    pmid = {28873954},
    timestamp = {2017.06.25},
    }
  • [DOI] Hufsky, F. & Böcker, S. (2017) Mining molecular structure databases: identification of small molecules based on fragmentation mass spectrometry data. IN Mass spectrom rev, 36.624-633.
    [Bibtex]
    @Article{hufsky17mining,
    author = {Franziska Hufsky and Sebastian B\"ocker},
    title = {Mining molecular structure databases: Identification of small molecules based on fragmentation mass spectrometry data},
    journal = {Mass Spectrom Rev},
    year = {2017},
    volume = {36},
    number = {5},
    pages = {624-633},
    abstract = {Mass spectrometry (MS) is a key technology for the analysis of small molecules. For the identification and structural elucidation of novel molecules, new approaches beyond straightforward spectral comparison are required. In this review, we will cover computational methods that help with the identification of small molecules by analyzing fragmentation MS data. We focus on the four main approaches to mine a database of metabolite structures, that is rule-based fragmentation spectrum prediction,
    combinatorial fragmentation, competitive fragmentation modeling, and molecular fingerprint prediction.},
    doi = {10.1002/mas.21489},
    keywords = {jena},
    owner = {Sebastian},
    pmid = {26763615},
    timestamp = {2015.12.16},
    }
  • [DOI] Scheubert, K., Hufsky, F., Petras, D., Wang, M., Nothias, L., Dührkop, K., Bandeira, N., Dorrestein, P. C. & Böcker, S. (2017) Significance estimation for large scale metabolomics annotations by spectral matching. IN Nat commun, 8.1494.
    [Bibtex]
    @Article{scheubert17significance,
    author = {Scheubert, Kerstin and Hufsky, Franziska and Petras, Daniel and Wang, Mingxun and Nothias, Louis-F\'elix and D\"uhrkop, Kai and Bandeira, Nuno and Dorrestein, Pieter C. and B\"ocker, Sebastian},
    title = {Significance estimation for large scale metabolomics annotations by spectral matching},
    journal = {Nat Commun},
    year = {2017},
    volume = {8},
    pages = {1494},
    abstract = {The annotation of small molecules in untargeted mass spectrometry relies on the matching of fragment spectra to reference library spectra. While various spectrum-spectrum match scores exist, the field lacks statistical methods for estimating the false discovery rates (FDR) of these annotations. We present empirical Bayes and target-decoy based methods to estimate the false discovery rate (FDR) for 70 public metabolomics data sets. We show that the spectral matching settings need to be adjusted for each project. By adjusting the scoring parameters and thresholds, the number of annotations rose, on average, by +139% (ranging from -92 up to +5705%) when compared with a default parameter set available at GNPS. The FDR estimation methods presented will enable a user to assess the scoring criteria for large scale analysis of mass spectrometry based metabolomics data that has been essential in the advancement of proteomics, transcriptomics, and genomics science.},
    doi = {10.1038/s41467-017-01318-5},
    keywords = {jena; FDR;},
    owner = {Sebastian},
    pmid = {29133785},
    timestamp = {2017.04.03},
    }
  • [DOI] Schymanski, E. L., Ruttkies, C., Krauss, M., Brouard, C., Kind, T., Dührkop, K., Allen, F. R., Vaniya, A., Verdegem, D., Böcker, S., Rousu, J., Shen, H., Tsugawa, H., Sajed, T., Fiehn, O., Ghesquière, B. & Neumann, S. (2017) Critical Assessment of Small Molecule Identification 2016: automated methods. IN J cheminf, 9.22.
    [Bibtex]
    @Article{schymanski17critical,
    author = {Emma Louise Schymanski and Christoph Ruttkies and Martin Krauss and C\'{e}line Brouard and Tobias Kind and Kai D\"{u}hrkop and Felicity Ruth Allen and Arpana Vaniya and Dries Verdegem and Sebastian B\"{o}cker and Juho Rousu and Huibin Shen and Hiroshi Tsugawa and Tanvir Sajed and Oliver Fiehn and Bart Ghesqui\`{e}re and Steffen Neumann},
    title = {{Critical} {Assessment} of {Small} {Molecule} {Identification} 2016: Automated Methods},
    journal = {J Cheminf},
    year = {2017},
    volume = {9},
    pages = {22},
    abstract = {Background: The fourth round of the Critical Assessment of Small Molecule Identification (CASMI) Contest (www.casmi-contest.org) was held in 2016, with two new categories for automated methods. This article covers the 208 challenges in Categories 2 and 3, without and with metadata, from organization, participation, results and post-contest evaluation of CASMI 2016 through to perspectives for future contests and small molecule annotation/identification.
    Results: The Input Output Kernel Regression (CSI:IOKR) machine learning approach performed best in "Category 2: Best Automatic Structural Identification--In Silico Fragmentation Only", won by Team Brouard with 41% challenge wins. The winner of "Category 3: Best Automatic Structural Identification--Full Information" was Team Kind (MS-FINDER), with 76% challenge wins. The best methods were able to achieve over 30% Top 1 ranks in Category 2, with all methods ranking the correct candidate in the Top 10 in around 50% of challenges. This success rate rose to 70% Top 1 ranks in Category 3, with candidates in the Top 10 in over 80% of the challenges. The machine learning and chemistry-based approaches are shown to perform in complementary ways.
    Conclusions: The improvement in (semi-)automated fragmentation methods for small molecule identification has been substantial. The achieved high rates of correct candidates in the Top 1 and Top 10, despite large candidate numbers, open up great possibilities for high-throughput annotation of untargeted analysis for "known unknowns". As more high quality training data becomes available, the improvements in machine learning methods will likely continue, but the alternative approaches still provide valuable complementary information. Improved integration of experimental context will also improve identification success further for "real life" annotations. The true "unknown unknowns" remain to be evaluated in future CASMI contests.},
    doi = {10.1186/s13321-017-0207-1},
    keywords = {jena; CASMI; MS; metabolites; tandem MS; MS/MS;},
    owner = {Sebastian},
    pmid = {29086042},
    timestamp = {2016.12.15},
    }
  • [DOI] Barbosa, E., Röttger, R., Hauschild, A., {de Castro Soares}, S., Böcker, S., Azevedo, V. & Baumbach, J. (2017) Lifestyle-specific-islands (LiSSI): integrated bioinformatics platform for genomic island analysis. IN J integr bioinform, 14.2017-10.
    [Bibtex]
    @Article{barbosa17lifestyle,
    author = {Barbosa, Eudes and R\"ottger, Richard and Hauschild, Anne-Christin and {de Castro Soares}, Siomar and B\"ocker, Sebastian and Azevedo, Vasco and Baumbach, Jan},
    title = {LifeStyle-Specific-Islands ({LiSSI}): Integrated Bioinformatics Platform for Genomic Island Analysis},
    journal = {J Integr Bioinform},
    year = {2017},
    volume = {14},
    number = {2},
    pages = {2017-0010},
    abstract = {Distinct bacteria are able to cope with highly diverse lifestyles; for instance, they can be free living or host-associated. Thus, these organisms must possess a large and varied genomic arsenal to withstand different environmental conditions. To facilitate the identification of genomic features that might influence bacterial adaptation to a specific niche, we introduce LifeStyle-Specific-Islands (LiSSI). LiSSI combines evolutionary sequence analysis with statistical learning (Random Forest with feature selection, model tuning and robustness analysis). In summary, our strategy aims to identify conserved consecutive homology sequences (islands) in genomes and to identify the most discriminant islands for each lifestyle.},
    created = {2017-07-05},
    doi = {10.1515/jib-2017-0010},
    keywords = {jena; Gene Clusters; Bacteria; Homologous genes; Island; Lifestyle; Machine Learning},
    owner = {NLM},
    pmid = {28678736},
    revised = {2017-07-28},
    timestamp = {2017.11.15},
    }
  • White, T. W. J., Ludwig, M. & Böcker, S. (2017) Exact and heuristic algorithms for cograph editing .
    [Bibtex]
    @TechReport{white17exact,
    author = {W. Timothy J. White and Marcus Ludwig and Sebastian B\"ocker},
    institution = {arXiv},
    title = {Exact and heuristic algorithms for Cograph Editing},
    year = {2017},
    number = {arXiv:1711.05839},
    abstract = {We present a dynamic programming algorithm for optimally solving the Cograph Editing problem on an $n$-vertex graph that runs in $O(3^n n)$ time and uses $O(2^n)$ space. In this problem, we are given a graph $G = (V, E)$ and the task is to find a smallest possible set $F \subseteq V \times V$ of vertex pairs such that $(V, E \bigtriangleup F)$ is a cograph (or $P_4$-free graph), where $\bigtriangleup$ represents the symmetric difference operator. We also describe a technique for speeding up the performance of the algorithm in practice.},
    keywords = {jena; cograph editing; fpt;},
    owner = {Sebastian},
    timestamp = {2017.12.14},
    url = {https://arxiv.org/abs/1711.05839},
    }

2016

  • [DOI] Böcker, S., Rousu, J. & Schymanski, E. (2016) Computational Metabolomics (Dagstuhl Seminar 15492). IN Dagstuhl reports, 5.180–192.
    [Bibtex]
    @Article{boecker16computational,
    author = {Sebastian B{\"o}cker and Juho Rousu and Emma Schymanski},
    title = {{Computational Metabolomics} ({Dagstuhl Seminar} 15492)},
    journal = {Dagstuhl Reports},
    year = {2016},
    volume = {5},
    number = {11},
    pages = {180--192},
    issn = {2192-5283},
    address = {Dagstuhl, Germany},
    doi = {10.4230/DagRep.5.11.180},
    editor = {Sebastian B{\"o}cker and Juho Rousu and Emma Schymanski},
    keywords = {jena; algorithms, bioinformatics, cheminformatics, computational mass spectrometry, computational metabolomics, databases},
    owner = {Sebastian},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    timestamp = {2016.04.07},
    url = {http://drops.dagstuhl.de/opus/volltexte/2016/5801},
    }
  • [DOI] Böcker, S. & Dührkop, K. (2016) Fragmentation trees reloaded. IN J cheminform, 8.5.
    [Bibtex]
    @Article{boecker16fragmentation,
    author = {Sebastian B\"ocker and Kai D\"uhrkop},
    title = {Fragmentation trees reloaded},
    journal = {J Cheminform},
    year = {2016},
    volume = {8},
    pages = {5},
    abstract = {Untargeted metabolomics commonly uses liquid chromatography mass spectrometry to measure abundances of metabolites; subsequent tandem mass spectrometry is used to derive information about individual compounds. One of the bottlenecks in this experimental setup is the interpretation of fragmentation spectra to accurately and efficiently identify compounds. Fragmentation trees have become a powerful tool for the interpretation of tandem mass spectrometry data of small molecules. These trees are determined from the data using combinatorial optimization, and aim at explaining the experimental data via fragmentation cascades. Fragmentation tree computation does not require spectral or structural databases. To obtain biochemically meaningful trees, one needs an elaborate optimization function (scoring).
    We present a new scoring for computing fragmentation trees, transforming the combinatorial optimization into a Maximum A Posteriori estimator. We demonstrate the superiority of the new scoring for two tasks: Both for the de novo identification of molecular formulas of unknown compounds, and for searching a database for structurally similar compounds, our method SIRIUS 3, performs significantly better than the previous version of our method, as well as other methods for this task.
    SIRIUS 3 can be a part of an untargeted metabolomics workflow, allowing researchers to investigate unknowns using automated computational methods.},
    doi = {10.1186/s13321-016-0116-8},
    keywords = {jena; FT; fragmentation trees;},
    owner = {Sebastian},
    pmid = {26839597},
    timestamp = {2013.06.10},
    url = {http://www.jcheminf.com/content/8/1/5},
    }
  • [DOI] Brouard, C., Shen, H., Dührkop, K., d’Alché-Buc, F., Böcker, S. & Rousu, J. (2016) Fast metabolite identification with input output kernel regression. IN Bioinformatics, 32.i28-i36.
    [Bibtex]
    @Article{brouard16fast,
    author = {C{\'{e}}line Brouard and Huibin Shen and Kai D{\"{u}}hrkop and Florence {d'Alch{\'{e}}-Buc} and Sebastian B{\"{o}}cker and Juho Rousu},
    title = {Fast metabolite identification with Input Output Kernel Regression},
    journal = {Bioinformatics},
    year = {2016},
    volume = {32},
    number = {12},
    pages = {i28-i36},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2016)},
    abstract = {MOTIVATION: An important problematic of metabolomics is to identify metabolites using tandem mass spectrometry data. Machine learning methods have been proposed recently to solve this problem by predicting molecular fingerprint vectors and matching these fingerprints against existing molecular structure databases. In this work we propose to address the metabolite identification problem using a structured output prediction approach. This type of approach is not limited to vector output space and can handle structured output space such as the molecule space.
    RESULTS: We use the Input Output Kernel Regression method to learn the mapping between tandem mass spectra and molecular structures. The principle of this method is to encode the similarities in the input (spectra) space and the similarities in the output (molecule) space using two kernel functions. This method approximates the spectra-molecule mapping in two phases. The first phase corresponds to a regression problem from the input space to the feature space associated to the output kernel. The second phase is a preimage problem, consisting in mapping back the predicted output feature vectors to the molecule space. We show that our approach achieves state-of-the-art accuracy in metabolite identification. Moreover, our method has the advantage of decreasing the running times for the training step and the test step by several orders of magnitude over the preceding methods.},
    doi = {10.1093/bioinformatics/btw246},
    file = {pdf:2016/BrouardEtAl_FastMetaboliteIdentification_Bioinformatics_2016:PDF},
    keywords = {jena; MS; tandem MS; CSI:FingerID; kernel methods; IOKR;},
    owner = {Sebastian},
    pmid = {27307628},
    timestamp = {2016.02.20},
    }
  • [DOI] Engler, M. S., Crotty, S., Barthel, M. J., Pietsch, C., Schubert, U. S. & Böcker, S. (2016) Abundance correction for mass discrimination effects in polymer mass spectra. IN Rapid commun mass spectrom, 30.1233–1241.
    [Bibtex]
    @Article{engler16abundance,
    author = {Engler, Martin S. and Crotty, Sarah and Barthel, Markus J and Pietsch, Christian and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {Abundance correction for mass discrimination effects in polymer mass spectra},
    journal = {Rapid Commun Mass Spectrom},
    year = {2016},
    volume = {30},
    pages = {1233--1241},
    abstract = {RATIONALE: Matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOFMS) is frequently used to analyze homo- and copolymers, i.e. for computing copolymer fingerprints. However, the oligomer abundances are influenced by mass discrimination, i.e. mass- and composition-dependent ionization. We have developed a computational method to correct the abundance bias caused by the mass discrimination.
    METHODS: MALDI-TOFMS in combination with computational methods was used to investigate three random copolymers with different ratios of styrene and isoprene. Furthermore, equimolar high- and low-mass styrene and isoprene homopolymers (2500 and 4200 Da) were mixed and also analyzed by MALDI-TOFMS. The abundances of both copolymers and homopolymers were corrected for mass discrimination effects with our new method.
    RESULTS: The novel computational method was integrated into the existing COCONUT software. The method was demonstrated using the measured styrene and isoprene co- and homopolymers. First, the method was applied to homopolymer spectra. Subsequently, the copolymer fingerprint was computed from the copolymer MALDI mass spectra and the correcting function applied. The changes in the composition are plausible, indicating that correction of copolymer abundances was reasonable.
    CONCLUSIONS: Our computational method may help to avoid erroneous conclusions when analyzing copolymer MS spectra. The software is freely available and represents a step towards comprehensive computational support in polymer science.},
    doi = {10.1002/rcm.7553},
    file = {EnglerEtAl_AbundanceCorrectionMass_RCMS_2016.pdf:2016/EnglerEtAl_AbundanceCorrectionMass_RCMS_2016.pdf:PDF},
    keywords = {polymer ms; jena;},
    owner = {martin},
    timestamp = {2016.03.08},
    }
  • [DOI] Engler, M. S., Scheubert, K., Schubert, U. S. & Böcker, S. (2016) New statistical models for copolymerization. IN Polymers, 8.240.
    [Bibtex]
    @Article{engler16statistical,
    author = {Engler, Martin S. and Scheubert, Kerstin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {New Statistical Models for Copolymerization},
    journal = {Polymers},
    year = {2016},
    volume = {8},
    number = {6},
    pages = {240},
    abstract = {For many years, copolymerization has been studied using mathematical and statistical models. Here, we present new Markov chain models for copolymerization kinetics: the Bernoulli and Geometric models. They model copolymer synthesis as a random process and are based on a basic reaction scheme. In contrast to previous Markov chain approaches to copolymerization, both models take variable chain lengths and time-dependent monomer probabilities into account and allow for computing sequence likelihoods and copolymer fingerprints. Fingerprints can be computed from copolymer mass spectra, potentially allowing us to estimate the model parameters from measured fingerprints. We compare both models against Monte Carlo simulations. We find that computing the models is fast and memory efficient.},
    doi = {10.3390/polym8060240},
    file = {EnglerEtAl_NewStatisticalModels_Polymers_2016.pdf:2016/EnglerEtAl_NewStatisticalModels_Polymers_2016.pdf:PDF},
    keywords = {polymer ms; jena; linkPDF},
    owner = {martin},
    timestamp = {2016.04.19},
    url = {http://www.mdpi.com/2073-4360/8/6/240},
    }
  • [DOI] Fleischauer, M. & Böcker, S. (2016) Collecting reliable clades using the greedy strict consensus merger. IN Peerj, 4.e2172.
    [Bibtex]
    @Article{fleischauer16collecting,
    author = {Fleischauer, Markus and B{\"o}cker, Sebastian},
    title = {Collecting reliable clades using the Greedy Strict Consensus Merger},
    journal = {PeerJ},
    year = {2016},
    volume = {4},
    pages = {e2172},
    issn = {2167-8359},
    abstract = {Supertree methods combine a set of phylogenetic trees into a single supertree. Similar to supermatrix methods, these methods provide a way to reconstruct larger parts of the Tree of Life, potentially evading the computational complexity of phylogenetic inference methods such as maximum likelihood. The supertree problem can be formalized in different ways, to cope with contradictory information in the input. Many supertree methods have been developed. Some of them solve NP-hard optimization problems like the well-known Matrix Representation with Parsimony, while others have polynomial worst-case running time but work in a greedy fashion (FlipCut). Both can profit from a set of clades that are already known to be part of the supertree. The Superfine approach shows how the Greedy Strict Consensus Merger (GSCM) can be used as preprocessing to find these clades. We introduce different scoring functions for the GSCM, a randomization, as well as a combination thereof to improve the GSCM to find more clades. This helps, in turn, to improve the resolution of the GSCM supertree. We find this modifications to increase the number of true positive clades by 18\% compared to the currently used Overlap scoring.},
    doi = {10.7717/peerj.2172},
    file = {FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2016:2016/FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2016.pdf:PDF},
    keywords = {jena; supertree; phylogenetics; scm; Consensus; Supertree; Supermatrix; Divide and Conquer; FlipCut; Phylogeny},
    optmonth = jun,
    owner = {fleisch},
    pmid = {27375971},
    timestamp = {2016.06.29},
    url = {https://doi.org/10.7717/peerj.2172},
    }
  • [DOI] Meusel, M., Hufsky, F., Panter, F., Krug, D., Müller, R. & Böcker, S. (2016) Predicting the presence of uncommon elements in unknown biomolecules from isotope patterns. IN Anal chem, 88.7556-7566.
    [Bibtex]
    @Article{meusel16predicting,
    author = {Marvin Meusel and Franziska Hufsky and Fabian Panter and Daniel Krug and Rolf M\"uller and Sebastian B\"ocker},
    title = {Predicting the presence of uncommon elements in unknown biomolecules from isotope patterns},
    journal = {Anal Chem},
    year = {2016},
    volume = {88},
    number = {15},
    pages = {7556-7566},
    abstract = {Motivation: The determination of the molecular formula is one of the earliest and most important steps when investigating the chemical nature of an unknown compound. Common approaches use the isotopic pattern of a compound measured using mass spectrometry. Computational methods to determine the molecular formula from this isotopic pattern require a fixed set of elements. Considering all possible elements severely increases running times and more importantly the chance for false positive identifications as the number of candidate formulas for a given target mass rises significantly if the constituting elements are not pre-filtered. This negative effect grows stronger for compounds of higher molecular mass as the effect of a single atom on the overall isotopic pattern grows smaller. On the other hand, hand-selected restrictions on this set of elements may prevent the identification of the correct molecular formula. Thus, it is a crucial step to determine the set of elements most likely comprising the compound prior to the assignment of an elemental formula to an exact mass.
    Results: In this paper, we present a method to determine the presence of certain elements (sulfur, chlorine, bromine, boron and selenium) in the compound from its (high mass accuracy) isotopic pattern. We limit ourselves to biomolecules, in the sense of products from nature or synthetic products with potential bioactivity. The classifiers developed here predict the presence of an element with a very high sensitivity and high specificity. We evaluate classifiers on three real-world datasets with 663 isotope patterns in total: 184 isotope patterns containing sulfur, 187 containing chlorine, 14 containing bromine, one containing boron, one containing selenium. In no case do we make a false negative prediction; for chlorine, bromine, boron, and selenium, we make ten false positive predictions in total. We also demonstrate the impact of our method on the identification of molecular formulas, in particular the number of considered candidates and running time.},
    doi = {10.1021/acs.analchem.6b01015},
    keywords = {jena; MS; MS1; isotope pattern;},
    owner = {Sebastian},
    pmid = {27398867},
    timestamp = {2016.07.11},
    }
  • [DOI] Wang, M., Carver, J. J., Phelan, V. V., Sanchez, L. M., Garg, N., Peng, Y., Nguyen, D. D., Watrous, J., Kapono, C. A., Luzzatto-Knaan, T., Porto, C., Bouslimani, A., Melnik, A. V., Meehan, M. J., Liu, W., Crüsemann, M., Boudreau, P. D., Esquenazi, E., Sandoval-Calderón, M., Kersten, R. D., Pace, L. A., Quinn, R. A., Duncan, K. R., Hsu, C., Floros, D. J., Gavilan, R. G., Kleigrewe, K., Northen, T., Dutton, R. J., Parrot, D., Carlson, E. E., Aigle, B., Michelsen, C. F., Jelsbak, L., Sohlenkamp, C., Pevzner, P., Edlund, A., McLean, J., Piel, J., Murphy, B. T., Gerwick, L., Liaw, C., Yang, Y., Humpf, H., Maansson, M., Keyzers, R. A., Sims, A. C., Johnson, A. R., Sidebottom, A. M., Sedio, B. E., Klitgaard, A., Larson, C. B., {Boya P}, C. A., Torres-Mendoza, D., Gonzalez, D. J., Silva, D. B., Marques, L. M., Demarque, D. P., Pociute, E., O’Neill, E. C., Briand, E., Helfrich, E. J. N., Granatosky, E. A., Glukhov, E., Ryffel, F., Houson, H., Mohimani, H., Kharbush, J. J., Zeng, Y., Vorholt, J. A., Kurita, K. L., Charusanti, P., McPhail, K. L., Nielsen, K. F., Vuong, L., Elfeki, M., Traxler, M. F., Engene, N., Koyama, N., Vining, O. B., Baric, R., Silva, R. R., Mascuch, S. J., Tomasi, S., Jenkins, S., Macherla, V., Hoffman, T., Agarwal, V., Williams, P. G., Dai, J., Neupane, R., Gurr, J., Rodríguez, A. M. C., Lamsa, A., Zhang, C., Dorrestein, K., Duggan, B. M., Almaliti, J., Allard, P., Phapale, P., Nothias, L., Alexandrov, T., Litaudon, M., Wolfender, J., Kyle, J. E., Metz, T. O., Peryea, T., Nguyen, D., VanLeer, D., Shinn, P., Jadhav, A., Müller, R., Waters, K. M., Shi, W., Liu, X., Zhang, L., Knight, R., Jensen, P. R., Palsson, B. {Ø}., Pogliano, K., Linington, R. G., Gutiérrez, M., Lopes, N. P., Gerwick, W. H., Moore, B. S., Dorrestein, P. C. & Bandeira, N. (2016) Sharing and community curation of mass spectrometry data with Global Natural Products Social molecular networking. IN Nat biotechnol, 34.828–837.
    [Bibtex]
    @Article{wang16sharing,
    author = {Wang, Mingxun and Carver, Jeremy J. and Phelan, Vanessa V. and Sanchez, Laura M. and Garg, Neha and Peng, Yao and Nguyen, Don Duy and Watrous, Jeramie and Kapono, Clifford A. and Luzzatto-Knaan, Tal and Porto, Carla and Bouslimani, Amina and Melnik, Alexey V. and Meehan, Michael J. and Liu, Wei-Ting and Cr{\"{u}}semann, Max and Boudreau, Paul D. and Esquenazi, Eduardo and Sandoval-Calder{\'{o}}n, Mario and Kersten, Roland D. and Pace, Laura A. and Quinn, Robert A. and Duncan, Katherine R. and Hsu, Cheng-Chih and Floros, Dimitrios J. and Gavilan, Ronnie G. and Kleigrewe, Karin and Northen, Trent and Dutton, Rachel J. and Parrot, Delphine and Carlson, Erin E. and Aigle, Bertrand and Michelsen, Charlotte F. and Jelsbak, Lars and Sohlenkamp, Christian and Pevzner, Pavel and Edlund, Anna and McLean, Jeffrey and Piel, J{\"{o}}rn and Murphy, Brian T. and Gerwick, Lena and Liaw, Chih-Chuang and Yang, Yu-Liang and Humpf, Hans-Ulrich and Maansson, Maria and Keyzers, Robert A. and Sims, Amy C. and Johnson, Andrew R. and Sidebottom, Ashley M. and Sedio, Brian E. and Klitgaard, Andreas and Larson, Charles B. and {Boya P}, Cristopher A. and Torres-Mendoza, Daniel and Gonzalez, David J. and Silva, Denise B. and Marques, Lucas M. and Demarque, Daniel P. and Pociute, Egle and O'Neill, Ellis C. and Briand, Enora and Helfrich, Eric J N. and Granatosky, Eve A. and Glukhov, Evgenia and Ryffel, Florian and Houson, Hailey and Mohimani, Hosein and Kharbush, Jenan J. and Zeng, Yi and Vorholt, Julia A. and Kurita, Kenji L. and Charusanti, Pep and McPhail, Kerry L. and Nielsen, Kristian Fog and Vuong, Lisa and Elfeki, Maryam and Traxler, Matthew F. and Engene, Niclas and Koyama, Nobuhiro and Vining, Oliver B. and Baric, Ralph and Silva, Ricardo R. and Mascuch, Samantha J. and Tomasi, Sophie and Jenkins, Stefan and Macherla, Venkat and Hoffman, Thomas and Agarwal, Vinayak and Williams, Philip G. and Dai, Jingqui and Neupane, Ram and Gurr, Joshua and Rodr{\'{i}}guez, Andr{\'{e}}s M C. and Lamsa, Anne and Zhang, Chen and Dorrestein, Kathleen and Duggan, Brendan M. and Almaliti, Jehad and Allard, Pierre-Marie and Phapale, Prasad and Nothias, Louis-Felix and Alexandrov, Theodore and Litaudon, Marc and Wolfender, Jean-Luc and Kyle, Jennifer E. and Metz, Thomas O. and Peryea, Tyler and Nguyen, Dac-Trung and VanLeer, Danielle and Shinn, Paul and Jadhav, Ajit and M{\"{u}}ller, Rolf and Waters, Katrina M. and Shi, Wenyuan and Liu, Xueting and Zhang, Lixin and Knight, Rob and Jensen, Paul R. and Palsson, Bernhard {\O}. and Pogliano, Kit and Linington, Roger G. and Guti{\'{e}}rrez, Marcelino and Lopes, Norberto P. and Gerwick, William H. and Moore, Bradley S. and Dorrestein, Pieter C. and Bandeira, Nuno},
    title = {Sharing and community curation of mass spectrometry data with {Global Natural Products Social} Molecular Networking},
    journal = {Nat Biotechnol},
    year = {2016},
    volume = {34},
    number = {8},
    pages = {828--837},
    abstract = {The potential of the diverse chemistries present in natural products (NP) for biotechnology and medicine remains untapped because NP databases are not searchable with raw data and the NP community has no way to share data other than in published papers. Although mass spectrometry (MS) techniques are well-suited to high-throughput characterization of NP, there is a pressing need for an infrastructure to enable sharing and curation of data. We present Global Natural Products Social Molecular Networking (GNPS; http://gnps.ucsd.edu), an open-access knowledge base for community-wide organization and sharing of raw, processed or identified tandem mass (MS/MS) spectrometry data. In GNPS, crowdsourced curation of freely available community-wide reference MS libraries will underpin improved annotations. Data-driven social-networking should facilitate identification of spectra and foster collaborations. We also introduce the concept of 'living data' through continuous reanalysis of deposited data.},
    doi = {10.1038/nbt.3597},
    owner = {Sebastian},
    pii = {nbt.3597},
    pmid = {27504778},
    timestamp = {2016.08.16},
    }
  • [DOI] Winter, S., Jahn, K., Wehner, S., Kuchenbecker, L., Marz, M., Stoye, J. & Böcker, S. (2016) Finding approximate gene clusters with Gecko\,3. IN Nucleic acids res, 44.9600-9610.
    [Bibtex]
    @Article{winter16finding,
    author = {Sascha Winter and Katharina Jahn and Stefanie Wehner and Leon Kuchenbecker and Manja Marz and Jens Stoye and Sebastian B\"ocker},
    title = {Finding approximate gene clusters with {Gecko\,3}},
    journal = {Nucleic Acids Res},
    year = {2016},
    volume = {44},
    number = {20},
    pages = {9600-9610},
    abstract = {Gene-order based comparison of multiple genomes provides signals for functional analysis of genes and the evolutionary process of genome organization. Gene clusters are regions of co-localized genes on genomes of different species. The rapid increase in sequenced
    genomes necessitates bioinformatics tools for finding gene clusters in hundreds of genomes. Existing tools are often restricted to few (in many cases, only two) genomes, and often make restrictive assumptions such as short perfect conservation, conserved gene order, or
    monophyletic gene clusters.
    We present Gecko 3, an open-source software for finding gene clusters in hundreds of bacterial genomes, that comes with an easy-to-use graphical user interface. The underlying gene cluster model is intuitive, can cope with low degrees of conservation as well as misannotations, and is complemented by a sound statistical evaluation. To evaluate the biological benefit of Gecko 3 and to exemplify our method, we search for gene clusters in a dataset of 678 bacterial genomes using Synechocystis sp. PCC 6803 as a reference. We confirm detected gene clusters reviewing the literature and comparing them to a database of operons; we detect two novel clusters, which were confirmed by publicly available experimental RNA-Seq data. The computational analysis is carried out on a laptop computer in less than 40 minutes.},
    doi = {10.1093/nar/gkw843},
    keywords = {jena; gene cluster; 3AGC; approximate gene cluster; finding gene clusters; operons;},
    owner = {Sebastian},
    pmid = {27679480},
    timestamp = {2016.09.16},
    }

2015

  • [DOI] Böcker, S. (2015) Ten times eighteen. IN J inform process jpn, 23.258–264.
    [Bibtex]
    @Article{boecker15ten,
    author = {Sebastian B\"ocker},
    title = {Ten times eighteen},
    journal = {J Inform Process Jpn},
    year = {2015},
    volume = {23},
    number = {3},
    pages = {258--264},
    abstract = {We consider the following simple game: We are given a table with ten slots indexed one to ten. In each of the ten rounds of the game, three dice are rolled and the numbers are added. We then put this number into any free slot. For each slot, we multiply the slot index with the number in this slot, and add up the products. The goal of the game is to maximize this score. In more detail, we play the game many times, and try to maximize the sum of scores or, equivalently, the expected score. We present a strategy to optimally play this game with respect to the expected score. We then modify our strategy so that we need only polynomial time and space. Finally, we show that knowing all ten rolls in advance, results in a relatively small increase in score. Although the game has a random component and requires a non-trivial strategy to be solved optimally, this strategy needs only polynomial time and space.},
    doi = {10.2197/ipsjjip.23.258},
    keywords = {jena;},
    owner = {Sebastian},
    timestamp = {2015.01.08},
    }
  • [DOI] Dührkop, K. & Böcker, S. (2015) Fragmentation trees reloaded , 65–79.
    [Bibtex]
    @InProceedings{duehrkop15fragmentation,
    author = {Kai D\"uhrkop and Sebastian B\"ocker},
    title = {Fragmentation trees reloaded},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2015)},
    year = {2015},
    volume = {9029},
    series = lncs,
    pages = {65--79},
    organization = Springer,
    abstract = {Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually relies on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Today, the vast majority of metabolites remain unknown. Fragmentation trees have become a powerful tool for the interpretation of tandem mass spectrometry data of small molecules. These trees are found by combinatorial optimization, and aim at explaining the experimental data via fragmentation cascades. To obtain biochemically meaningful results requires an elaborate optimization function.
    We present a new scoring for computing fragmentation trees, transforming the combinatorial optimization into a maximum a~posteriori estimator. We demonstrate the superiority of the new scoring for two tasks: Both for the de novo identification of molecular formulas of unknown compounds, and for searching a database for structurally similar compounds, our methods performs significantly better than the previous scoring, as well as other methods for this task. Our method can expedite the workflow for untargeted metabolomics, allowing researchers to investigate unknowns using automated computational methods.},
    doi = {10.1007/978-3-319-16706-0_10},
    journalref = {boecker16fragmentation},
    keywords = {jena; FT; fragmentation trees;},
    owner = {Sebastian},
    timestamp = {2013.06.10},
    }
  • [DOI] Dührkop, K., Shen, H., Meusel, M., Rousu, J. & Böcker, S. (2015) Searching molecular structure databases with tandem mass spectra using CSI:FingerID. IN Proc natl acad sci u s a, 112.12580–12585.
    [Bibtex]
    @Article{duehrkop15searching,
    author = {Kai D\"uhrkop and Huibin Shen and Marvin Meusel and Juho Rousu and Sebastian B\"ocker},
    title = {Searching molecular structure databases with tandem mass spectra using {CSI:FingerID}},
    journal = {Proc Natl Acad Sci U S A},
    year = {2015},
    volume = {112},
    number = {41},
    pages = {12580--12585},
    abstract = {Metabolites provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually rely on tandem MS to identify the thousands of compounds in a biological sample. Today, the vast majority of metabolites remain unknown. We present a method for searching molecular structure databases using tandem MS data of small molecules. Our method computes a fragmentation tree that best explains the fragmentation spectrum of an unknown molecule. We use the fragmentation tree to predict the molecular structure fingerprint of the unknown compound using machine learning. This fingerprint is then used to search a molecular structure database such as PubChem. Our method is shown to improve on the competing methods for computational metabolite identification by a considerable margin.},
    doi = {10.1073/pnas.1509788112},
    keywords = {jena; MS; tandem MS; molecular fingerprints; FingerID; databases; molecular structures;},
    optmonth = oct,
    owner = {Sebastian},
    pmid = {26392543},
    timestamp = {2015.08.12},
    }
  • [DOI] Engler, M. S., Crotty, S., Barthel, M. J., Pietsch, C., Knop, K., Schubert, U. S. & Böcker, S. (2015) COCONUT – an efficient tool for estimating copolymer compositions from mass spectra. IN Anal chem, 87.5223–5231.
    [Bibtex]
    @Article{engler15coconut,
    author = {Engler, Martin S. and Crotty, Sarah and Barthel, Markus J and Pietsch, Christian and Knop, Katrin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {{COCONUT} -- An Efficient Tool for Estimating Copolymer Compositions from Mass Spectra},
    journal = {Anal Chem},
    year = {2015},
    volume = {87},
    number = {10},
    pages = {5223--5231},
    abstract = {The accurate characterization of synthetic polymer sequences represents a major challenge in polymer science. Matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) is frequently used for the characterization of copolymer samples. We present the COCONUT software for estimating the composition distribution of the copolymer. Our method is based on Linear Programming and is capable of automatically resolving overlapping isotopes and isobaric ions. We demonstrate that COCONUT is well suited for analyzing complex copolymer MS spectra. COCONUT is freely available and provides a graphical user interface.},
    doi = {10.1021/acs.analchem.5b00146},
    file = {EnglerEtAl_COCONUTEfficientTool_AnalChem_2015_preprint.pdf:2015/EnglerEtAl_COCONUTEfficientTool_AnalChem_2015_preprint.pdf:PDF},
    keywords = {polymer ms; jena; linkPDF},
    owner = {martin},
    pmid = {25884349},
    timestamp = {2015.04.20},
    url = {http://pubs.acs.org/articlesonrequest/AOR-ek4NKbcskMyjJttVVcEJ},
    }
  • [DOI] Fleischauer, M. & Böcker, S. (2015) Collecting reliable clades using the greedy strict consensus merger Peerj inc. san francisco, usa, e1595.
    [Bibtex]
    @InProceedings{fleischauer15collecting,
    author = {Fleischauer, Markus and B{\"o}cker, Sebastian},
    title = {Collecting reliable clades using the Greedy Strict Consensus Merger},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2015)},
    year = {2015},
    volume = {3},
    series = {PeerJ PrePrints},
    pages = {e1595},
    publisher = {PeerJ Inc. San Francisco, USA},
    abstract = {Supertree methods combine a set of phylogenetic trees into a single supertree. Similar to supermatrix methods, these methods provide a way to reconstruct larger parts of the Tree of Life, potentially evading the computational complexity of phylogenetic inference methods such as maximum likelihood. The supertree problem can be formalized in different ways, to cope with contradictory information in the input. Many supertree methods have been developed. Some of them solve NP-hard optimization problems like the well known Matrix Representation with Parsimony, others have polynomial worst-case running time but work in a greedy fashion (FlipCut). Both can profit from a set of clades that are already known to be part of the supertree. The Superfine approach shows how the Greedy Strict Consensus Merger (GSCM) can be used as preprocessing to find these clades. We introduce different scoring functions for the GSCM, a randomization, as well as a combination thereof to improve the GSCM to find more clades. This helps, in turn, to improve the resolution of the final supertree. We find this modifications to increase the number of true positive clades by 16\% while decreasing the number of false positive clades by 3\% compared to the currently used Overlap scoring.},
    doi = {10.7287/peerj.preprints.1297v1},
    file = {FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2015.pdf:2015/FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2015.pdf:PDF},
    journalref = {fleischauer16collecting},
    keywords = {jena; supertree; phylogenetics; scm;},
    owner = {fleisch},
    timestamp = {2015.08.27},
    url = {https://peerj.com/preprints/1297v1/},
    }
  • [DOI] Kulkarni, P., Kaftan, F., Kynast, P., Svatoš, A. & Böcker, S. (2015) Correcting mass shifts: a lock-mass-free recalibration procedure for mass spectrometry imaging data. IN Anal bioanal chem, 405.7603–7613.
    [Bibtex]
    @Article{kulkarni15correcting,
    author = {Purva Kulkarni and Filip Kaftan and Philipp Kynast and Ale\v{s} Svato\v{s} and Sebastian B\"ocker},
    title = {Correcting mass shifts: A lock-mass-free recalibration procedure for mass spectrometry imaging data},
    journal = {Anal Bioanal Chem},
    year = {2015},
    volume = {405},
    number = {25},
    pages = {7603--7613},
    abstract = {Mass spectrometry imaging (MSI) has become widely popular
    because of its potential to map the spatial distribution of thousands
    of compounds in a single measurement directly from tissue
    surfaces. With every MSI experiment, it is important to maintain
    high mass accuracy for correct identification of the observed ions.
    Many times this can be compromised due to different experimental
    factors, leading to erroneous assignment of peaks. This makes
    recalibration a crucial preprocessing step. We describe a lock
    mass-free mass spectra recalibration method, which enables to
    significantly reduce these mass shift effects. The recalibration
    method is applied in three steps: First, we decide on an order to
    process all the spectra. Herein, we describe three different methods
    for ordering the spectra - minimum spanning tree (MST),
    topological greedy (TG), and crystal growth (CG). Second, we
    construct a reference (consensus) spectrum, from the ordered
    spectra, and third, all spectra are individually corrected against this
    consensus spectrum. The performance of the recalibration method
    is demonstrated on three imaging datasets acquired from matrix-assisted laser desorptionionization (MALDI) and laser
    desorption/ionization (LDI) mass spectrometry imaging of
    whole-body Drosophila melanogaster fly. The applied recalibration
    method is shown to strongly reduce the observed mass shifts in the
    imaging datasets. Among the three ordering methods, CG and
    MST perform comparatively better than TG and highly decrease
    the overall standard deviation of the mass error distribution. Lock
    mass correction of MSI data is practically difficult, as not all spectra
    contain the selected lock mass peak. Our method eliminates this
    need.},
    doi = {10.1007/s00216-015-8935-4},
    keywords = {Mass spectrometry imaging, Recalibration, Mass shift correction, Data processing, jena},
    optmonth = oct,
    owner = {Sebastian},
    pmid = {26345438},
    timestamp = {2015.08.27},
    url = {http://link.springer.com/article/10.1007/s00216-015-8935-4},
    }
  • [DOI] White, T. W. J., Beyer, S., Dührkop, K., Chimani, M. & Böcker, S. (2015) Speedy colorful subtrees Springer, berlin, 310–322.
    [Bibtex]
    @InProceedings{white15speedy,
    author = {W. Timothy J. White and Stephan Beyer and Kai D\"uhrkop and Markus Chimani and Sebastian B\"ocker},
    title = {Speedy Colorful Subtrees},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2015)},
    year = {2015},
    volume = {9198},
    series = lncs,
    pages = {310--322},
    publisher = Springer,
    abstract = {Fragmentation trees are a technique for identifying molecular formulas and deriving some chemical properties of metabolites---small organic molecules---solely from mass spectral data. Computing these trees involves finding exact solutions to the NP-hard Maximum Colorful Subtree problem. Existing solvers struggle to solve the large instances involved fast enough to keep up with instrument throughput, and their performance remains a hindrance to adoption in practice. We attack this problem on two fronts: by combining fast and effective reduction algorithms with a strong integer linear program (ILP) formulation of the problem, we achieve overall speedups of 9.4 fold and 8.8 fold on two sets of real-world problems---without sacrificing optimality. Both approaches are, to our knowledge, the first of their kind for this problem. We also evaluate the strategy of solving global problem instances, instead of first subdividing them into many candidate instances as has been done in the past. Software (C++ source for our reduction program and our CPLEX/Gurobi driver program) available under LGPL at https://github.com/wtwhite/speedy_colorful_subtrees/.},
    doi = {10.1007/978-3-319-21398-9_25},
    file = {WhiteEtAl_SpeedyColorfulSubtrees_preprint_2015.pdf:2015/WhiteEtAl_SpeedyColorfulSubtrees_preprint_2015.pdf:PDF},
    keywords = {jena; linkpdf; PABI; IDUN; tandem ms;},
    owner = {wtwhite},
    timestamp = {2015.04.20},
    }

2014

  • Böcker, S., Klau, G. W. & Leong, H. W. (2014) Towards the ground truth: exact algorithms for bioinformatics research (nii shonan meeting report 2014-2) .
    [Bibtex]
    @TechReport{boecker14towards,
    author = {Sebastian B\"ocker and Gunnar W. Klau and Hon Wai Leong},
    title = {Towards the ground truth: Exact algorithms for bioinformatics research (NII Shonan Meeting Report 2014-2)},
    year = {2014},
    type = {NII Shonan Meeting Report},
    number = {2014-2},
    abstract = {Today, bioinformatics has become an integral and indispensable part of life science research: Success stories include the assembly and deciphering of genomes, understanding the complexity of cellular processes by means of biological networks, recovering the {\textquotedblleft}tree of life{\textquotedblright}, and deciding on treatment plans for HIV or cancer patients. Applications range from fundamental questions such as the origin of life to multi-billion dollar decisions on novel drug leads and molecular modeling. None of these questions could be approached without massive support from bioinformatics.},
    keywords = {jena; fpt; np-hard problems; forschergruppe;},
    owner = {Sebastian},
    timestamp = {2014.10.24},
    url = {http://shonan.nii.ac.jp/seminar/reports/wp-content/uploads/sites/56/2015/02/No.2014-2.pdf},
    }
  • [DOI] Böcker, S. & Wagner, S. (2014) Counting glycans revisited. IN J math biol, 69.799–816.
    [Bibtex]
    @Article{boecker14counting,
    author = {Sebastian B\"ocker and Stephan Wagner},
    title = {Counting glycans revisited},
    journal = {J Math Biol},
    year = {2014},
    volume = {69},
    number = {4},
    pages = {799--816},
    abstract = {We present an algorithm for counting glycan topologies of order n that improves on previously described algorithms by a factor n in both time and space. More generally, we provide such an algorithm for counting rooted or unrooted d-ary trees with labels or masses assigned to the vertices, and we give a "recipe" to estimate the asymptotic growth of the resulting sequences. We provide constants for the asymptotic growth of d-ary trees and labeled quaternary trees (glycan topologies). Finally, we show how a classical result from enumeration theory can be used to count glycan structures where edges are labeled by bond types. Our method also improves time bounds for counting alkanes.},
    doi = {10.1007/s00285-013-0721-3},
    file = {:2014/BoeckerWagner_CountingGlycansRevisited_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; glycans; counting; counting glycans; generating functions; Polya;},
    owner = {Sebastian},
    pmid = {23974240},
    timestamp = {2013.07.24},
    }
  • Beckstein, C., Böcker, S., Bogdan, M., Bruelheide, H., Bücker, M. H., Denzler, J., Dittrich, P., , I. G., Hinneburg, A., König-Ries, B., Löffler, F., Marz, M., Müller-Hannemann, M., Winter, M. & Zimmermann, W. (2014) Explorative analysis of heterogeneous, unstructured, and uncertain data: a computer science perspective on biodiversity research Scitepress, 251–257.
    [Bibtex]
    @InProceedings{beckstein14explorative,
    author = {Clemens Beckstein and Sebastian B\"ocker and Martin Bogdan and Helge Bruelheide and H. Martin B\"ucker and Joachim Denzler and Peter Dittrich and Ivo Gro\ss{}e and Alexander Hinneburg and Birgitta K\"onig-Ries and Felicitas L\"offler and Manja Marz and Matthias M\"uller-Hannemann and Martin Winter and Wolf Zimmermann},
    title = {Explorative Analysis of Heterogeneous, Unstructured, and Uncertain Data: A Computer Science Perspective on Biodiversity Research},
    booktitle = {Proc. of Data Management Technologies and Applications (DATA 2014)},
    year = {2014},
    pages = {251--257},
    publisher = {SciTePress},
    abstract = {We outline a blueprint for the development of new computer science approaches for the management and analysis of big data problems for biodiversity science. Such problems are characterized by a combination of different data sources each of which owns at least one of the typical characteristics of big data (volume, variety, velocity, or veracity). For these problems, we envision a solution that covers different aspects of integrating data sources and algorithms for their analysis on one of the following three layers: At the data layer, there are various data archives of heterogeneous, unstructured, and uncertain data. At the functional layer, the data are analyzed for each archive individually. At the meta-layer, multiple functional archives are combined for complex analysis.},
    keywords = {jena; iDiv; biodiversity;},
    opteditor = {M. Helfert and A. Holzinger and O. Belo and C. Francalanci},
    owner = {Sebastian},
    timestamp = {2014.06.25},
    }
  • Dührkop, K. & Böcker, S. (2014) Fragmentation trees reloaded .
    [Bibtex]
    @TechReport{duehrkop14fragmentation,
    author = {Kai D\"uhrkop and Sebastian B\"ocker},
    title = {Fragmentation trees reloaded},
    year = {2014},
    note = {arXiv: 1412.1929},
    abstract = {Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually relies on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Today, the vast majority of metabolites remain unknown. Fragmentation trees have become a powerful tool for the interpretation of tandem mass spectrometry data of small molecules. These trees are found by combinatorial optimization, and aim at explaining the experimental data via fragmentation cascades. To obtain biochemically meaningful results requires an elaborate optimization function. We present a new scoring for computing fragmentation trees, transforming the combinatorial optimization into a maximum a posteriori estimator. We demonstrate the superiority of the new scoring for two tasks: Both for the de novo identification of molecular formulas of unknown compounds, and for searching a database for structurally similar compounds, our methods performs significantly better than the previous scoring, as well as other methods for this task. Our method can expedite the workflow for untargeted metabolomics, allowing researchers to investigate unknowns using automated computational methods.fleischauer},
    keywords = {jena; FT; fragmentation trees;},
    owner = {Sebastian},
    timestamp = {2013.06.10},
    url = {http://arxiv.org/abs/1412.1929},
    }
  • [DOI] Dührkop, K., Hufsky, F. & Böcker, S. (2014) Molecular formula identification using isotope pattern analysis and calculation of fragmentation trees. IN Mass spectrom, 3.S0037.
    [Bibtex]
    @Article{duehrkop14molecular,
    author = {Kai D\"uhrkop and Franziska Hufsky and Sebastian B\"ocker},
    title = {Molecular Formula Identification Using Isotope Pattern Analysis and Calculation of Fragmentation Trees},
    journal = {Mass Spectrom},
    year = {2014},
    volume = {3},
    number = {special issue 2},
    pages = {S0037},
    abstract = {We present the results of a fully automated de novo approach for identification of molecular formulas in the CASMI 2013 contest. Only results for Category 1 (molecular formula identification) were submitted. Our approach combines isotope pattern analysis and fragmentation pattern analysis and is completely independent from any (spectral and structural) database. We correctly identified the molecular formula for ten out of twelve challenges, being the best automated method competing in this category.},
    doi = {10.5702/massspectrometry.S0037},
    keywords = {jena; MS; CASMI; SIRIUS;},
    owner = {Sebastian},
    timestamp = {2014.05.19},
    }
  • [DOI] Doerr, D., Stoye, J., Böcker, S. & Jahn, K. (2014) Identifying gene clusters by discovering common intervals in indeterminate strings. IN Bmc genomics, 15.S2.
    [Bibtex]
    @Article{doerr14identifying,
    author = {Daniel Doerr and Jens Stoye and Sebastian B\"ocker and Katharina Jahn},
    title = {Identifying gene clusters by discovering common intervals in indeterminate strings},
    journal = {BMC Genomics},
    year = {2014},
    volume = {15},
    number = {Suppl 6},
    pages = {S2},
    note = {Proc. of \emph{RECOMB Satelite Workshop on Comparative Genomics} (RECOMB-CG 2014)},
    abstract = {Comparative analyses of chromosomal gene orders are successfully used to predict gene clusters in bacterial and fungal genomes. Present models for detecting sets of co-localized genes in chromosomal sequences require prior knowledge of gene family assignments between genes in the dataset of interest. These families are often computationally predicted on the basis of sequence similarity or higher order features of gene products. Erroneous gene family assignments emerging in this process are amplified in subsequent gene order analyses and thus may deteriorate gene cluster prediction. In this work we present a new dynamic model and efficient computational approaches for gene cluster prediction suitable in scenarios ranging from traditional gene family-based gene cluster prediction, via multiple conflicting gene family annotations, to gene family-free analysis, in which gene clusters are predicted solely on the basis of a pairwise similarity measure between the genes of different genomes. We evaluate our gene family-free model against a gene family-based model on a dataset of 93 bacterial genomes. Our model is able to detect gene clusters that would be also detected with well-established gene-family based approaches. Moreover, we show that it is able to detect conserved regions which are missed by gene family-based methods due to wrong or deficient gene family assignments.},
    booktitle = {Proc. of},
    doi = {10.1186/1471-2164-15-S6-S2},
    keywords = {jena; gene clusters; common intervals;},
    owner = {Sebastian},
    pmid = {25571793},
    timestamp = {2014.07.30},
    url = {http://www.biomedcentral.com/1471-2164/15/S6/S2},
    }
  • Hufsky, F. (2014) Novel methods for the analysis of small molecule fragmentation mass spectra. PhD Thesis. Jena, Germany, .
    [Bibtex]
    @PhdThesis{hufsky14novel,
    author = {Franziska Hufsky},
    title = {Novel Methods for the Analysis of Small Molecule Fragmentation Mass Spectra},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2014},
    address = {Jena, Germany},
    file = {Hufsky_NovelMethodsAnalysis_PhdThesis_2014.pdf:2014/Hufsky_NovelMethodsAnalysis_PhdThesis_2014.pdf:PDF},
    keywords = {jena},
    owner = {fhufsky},
    timestamp = {2014.06.20},
    url = {http://www3.db-thueringen.de/servlets/DerivateServlet/Derivate-29781/},
    }
  • [DOI] Hufsky, F., Scheubert, K. & Böcker, S. (2014) Computational mass spectrometry for small molecule fragmentation. IN Trends anal chem, 53.41–48.
    [Bibtex]
    @Article{hufsky14computational,
    author = {Franziska Hufsky and Kerstin Scheubert and Sebastian B\"ocker},
    title = {Computational mass spectrometry for small molecule fragmentation},
    journal = {Trends Anal Chem},
    year = {2014},
    volume = {53},
    pages = {41--48},
    abstract = {The identification of small molecules from mass spectrometry (MS) data remains a major challenge in the interpretation of MS data. Computational aspects of identifying small molecules range from searching a reference spectral library to the structural elucidation of an unknown. In this review, we concentrate on five important aspects of the computational analysis. We find that novel computational methods may overcome the boundaries of spectral libraries, by searching in the more comprehensive molecular structure databases, or not requiring any databases at all.},
    comment = {url is good only until March 26, 2014},
    doi = {10.1016/j.trac.2013.09.008},
    file = {:2014/HufskyEtAl_ComputationalMassSpectrometry_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; review;},
    owner = {fhufsky},
    timestamp = {2013.09.19},
    url = {http://elsarticle.com/1ej1Fsn},
    }
  • [DOI] Hufsky, F., Scheubert, K. & Böcker, S. (2014) New kids on the block: Novel informatics methods for natural product discovery. IN Nat prod rep, 31.807–817.
    [Bibtex]
    @Article{hufsky14new,
    author = {Franziska Hufsky and Kerstin Scheubert and Sebastian B\"ocker},
    title = {New kids on the block: {Novel} informatics methods for natural product discovery},
    journal = {Nat Prod Rep},
    year = {2014},
    volume = {31},
    number = {6},
    pages = {807--817},
    abstract = {Mass spectrometry is a key technology for the identification and structural elucidation of natural products. Manual interpretation of the resulting data is tedious and time-consuming, so methods for automated analysis are highly sought after. In this review, we focus on four recently developed methods for the detection and investigation of small molecules, namely MetFrag/MetFusion, ISIS, FingerID, and FT-BLAST. These methods have the potential to significantly advance the field of computational mass spectrometry for the research of natural products. For example, they may help with the dereplication of compounds at an early stage of the drug discovery process; that is, the detection of molecules that are identical or highly similar to known drugs or drug leads. Furthermore, when a potential drug lead has been determined, these tools may help to identify it and elucidate its structure.},
    doi = {10.1039/C3NP70101H},
    file = {:2014/HufskyEtAl_NewKidsBlock_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; review; computational MS;},
    owner = {fhufsky},
    pmid = {24752343},
    timestamp = {2013.10.24},
    }
  • [DOI] Kaftan, F., , V., Kynast, P., Kulkarni, P., Böcker, S., Cvačka, J., Knaden, M. & Svatoš, A. (2014) Mass spectrometry imaging of surface lipids on intact \textitDrosophila melanogaster flies. IN J mass spectrom, 49.223–232.
    [Bibtex]
    @Article{kaftan14mass,
    author = {Filip Kaftan and Vladim\'\i{}r Vrkoslav and Philipp Kynast and Purva Kulkarni and Sebastian B\"ocker and Josef Cva\v{c}ka and Markus Knaden and Ale\v{s} Svato\v{s}},
    title = {Mass Spectrometry Imaging of Surface Lipids on Intact \textit{{Drosophila melanogaster}} Flies},
    journal = {J Mass Spectrom},
    year = {2014},
    volume = {49},
    number = {3},
    pages = {223--232},
    abstract = {The spatial distribution of neutral lipids and semiochemicals on the surface of six-day-old separately reared naive Drosophila melanogaster flies has been visualized and studied using matrix-assisted laser desorption/ionization-time of flight (MALDI-TOF) mass spectrometry and laser-assisted desorption/ionization (LDI)-TOF imaging (MSI). Metal targets were designed for two-dimensional MSI of the surface of 3-D biological objects. Targets with either simple grooves or profiled holes designed to accurately accommodate the male and female bodies were fabricated. These grooves and especially holes ensured correct height fixation and spatial orientation of the flies on the targets after matrix application and sample drying. For LDI-TOF to be used, the flies were arranged into holes and fixed to a plane of the target using fast-setting glue. In MALDI-TOF mode, the flies were fixed as above and sprayed with a lithium 2,5-dihydroxybenzoate matrix using up to 100 airbrush spray cycles. The scanning electron microscopy images revealed that the deposits of matrix were homogenous and the matrix formed mostly into the clusters of crystals (40-80?$\mu$m) that were separated from each other by an uncovered cuticle surface (30-40?$\mu$m). The MSI using target with profiled holes provided superior results to the targets with simple grooves, eliminating the ion suppression/mass deviation due to the 3-D shape of the flies. Attention was paid to neutral lipids and other compounds including the male anti-attractant 11-cis-vaccenyl acetate for which the expected distribution with high concentration on the tip of the male abdomen was confirmed. The red and blue mass shift (PlusMinus1 colour scale) was observed associated with mass deviation predominantly between $\pm$0.2 and 0.3?Da. We use in-house developed software for mass recalibration, to eliminate the mass deviation effects and help with the detection of low-intensity mass signals. Copyright {\copyright} 2014 John Wiley & Sons, Ltd.},
    doi = {10.1002/jms.3331},
    file = {KaftanEtAl_ImagingSurfaceLipidsDrosophila_JMS_2014.pdf:2014/KaftanEtAl_ImagingSurfaceLipidsDrosophila_JMS_2014.pdf:PDF},
    keywords = {jena; imaging MS; MS; lipids; linkPDF;, Imaging MS, Mass Spectrometry Imaging, MALDI, IMS, SIMS},
    owner = {Sebastian},
    pmid = {24619548},
    timestamp = {2013.12.30},
    }
  • Ludwig, M. (2014) Improving fragmentation trees from electron ionization mass spectra using isotope information. master thesis Master Thesis. .
    [Bibtex]
    @MastersThesis{ludwig14improving,
    author = {Marcus Ludwig},
    title = {Improving Fragmentation Trees from Electron Ionization Mass Spectra using Isotope Information},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2014},
    type = {master thesis},
    keywords = {jena; ms; ft;},
    owner = {bertram},
    timestamp = {2014.08.08},
    }
  • [DOI] Napagoda, M., Gerstmeier, J., Koeberle, A., Wesely, S., Popella, S., Lorenz, S., Scheubert, K., Böcker, S., Svatoš, A. & Werz, O. (2014) \textitMunronia pinnata (Wall.) Theob.: unveiling phytochemistry and dual inhibition of 5-lipoxygenase and microsomal prostaglandin E$_2$ synthase (mPGES)-1. IN J ethnopharmacol, 151.882–890.
    [Bibtex]
    @Article{napagoda14munronia,
    author = {Mayuri Napagoda and Jana Gerstmeier and Andreas Koeberle and Sandra Wesely and Sven Popella and Sybille Lorenz and Kerstin Scheubert and Sebastian B\"ocker and Ale\v{s} Svato\v{s} and Oliver Werz},
    title = {\textit{Munronia pinnata} ({Wall.}) {Theob.}: Unveiling phytochemistry and dual inhibition of 5-lipoxygenase and microsomal prostaglandin {E$_2$} synthase {(mPGES)-1}},
    journal = {J Ethnopharmacol},
    year = {2014},
    volume = {151},
    number = {2},
    pages = {882--890},
    abstract = {Preparations from Munronia pinnata (Wall.) Theob. are extensively used in traditional medicine in Sri Lanka for the treatment of inflammatory conditions. However, neither the pharmacological features nor the phytochemistry of this plant are explored in order to understand and rationalize the reported ethnobotanical significance. As 5-lipoxygenase (5-LO) and microsomal prosta-glandin E2 synthase (mPGES)-1 are crucial enzymes in inflammatory disorders, we evaluated their inhibition by M. pinnata extracts and studied the chemical profile of the plant for the identification of relevant constituents. Cell-free and cell-based assays were employed in order to investigate the suppression of 5-LO and mPGES-1 activity. Cell viability, radical scavenger activities, and inhibition of reactive oxygen species formation (ROS) in neutrophils were studied to assess cytotoxic and antioxidant effects. Gas and liquid chromatography coupled to mass spectrometric analysis enabled the characterization of secondary metabolites. The n-hexane extract of M. pinnata efficiently suppressed 5-LO activity in stimulated human neutrophils (IC50 = 8.7 mg/ml) and potently inhibited isolated human recombinant 5-LO (IC50 = 0.48 mg/ml) and mPGES-1 (IC50 = 1.0 mg/ml). In contrast, no significant radical scavenging activity or suppression of ROS formation was observed, and neutrophil viability was unaffected. The phytochemistry of the plant was unveiled for the first time and phytosterols, fatty acids, sesquiterpenes and several other types of secondary metabolites were identified. Together, potent inhibition of 5-LO and mPGES-1 activity, without concomitant antioxidant activity and cytotoxic effects, rationalizes the ethnopharmacological use of M. pinnata as anti-inflammatory remedy. Detailed chromatographic/mass spectrometric analysis reveals discrete chemical structures of relevant constituents.},
    doi = {10.1016/j.jep.2013.11.052},
    keywords = {jena; IDUN; FT; fragmentation tree;},
    owner = {Sebastian},
    pmid = {24315851},
    timestamp = {2013.12.22},
    }
  • [DOI] Scheubert, K., Hufsky, F. & Böcker, S. (2014) Multiple mass spectrometry fragmentation trees revisited: boosting performance and quality Springer, berlin, 217–231.
    [Bibtex]
    @InProceedings{scheubert14multiple,
    author = {Kerstin Scheubert and Franziska Hufsky and Sebastian B\"ocker},
    title = {Multiple Mass Spectrometry Fragmentation Trees Revisited: Boosting Performance and Quality},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2014)},
    year = {2014},
    volume = {8701},
    series = lncs,
    pages = {217--231},
    publisher = Springer,
    abstract = {Mass spectrometry (MS) in combination with a fragmentation technique is the method of choice for analyzing small molecules in high throughput experiments. The automated interpretation of such data is highly non-trivial. Recently, fragmentation trees have been introduced for de novo analysis of tandem fragmentation spectra (MS$^2$), describing the fragmentation process of the molecule. Multiple-stage MS (MS$^n$) reveals additional information about the dependencies between fragments. Unfortunately, the computational analysis of MS$^n$ data using fragmentation trees turns out to be more challenging than for tandem mass spectra. We present an Integer Linear Program for solving the Combined Colorful Subtree problem, which is orders of magnitude faster than the currently best algorithm which is based on dynamic programming. Using the new algorithm, we show that correlation between structural similarity and fragmentation tree similarity increases when using the additional information gained from MS$^n$. Thus, we show for the first time that using MS$^n$ data improves the quality of fragmentation trees.},
    doi = {10.1007/978-3-662-44753-6_17},
    file = {:2014/ScheubertEtAl_MultipleMassSpectrometry_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; multiple MS; MSn; metabolites; small molecules;},
    owner = {Sebastian},
    timestamp = {2014.06.22},
    }
  • [DOI] Schwartze, V. U., Winter, S., Shelest, E., Marcet-Houben, M., Horn, F., Wehner, S., Linde, J., Valiante, V., Sammeth, M., Riege, K., Nowrousian, M., Kaerger, K., Jacobsen, I. D., Marz, M., Brakhage, A. A., Gabaldón, T., Böcker, S. & Voigt, K. (2014) Gene expansion shapes genome architecture in the human pathogen \textitLichtheimia corymbifera: an evolutionary genomics analysis in the ancient terrestrial Mucorales (Mucoromycotina). IN Plos genet, 10.e1004496.
    [Bibtex]
    @Article{schwartze14gene,
    author = {Volker U. Schwartze and Sascha Winter and Ekaterina Shelest and Marina Marcet-Houben and Fabian Horn and Stefanie Wehner and J\"org Linde and Vito Valiante and Michael Sammeth and Konstantin Riege and Minou Nowrousian and Kerstin Kaerger and Ilse D. Jacobsen and Manja Marz and Axel A. Brakhage and Toni Gabald\'on and Sebastian B\"ocker and Kerstin Voigt},
    title = {Gene expansion shapes genome architecture in the human pathogen \textit{{Lichtheimia corymbifera}}: an evolutionary genomics analysis in the ancient terrestrial {Mucorales} ({Mucoromycotina})},
    journal = {PLos Genet},
    year = {2014},
    volume = {10},
    number = {8},
    pages = {e1004496},
    abstract = {Lichtheimia species are the second most important cause of mucormycosis in Europe. To provide broader insights into the molecular basis of the pathogenicity-associated traits of the basal Mucorales, we report the full genome sequence of L. corymbifera and compared it to the genome of Rhizopus oryzae, the most common cause of mucormycosis worldwide. The genome assembly encompasses 33.6 MB and 12,379 protein-coding genes. This study reveals four major differences of the L. corymbifera genome to R. oryzae: (i) the presence of an highly elevated number of gene duplications which are unlike R. oryzae not due to whole genome duplication (WGD), (ii) despite the relatively high incidence of introns, alternative splicing (AS) is not frequently observed for the generation of paralogs and in response to stress, (iii) the content of repetitive elements is strikingly low (< 5\%), (iv) L. corymbifera is typically haploid. Novel virulence factors were identified which may be involved in the regulation of the adaptation to iron-limitation, e.g. LCor01340.1 encoding a putative siderophore transporter and LCor00410.1 involved in the siderophore metabolism. Genes encoding the transcription factors LCor08192.1 and LCor01236.1, which are similar to GATA type regulators and to calcineurin regulated CRZ1, respectively, indicating an involvement of the calcineurin pathway in the adaption to iron limitation. Genes encoding MADS-box transcription factors are elevated up to 11 copies compared to the 1-4 copies usually found in other fungi. More findings are: (i) lower content of tRNAs, but unique codons in L. corymbifera, (ii) Over 25\% of the proteins are apparently specific for L. corymbifera. (iii) L. corymbifera contains only 2/3 of the proteases (known to be essential virulence factors) in comparision to R. oryzae. On the other hand, the number of secreted proteases, however, is roughly twice as high a in R. oryzae.},
    doi = {10.1371/journal.pgen.1004496},
    keywords = {jena; 3AGC; gene clusters},
    owner = {Sebastian},
    pmid = {25121733},
    timestamp = {2014.05.25},
    }
  • [DOI] Shen, H., Dührkop, K., Böcker, S. & Rousu, J. (2014) Metabolite identification through multiple kernel learning on fragmentation trees. IN Bioinformatics, 30.i157-i164.
    [Bibtex]
    @Article{shen14metabolite,
    author = {Huibin Shen and Kai D\"uhrkop and Sebastian B\"ocker and Juho Rousu},
    title = {Metabolite Identification through Multiple Kernel Learning on Fragmentation Trees},
    journal = {Bioinformatics},
    year = {2014},
    volume = {30},
    number = {12},
    pages = {i157-i164},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2014)},
    abstract = {Metabolite identification from tandem mass spectrometric data is a key task in metabolomics. Various computational methods has been proposed for the identification of metabolites from tandem mass spectra. Fragmentation tree methods explore the space of possible ways the metabolite can fragment, and base the metabolite identification on scoring of these fragmentation trees. Machine learning methods has been used to map mass spectra to molecular fingerprints; predicted fingerprints, in turn, can be used to score candidate molecular structures. Here, we combine fragmentation tree computations with kernel-based machine learning to predict molecular fingerprints and identify molecular structures. We introduce a family of kernels capturing the similarity of fragmentation trees, and combine these kernels using recently proposed multiple kernel learning approaches. Experiments on two large reference datasets show that the new methods significantly improve molecular fingerprint prediction accuracy. These improvements result in better metabolite identification, doubling the number of metabolites ranked at the top position of the candidates list.},
    doi = {10.1093/bioinformatics/btu275},
    file = {ShenEtAl_MetaboliteIdentificationMultipleKernel_ISMB_2014.pdf:2014/ShenEtAl_MetaboliteIdentificationMultipleKernel_ISMB_2014.pdf:PDF},
    keywords = {jena; IDUN; MS; tandem MS;},
    owner = {fhufsky},
    pmid = {24931979},
    timestamp = {2014.02.11},
    }

2013

  • [DOI] Böcker, S. & Baumbach, J. (2013) Cluster editing Springer, berlin, 33–44.
    [Bibtex]
    @InProceedings{boecker13cluster,
    author = {Sebastian B\"ocker and Jan Baumbach},
    title = {Cluster Editing},
    booktitle = {Proc. of Computability in Europe (CIE 2013)},
    year = {2013},
    volume = {7921},
    series = lncs,
    pages = {33--44},
    publisher = Springer,
    abstract = {The Cluster Editing problem asks to transform a graph into a disjoint union of cliques using a minimum number of edge modifications. Although the problem has been proven NP-complete several times, it has nevertheless attracted much research both from the theoretical and the applied side. The problem has been the inspiration for numerous algorithms in bioinformatics, aiming at clustering entities such as genes, proteins, phenotypes, or patients. In this paper, we review exact and heuristic methods that have been proposed for the Cluster Editing problem, and also applications of these algorithms for biological problems.},
    doi = {10.1007/978-3-642-39053-1_5},
    keywords = {jena; FPT; cluster editing;},
    opteditor = {Paola Bonizzoni and Vasco Brattka and Benedikt L\"owe},
    owner = {Sebastian},
    timestamp = {2013.03.20},
    }
  • [DOI] Böcker, S., Canzar, S. & Klau, G. W. (2013) The generalized Robinson-Foulds metric Springer, berlin, 156–169.
    [Bibtex]
    @InProceedings{boecker13generalized,
    author = {Sebastian B\"ocker and Stefan Canzar and Gunnar W. Klau},
    title = {The generalized {Robinson-Foulds} metric},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2013)},
    year = {2013},
    volume = {8126},
    series = lncs,
    pages = {156--169},
    publisher = Springer,
    abstract = {The Robinson-Foulds (RF) metric is arguably the most widely used measure of phylogenetic tree similarity, despite its well-known shortcomings: For example, moving a single taxon in a tree can result in a tree that has maximum distance to the original one; but the two trees are identical if we remove the single taxon. To this end, we propose a natural extension of the RF metric that does not simply count identical clades but instead, also takes similar clades into consideration. In contrast to previous approaches, our model requires the matching between clades to respect the structure of the two trees, a property that the classical RF metric exhibits, too. We show that computing this generalized RF metric is, unfortunately, NP-hard. We then present a simple Integer Linear Program for its computation, and evaluate it by an all-against-all comparison of 100 trees from a benchmark data set. We find that matchings that respect the tree structure differ significantly from those that do not, underlining the importance of this natural condition.},
    doi = {10.1007/978-3-642-40453-5_13},
    keywords = {jena; fpt; phylogenetics; tree distance; Robinson-Foulds;},
    owner = {Sebastian},
    timestamp = {2013.06.21},
    url = {http://arxiv.org/abs/1307.7824},
    }
  • [DOI] Blin, G., Rizzi, R., Sikora, F. & Vialette, S. (2013) Minimum mosaic inference of a set of recombinants. IN Int j found comput s, 24.51.
    [Bibtex]
    @Article{blin13minimum,
    author = {Blin, Guillaume and Rizzi, Romeo and Sikora, Florian and Vialette, St\'ephane},
    title = {Minimum Mosaic Inference of a Set of Recombinants},
    journal = {Int J Found Comput S},
    year = {2013},
    volume = {24},
    number = {1},
    pages = {51},
    abstract = {Longest common subsequence is a widely used measure to compare strings, in particular in computational biology. Recently, several variants of the longest common subsequence have been introduced to tackle the comparison of genomes. In particular, the Repetition Free Longest Common Subsequence (RFLCS) problem is a variant of the LCS problem that asks for a longest common subsequence of two input strings with no repetition of symbols. In this paper, we investigate the parameterized complexity of RFLCS. First, we show that the problem does not admit a polynomial kernel. Then, we present a randomized FPT algorithm for the RFLCS problem, improving the time complexity of the existent FPT algorithm.},
    doi = {10.1142/S0129054113400042},
    keywords = {jena; PABI; fpt},
    }
  • [DOI] Brinkmeyer, M., Griebel, T. & Böcker, S. (2013) FlipCut supertrees: towards matrix representation accuracy in polynomial time. IN Algorithmica, 67.142–160.
    [Bibtex]
    @Article{brinkmeyer13flipcut,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {{FlipCut} Supertrees: Towards Matrix Representation Accuracy in Polynomial Time},
    journal = {Algorithmica},
    year = {2013},
    volume = {67},
    number = {2},
    pages = {142--160},
    abstract = {In computational phylogenetics, supertree methods provide a way to reconstruct larger clades of the Tree of Life. The problem of constructing a supertree of a given set of rooted input trees can be formalized in different ways, to cope with contradictory information in the input. In particular, there exist methods based on encoding the input trees in a matrix, and methods based on finding minimum cuts in some graph representing the incompatibility in the input. Evaluations have shown that the former matrix representation methods will compute supertrees of better quality but, unfortunately, the underlying problems are computationally hard, which can heavily constrain the size of reconstructed trees. In contrast, graph-based methods compute a supertree in polynomial time, but these supertrees are inferior in quality. In this paper, we present a novel approach for the computation of supertrees, called FlipCut supertree. Our method combines the computation of minimum cuts from graph-based methods with a matrix representation method, namely Minimum Flip Supertrees. Here, the input trees are encoded in a 0/1/?-matrix, and we present a heuristic to search for a minimum set of 0/1-flips such that the resulting matrix admits a directed perfect phylogeny. We then extend our approach, by using edge weights to weight the columns of the 0/1/?-matrix. In our evaluation, we show that our method is extremely swift in practice, and orders of magnitude faster than the runner up. Concerning supertree accuracy, our method outperforms most polynomial time supertree methods, and is sometimes on par with matrix representation with parsimony.},
    doi = {10.1007/s00453-012-9698-3},
    file = {:2012/BrinkmeyerEtAl_FlipCutSupertreesTowards_Algorithmica_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; FlipCut; supertrees; supertree methods},
    owner = {Sebastian},
    timestamp = {2010.11.10},
    }
  • [DOI] Dührkop, K., Ludwig, M., Meusel, M. & Böcker, S. (2013) Faster mass decomposition Springer, berlin, 45–58.
    [Bibtex]
    @InProceedings{duehrkop13faster,
    author = {Kai D\"uhrkop and Marcus Ludwig and Marvin Meusel and Sebastian B\"ocker},
    title = {Faster mass decomposition},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2013)},
    year = {2013},
    volume = {8126},
    series = lncs,
    pages = {45--58},
    publisher = Springer,
    abstract = {Metabolomics complements investigation of the genome, transcriptome, and proteome of an organism. Today, the vast majority of metabolites remain unknown, in particular for non-model organisms. Mass spectrometry is one of the predominant techniques for analyzing small molecules such as metabolites. A fundamental step for identifying a small molecule is to determine its molecular formula. Here, we present and evaluate three algorithm engineering techniques that speed up the molecular formula determination. For that, we modify an existing algorithm for decomposing the monoisotopic mass of a molecule. These techniques lead to a four-fold reduction of running times, and reduce memory consumption by up to 94\%. In comparison to the classical search tree algorithm, our algorithm reaches a 1000-fold speedup.},
    doi = {10.1007/978-3-642-40453-5_5},
    keywords = {jena; idun; mass decomposition; MS; mass spectrometry;},
    owner = {Sebastian},
    timestamp = {2013.06.21},
    url = {http://arxiv.org/abs/1307.7805},
    }
  • [DOI] Dührkop, K., Scheubert, K. & Böcker, S. (2013) Molecular formula identification with SIRIUS. IN Metabolites, 3.506–516.
    [Bibtex]
    @Article{duehrkop13molecular,
    author = {Kai D\"uhrkop and Kerstin Scheubert and Sebastian B\"ocker},
    title = {Molecular Formula Identification with {SIRIUS}},
    journal = {Metabolites},
    year = {2013},
    volume = {3},
    pages = {506--516},
    abstract = {We present results of the SIRIUS 2 submission to the 2012 CASMI contest. Only results for Category 1 (molecular formula identification) were submitted. The SIRIUS method and the parameters used are briefly described, followed by detailed analysis of the results and a discussion of cases where SIRIUS 2 was unable to come up with the correct molecular formula. SIRIUS 2 returns consistently high quality results, with the exception of fragmentation pattern analysis of time-of-flight data. We then discuss possibilities for further improving SIRIUS 2 in the future.},
    doi = {10.3390/metabo3020506},
    file = {DuehrkopEtAl_MolecularFormulaIdentification_Metabolites_2013.pdf:2013/DuehrkopEtAl_MolecularFormulaIdentification_Metabolites_2013.pdf:PDF},
    keywords = {jena; IDUN; MS; small molecules; isotope patterns; SIRIUS; fragmentation trees; FT;},
    owner = {Sebastian},
    pmid = {24958003},
    timestamp = {2013.04.19},
    }
  • [DOI] Guillemot, S. & Sikora, F. (2013) Finding and counting vertex-colored subtrees. IN Algorithmica, 65.828–844.
    [Bibtex]
    @Article{guillemot13finding,
    author = {Guillemot, Sylvain and Sikora, Florian},
    title = {Finding and counting vertex-colored subtrees},
    journal = {Algorithmica},
    year = {2013},
    volume = {65},
    number = {4},
    pages = {828--844},
    abstract = {The problems studied in this article originate from the GRAPH MOTIF problem introduced by Lacroix et al. (IEEE/ACM Trans. Comput. Biol. Bioinform. 3(4):360-368, 2006) in the context of biological networks. The problem is to decide if a vertex-colored graph has a connected subgraph whose colors equal a given multiset of colors M. It is a graph pattern-matching problem variant, where the structure of the occurrence of the pattern is not of interest but the only requirement is the connectedness. Using an algebraic framework recently introduced by Koutis (Proceedings of the 35th International Colloquium on Automata, Languages and Programming (ICALP), Lecture Notes in Computer Science, vol. 5125, pp. 575-586, 2008) and Koutis and Williams (Proceedings of the 36th International Colloquium on Automata, Languages and Programming (ICALP), Lecture Notes in Computer Science, vol. 5555, pp. 653-664, 2009), we obtain new FPT algorithms for GRAPH MOTIF and variants, with improved running times. We also obtain results on the counting versions of this problem, proving that the counting problem is FPT if M is a set, but becomes \#W[1]-hard if M is a multiset with two colors. Finally, we present an experimental evaluation of this approach on real datasets, showing that its performance compares favorably with existing software.},
    doi = {10.1007/s00453-011-9600-8},
    keywords = {fpt; jena; PABI},
    url = {http://arxiv.org/abs/1002.1880},
    }
  • [DOI] Jahn, K., Winter, S., Stoye, J. & Böcker, S. (2013) Statistics for approximate gene clusters. IN Bmc bioinf, 14.S14.
    [Bibtex]
    @Article{jahn13statistics,
    author = {Katharina Jahn and Sascha Winter and Jens Stoye and Sebastian B\"ocker},
    title = {Statistics for approximate gene clusters},
    journal = {BMC Bioinf},
    year = {2013},
    volume = {14},
    number = {Suppl 15},
    pages = {S14},
    note = {Proc. of \emph{RECOMB Satelite Workshop on Comparative Genomics} (RECOMB-CG 2013)},
    abstract = {Genes occurring co-localized in multiple genomes can be strong indicators for either functional constraints on the genome organization or remnant ancestral gene order. The computational detection of these patterns, which are usually referred to as gene clusters, has become increasingly sensitive over the past decade. The most powerful approaches allow for both imperfect cluster conservation, i.e. occurrences which can be missing some of the clustered genes, be disrupted by uninvolved genes, or internally rearranged, as well as the complete absence of the cluster in some or even most of the studied genomes. The detection of such low quality clusters increases the risk to mistake faint patterns that occur merely by chance for genuine findings. Therefore, it is crucial to estimate the significance of computational gene cluster predictions and discriminate between true conservation and coincidental clustering. In this paper, we present an efficient and accurate approach to estimate the significance of gene cluster predictions. Given a single gene cluster prediction, we calculate the probability to observe it with the same or a higher degree of conservation under the null hypothesis of random gene order, and add a correction factor to account for multiple testing. Our approach considers all parameters that define the quality of gene cluster conservation: the number of genomes in which the cluster occurs, the number of involved genes, the degree of conservation in the different genomes, as well as the frequency of the clustered genes within each genome. We apply our approach to evaluate gene cluster predictions in a large set of well annotated genomes.},
    doi = {10.1186/1471-2105-14-S15-S14},
    keywords = {jena; 3AGC; gene clusters;},
    owner = {Sebastian},
    pmid = {24564620},
    timestamp = {2013.07.26},
    }
  • Neumann, S., Rasche, F., Wolf, S. & Böcker, S. (2013) Metabolite identification and computational mass spectrometry. IN Weckwerth, W. & Kahl, G. (Eds.), The handbook of plant metabolomics, metabolite profiling and networking..
    [Bibtex]
    @InCollection{neumann13metabolite,
    author = {Steffen Neumann and Florian Rasche and Sebastian Wolf and Sebastian B\"ocker},
    title = {Metabolite Identification and Computational Mass Spectrometry},
    booktitle = {The Handbook of Plant Metabolomics, Metabolite Profiling and Networking},
    publisher = {Wiley-VCH Verlag},
    year = {2013},
    editor = {Wolfram Weckwerth and G\"unter Kahl},
    series = {Molecular Plant Biology Handbook Series},
    chapter = {16},
    pages = {271--285},
    abstract = {Previous chapters have introduced protocols and examples for high-throughput metabolomics experiments. Metabolite identification is an important step in these experiments, bridging the metabolomics experiment, metabolite profiling and the biological interpretation of the results. The elemental composition of the individual metabolites is the most basic information that can be calculated already from the MS profiling data. For a more thorough identification, the {\textquotedblleft}interesting{\textquotedblright} peaks are subjected to MS2, or even higher order MSn measurements. Such spectra carry rich structural hints, revealing building blocks of the unknown compound, or allowing comparison with databases of reference spectra. In this chapter we describe a general strategy to identify metabolites, and walk through the steps of the identification for two example compounds, first calculating elemental compositions, performing in silico identification without reference spectra, and finally spectral library lookup.},
    keywords = {Jena},
    owner = {fhufsky},
    timestamp = {2013.05.28},
    }
  • Rasche, F. (2013) Identification of small molecules using mass spectrometry: a fully automated pipeline proposing similar molecules and compound classes. PhD Thesis. Jena, Germany, .
    [Bibtex]
    @PhdThesis{rasche13identification,
    author = {Florian Rasche},
    title = {Identification of Small Molecules using Mass Spectrometry: A fully automated pipeline proposing similar molecules and compound classes},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2013},
    address = {Jena, Germany},
    file = {:2013/Rasche_IdentificationSmallMolecules_phdthesis_2013.pdf:PDF},
    keywords = {jena; linkpdf; fragmentation trees; ft; metabolites; small molecules; mass spectrometry; MS;},
    owner = {Sebastian},
    timestamp = {2013.03.11},
    }
  • [DOI] Rauf, I., Rasche, F., Nicolas, F. & Böcker, S. (2013) Finding maximum colorful subtrees in practice. IN J comput biol, 20.1–11.
    [Bibtex]
    @Article{rauf13finding,
    author = {Imran Rauf and Florian Rasche and Fran\c{c}ois Nicolas and Sebastian B\"ocker},
    title = {Finding Maximum Colorful Subtrees in practice},
    journal = {J Comput Biol},
    year = {2013},
    volume = {20},
    number = {4},
    pages = {1--11},
    abstract = {In metabolomics and other fields dealing with small compounds, mass spectrometry is applied as sensitive high-throughput technique. Recently, fragmentation trees have been proposed to automatically analyze the fragmentation mass spectra recorded by such instruments. Computationally, this leads to the problem of finding a maximum weight subtree in an edge weighted and vertex colored graph, such that every color appears at most once in the solution. We introduce new heuristics and an exact algorithm for this Maximum Colorful Subtree problem, and evaluate them against existing algorithms on real-world datasets. Our tree completion heuristic consistently scores better than other heuristics, while the integer programming-based algorithm produces optimal trees with modest running times. Our fast and accurate heuristic can help to determine molecular formulas based on fragmentation trees. On the other hand, optimal trees from the integer linear program are useful if structure is relevant, e.g., for tree alignments.},
    doi = {10.1089/cmb.2012.0083},
    keywords = {jena; metabolite ms; tandem ms; ms; ft; idun; TrACReview},
    owner = {Sebastian},
    pmid = {23509858},
    timestamp = {2011.02.01},
    }
  • [DOI] Scheubert, K., Hufsky, F. & Böcker, S. (2013) Computational mass spectrometry for small molecules. IN J cheminform, 5.12.
    [Bibtex]
    @Article{scheubert13computational,
    author = {Kerstin Scheubert and Franziska Hufsky and Sebastian B\"ocker},
    title = {Computational Mass Spectrometry for Small Molecules},
    journal = {J Cheminform},
    year = {2013},
    volume = {5},
    pages = {12},
    abstract = {The identification of small molecules from mass spectrometry (MS) data remains a major challenge in the interpretation of MS data. This review covers the computational aspects of identifying small molecules, from the identification of a compound searching a reference spectral library, to the structural elucidation of unknowns. In detail, we describe the basic principles and pitfalls of searching mass spectral reference libraries. Determining the molecular formula of the compound can serve as a basis for subsequent structural elucidation; consequently, we cover different methods for molecular formula identification, focussing on isotope pattern analysis. We then discuss automated methods to deal with mass spectra of compounds that are not present in spectral libraries, and provide an insight into de novo analysis of fragmentation spectra using fragmentation trees. In addition, this review shortly covers the reconstruction of metabolic networks using MS data. Finally, we list available software for different steps of the analysis pipeline.},
    doi = {10.1186/1758-2946-5-12},
    file = {ScheubertEtAl_ComputationalMassSpectrometry_JCheminf_2013.pdf:2013/ScheubertEtAl_ComputationalMassSpectrometry_JCheminf_2013.pdf:PDF},
    keywords = {jena; MS; Mass Spectrometry; metabolites; metabolite identification;},
    owner = {fhufsky},
    pmid = {23453222},
    timestamp = {2013.01.02},
    url = {http://www.jcheminf.com/content/5/1/12/abstract},
    }
  • Vogel, B. (2013) Improving FlipCut supertrees with posterior probabilities and suboptimal source trees. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{vogel13improving,
    author = {Bertram Vogel},
    title = {Improving {FlipCut} Supertrees with Posterior Probabilities and Suboptimal Source Trees},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2013},
    type = {Diplomarbeit},
    file = {Vogel_FlipCutPosteriorProbabilities_DiplomaThesis_2013.pdf:2013/Vogel_FlipCutPosteriorProbabilities_DiplomaThesis_2013.pdf:PDF},
    keywords = {jena; phylogenetics; FlipCut; supertrees;},
    owner = {bertram},
    timestamp = {2014.07.08},
    }
  • [DOI] Wieseke, N., Lechner, M., Ludwig, M. & Marz, M. (2013) POMAGO: multiple genome-wide alignment tool for bacteria Springer, berlin, 249–260.
    [Bibtex]
    @InProceedings{wieseke13pomago,
    author = {Nicolas Wieseke and Marcus Lechner and Marcus Ludwig and Manja Marz},
    booktitle = {Proc. of International Symposium on Bioinformatics Research and Applications (ISBRA 2013)},
    title = {{POMAGO}: Multiple Genome-Wide Alignment Tool for Bacteria},
    year = {2013},
    pages = {249--260},
    publisher = Springer,
    series = lncs,
    volume = {7875},
    abstract = {Multiple Genome-wide Alignments are a first crucial step to compare genomes. Gain and loss of genes, duplications and genomic rearrangements are challenging problems that aggravate with increasing phylogenetic distances. We describe a multiple genome-wide alignment tool for bacteria, called POMAGO, which is based on orthologous genes and their syntenic information determined by Proteinortho.This strategy enables POMAGO to efficiently define anchor points even across wide phylogenetic distances and outperform existing approaches in this field of application. The given set of orthologous genes is enhanced by several cleaning and completion steps, including the addition of previously undetected orthologous genes. Protein-coding genes are aligned on nucleotide and protein level, whereas intergenic regions are aligned on nucleotide level only. We tested and compared our program at three very different sets of bacteria that exhibit different degrees of phylogenetic distances: 1) 15 closely related, well examined and described E. coli species, 2) six more divergent Aquificales, as putative basal bacteria, and 3) a set of eight extreme divergent species, distributed among the whole phylogenetic tree of bacteria. POMAGO is written in a modular way which allows extending or even exchanging algorithms in different stages of the alignment process. Intergenic regions might for instance be aligned using an RNA secondary structure aware algorithm rather than to rely on sequence data alone. The software is freely available from http://www.rna.uni-jena.de/supplements/pomago},
    doi = {10.1007/978-3-642-38036-5_25},
    owner = {fhufsky},
    timestamp = {2014.01.13},
    }
  • Brinkmeyer, M. (2013) Polynomial supertree methods in phylogenetics. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{brinkmeyer13polynomial,
    author = {Malte Brinkmeyer},
    title = {Polynomial Supertree Methods in Phylogenetics},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2013},
    address = {Jena; Germany},
    file = {Brinkmeyer_PolynomialSupertreeMethods_phdthesis_2013.pdf:2013/Brinkmeyer_PolynomialSupertreeMethods_phdthesis_2013.pdf:PDF},
    keywords = {jena; supertrees; linkpdf},
    owner = {Sebastian},
    timestamp = {2013.03.11},
    }

2012

  • [DOI] Böcker, S. (2012) Comment on ''An efficient method to calculate the aggregated isotopic distribution and exact center-masses'' by Claesen et al.. IN J am soc mass spectrom, 23.1826–1827.
    [Bibtex]
    @Article{boecker12comment,
    author = {Sebastian B\"ocker},
    title = {Comment on ''{An} Efficient Method to Calculate the Aggregated Isotopic Distribution and Exact Center-Masses'' by {Claesen} et al.},
    journal = {J Am Soc Mass Spectrom},
    year = {2012},
    volume = {23},
    number = {10},
    pages = {1826--1827},
    abstract = {Claesen et al. recently presented an efficient method for computing the isotope pattern of a molecule, that is, both the isotope distribution and the center masses (also called "probability-weighted masses" and "aggregated isotopic variants) of the isotope peaks. They favorably compare their approach against five other methods for simulating isotope patterns: Their tool BRAIN is more accurate than any other tool, and computation times are well below one second even for huge molecules of mass above 533403 Da and when computing 1325 peak masses. Unfortunately, the authors fail to mention SIRIUS that is capable of performing such calculations, too.},
    doi = {10.1007/s13361-012-0402-2},
    file = {Boecker_CommentEfficientMethod_JASMS_2012_preprint.pdf:2012/Boecker_CommentEfficientMethod_JASMS_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; IDUN; Mass Spectrometry; MS; isotope pattern; isotope pattern analysis; SIRIUS},
    owner = {Sebastian},
    pmid = {22673835},
    timestamp = {2012.04.25},
    }
  • [DOI] Hufsky, F., Dührkop, K., Rasche, F., Chimani, M. & Böcker, S. (2012) Fast alignment of fragmentation trees. IN Bioinformatics, 28.i265–i273.
    [Bibtex]
    @Article{hufsky12fast,
    author = {Franziska Hufsky and Kai D\"uhrkop and Florian Rasche and Markus Chimani and Sebastian B\"ocker},
    title = {Fast alignment of fragmentation trees},
    journal = {Bioinformatics},
    year = {2012},
    volume = {28},
    number = {12},
    pages = {i265--i273},
    note = {Proc. of \emph{Intelligent Systems for Molecular Biology} (ISMB 2012)},
    abstract = {Mass spectrometry allows sensitive, automated and high-throughput analysis of small molecules such as metabolites. One major bottleneck in metabolomics is the identification of "unknown" small molecules not in any database. Recently, fragmentation tree alignments have been introduced for the automated comparison of the fragmentation patterns of small molecules. Fragmentation pattern similarities are strongly correlated with the chemical similarity of molecules, and allow us to cluster compounds based solely on their fragmentation patterns. Aligning fragmentation trees is computationally hard. Nevertheless, we present three exact algorithms for the problem: A dynamic programming (DP) algorithm, a sparse variant of the DP, and an Integer Linear Program (ILP). Evaluation of our methods on three different datasets showed that thousands of alignments can be computed in a matter of minutes using DP, even for "challenging" instances. Running times of the sparse DP were an order of magnitude better than for the classical DP. The ILP was clearly outperformed by both DP approaches. We also found that for both DP algorithms, computing the 1\% slowest alignments required as much time as computing the 99\% fastest.},
    doi = {10.1093/bioinformatics/bts207},
    file = {HufskyEtAl_FastAlignmentFragmentationTrees_ISMB_2012.pdf:2012/HufskyEtAl_FastAlignmentFragmentationTrees_ISMB_2012.pdf:PDF},
    keywords = {jena; metabolite ms; ms; idun; ft; tree alignment},
    owner = {Sebastian},
    pmid = {22689771},
    timestamp = {2011.10.19},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/bts207?ijkey=Xeve7Sksc5aV0qP&keytype=ref},
    }
  • [DOI] Yang, X., Sikora, F., Blin, G., Hamel, S., Rizzi, R. & Aluru, S. (2012) An algorithmic view on multi-related-segments: a unifying model for approximate common interval Springer, berlin, 319–329.
    [Bibtex]
    @InProceedings{yang12algorithmic,
    author = {Yang, Xiao and Sikora, Florian and Blin, Guillaume and Hamel, Sylvie and Rizzi, Romeo and Aluru, Srinivas},
    title = {An Algorithmic View on Multi-related-segments: A unifying model for approximate common interval},
    booktitle = {Proc. of Theory and Applications of Models of Computation (TAMC 2012)},
    year = {2012},
    volume = {7287},
    series = lncs,
    pages = {319--329},
    publisher = springer,
    abstract = {A set of genes that are proximately located on multiple chromosomes often implies their origin from the same ancestral genomic segment or their involvement in the same biological process. Among the numerous studies devoted to model and infer these gene sets, the recently introduced approximate common interval (ACI) models capture gene loss events in addition to the gene insertion, duplication and inversion events already incorporated by earlier models. However, the computational tractability of the corresponding problems remains open in most of the cases. In this contribution, we propose an algorithmic study of a unifying model for ACI, namely Multi-related-segments, and demonstrate that capturing gene losses induces intractability in many cases.},
    doi = {10.1007/978-3-642-29952-0_33},
    keywords = {jena; PABI; fpt; gene clusters},
    opteditor = {Manindra Agrawal and S. Barry Cooper and Angsheng Li},
    owner = {florian},
    timestamp = {2012.02.21},
    url = {http://hal-univ-mlv.archives-ouvertes.fr/hal-00630150/en},
    }
  • [DOI] Ullmann-Zeunert, L., Muck, A., Wielsch, N., Hufsky, F., Stanton, M. A., Bartram, S., Böcker, S., Baldwin, I. T., Groten, K. & Svatoš, A. (2012) Determination of 15N-incorporation into plant proteins and their absolute quantitation: a new tool to study nitrogen flux dynamics and protein pool sizes elicited by plant-herbivore interactions. IN J proteome res, 11.4947–4960.
    [Bibtex]
    @Article{ullmann-zeunert12determination,
    author = {Lynn Ullmann-Zeunert and Alexander Muck and Natalie Wielsch and Franziska Hufsky and Mariana A. Stanton and Stefan Bartram and Sebastian B\"ocker and Ian T. Baldwin and Karin Groten and Ale\v{s} Svato\v{s}},
    title = {Determination of {15{N}}-incorporation into plant proteins and their absolute quantitation: a new tool to study nitrogen flux dynamics and protein pool sizes elicited by plant-herbivore interactions},
    journal = {J Proteome Res},
    year = {2012},
    volume = {11},
    number = {10},
    pages = {4947--4960},
    abstract = {Nitrogen (N) is an essential and limiting nutrient for plant growth, reproduction, and defense against herbivores. Trade-offs between growth and defense after herbivory are well-studied, but we lack an understanding of the mechanisms responsible for these fitness tradeoffs, mechanisms that involve herbivory-elicited changes in protein abundance and N-flux among tissues, largely due to the lack of suitable methods. Here we demonstrate that a mass spectrometric data-independent acquisition approach known as LC-MSE, combined with a novel algorithm to quantify heavy atom enrichment in peptides is able to quantify elicited changes in protein amounts and 15N flux in a high throughput manner. The quantification of the labeled proteins compares the total ion count (TIC) of peptides derived from proteins of interest to a standard protein (BSA) and the calculation is based on the ratios of the measured spectra to the calculated ideal spectra. The method was able to reliably quantify rabbit phosphorylase b in complex leaf protein extract at concentrations to 300 fmol. The linear dynamic range, reproducibility of technical and biological replicates and differences between measured and expected 15N-incorporation into the small (SSU) and large (LSU) subunits of ribulose-1,5-bisphosphate-carboxylase/oxygenase (RuBisCO) and RuBisCO activase (RCA2) of Nicotiana attenuata plants grown in hydroponic culture at different known concentrations of 15N-labeled nitrate, were used to further evaluate the procedure. The utility of the method for whole-plant studies in ecological realistic contexts was demonstrated by using15N-pulse protocols to plants growing in soil under unknown 15N-incorporation levels. A time-course analysis revealed an exponential increase in 15N-incorporation with maximal values between 6.94 At\% and 8.91 At\%, and distinct patterns of protein amounts of the three proteins over time, while the amounts of total soluble protein declined with leaf age. Additionally we quantified the amounts of lipoxygenase 2 (LOX2) protein, an important enzyme in anti-herbivore defense, in control and elicited plants. The data indicate that N is more rapidly incorporated into LOX2 after attack potentially priming plants for a further attack as suggested by a transient increase in LOX2 levels. This method allows for in-depth quantitative proteomics and15N flux analyses of the metabolic dynamics elicited during plant-herbivore interactions.},
    doi = {10.1021/pr300465n},
    keywords = {jena; proteomics; isotope labeling; heavy isotopes; 15N; plants; herbivores},
    owner = {Sebastian},
    pmid = {22905865},
    timestamp = {2012.06.22},
    }
  • [DOI] Rizzi, R. & Sikora, F. (2012) Some results on more flexible versions of Graph Motif Springer, berlin, 278–289.
    [Bibtex]
    @InProceedings{rizzi12some,
    author = {Rizzi, Romeo and Sikora, Florian},
    title = {Some results on more flexible versions of {Graph Motif}},
    booktitle = {Proc. of Computer Science Symposium in Russia (CSR 2012)},
    year = {2012},
    volume = {7353},
    series = lncs,
    pages = {278--289},
    publisher = springer,
    abstract = {The problems studied in this paper originate from Graph Motif, a problem introduced in 2006 in the context of biological networks. Informally speaking, it consists in deciding if a multiset of colors occurs in a connected subgraph of a vertex-colored graph. Due to the high rate of noise in the biological data, more flexible definitions of the problem have been outlined. We present in this paper two inapproximability results for two different optimization variants of Graph Motif. We also study another definition of the problem, when the connectivity constraint is replaced by modularity. While the problem stays NP-complete, it allows algorithms in FPT for biologically relevant parameterizations.},
    doi = {10.1007/978-3-642-30642-6_26},
    keywords = {jena; PABI; fpt},
    owner = {florian},
    timestamp = {2012.02.21},
    url = {http://arxiv.org/abs/1202.5184},
    }
  • Rempt, M. (2012) Investigation of novel pathways and metabolites involved in the chemical defense of bryophytes and macroalgae. PhD Thesis. Jena, Germany, .
    [Bibtex]
    @PhdThesis{rempt12investigation,
    author = {Martin Rempt},
    title = {Investigation of novel pathways and metabolites involved in the chemical defense of bryophytes and macroalgae},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    address = {Jena, Germany},
    owner = {fhufsky},
    timestamp = {2013.10.21},
    }
  • [DOI] Rauf, I., Rasche, F., Nicolas, F. & Böcker, S. (2012) Finding maximum colorful subtrees in practice Springer, berlin, 213–223.
    [Bibtex]
    @InProceedings{rauf12finding,
    author = {Imran Rauf and Florian Rasche and Fran\c{c}ois Nicolas and Sebastian B\"ocker},
    title = {Finding Maximum Colorful Subtrees in practice},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2012)},
    year = {2012},
    volume = {7262},
    series = lncs,
    pages = {213--223},
    publisher = Springer,
    abstract = {In metabolomics and other fields dealing with small compounds, mass spectrometry is applied as sensitive high-throughput technique. Recently, fragmentation trees have been proposed to automatically analyze the fragmentation mass spectra recorded by such instruments. Computationally, this leads to the problem of finding a maximum weight subtree in an edge weighted and vertex colored graph, such that every color appears at most once in the solution. We introduce new heuristics and an exact algorithm for this Maximum Colorful Subtree problem, and evaluate them against existing algorithms on real-world datasets. Our tree completion heuristic consistently scores better than other heuristics, while the integer programming-based algorithm produces optimal trees with modest running times. Our fast and accurate heuristic can help to determine molecular formulas based on fragmentation trees. On the other hand, optimal trees from the integer linear program are useful if structure is relevant, e.g., for tree alignments.},
    doi = {10.1007/978-3-642-29627-7_22},
    file = {RaufEtAl_FindingMaximumColorful_RECOMB_2012.pdf:2012/RaufEtAl_FindingMaximumColorful_RECOMB_2012.pdf:PDF},
    journalref = {rauf13finding},
    keywords = {jena; metabolite ms; tandem ms; ms; ft; idun; TrACReview},
    owner = {Sebastian},
    timestamp = {2011.02.01},
    url = {http://www.springerlink.com/content/j147r771880655l3/},
    }
  • [DOI] Rasche, F., Scheubert, K., Hufsky, F., Zichner, T., Kai, M., Svatoš, A. & Böcker, S. (2012) Identifying the unknowns by aligning fragmentation trees. IN Anal chem, 84.3417–3426.
    [Bibtex]
    @Article{rasche12identifying,
    author = {Florian Rasche and Kerstin Scheubert and Franziska Hufsky and Thomas Zichner and Marco Kai and Ale\v{s} Svato\v{s} and Sebastian B\"ocker},
    title = {Identifying the unknowns by aligning fragmentation trees},
    journal = {Anal Chem},
    year = {2012},
    volume = {84},
    number = {7},
    pages = {3417--3426},
    abstract = {Mass spectrometry allows sensitive, automated and high-throughput analysis of small molecules. In principle, tandem mass spectrometry allows us to identify ``unknown'' small molecules not in any database, but the automated interpretation of such data is in its infancy. Fragmentation trees have recently been introduced for the automated analysis of the fragmentation patterns of small molecules. We present a method for the automated comparison of such fragmentation patterns, based on aligning the compounds' fragmentation trees. We cluster compounds based solely on their fragmentation patterns, and show a good agreement with known compound classes. Fragmentation pattern similarities are strongly correlated with the chemical similarity of molecules. We present a tool for searching a database for compounds with fragmentation pattern similar to an unknown sample compound. We apply this tool to metabolites from Icelandic poppy. Our method allows fully automated computational identification of small molecules that cannot be found in any database.},
    data = {tandemms_data},
    doi = {10.1021/ac300304u},
    file = {RascheEtAl_IdentifyingUnknownsAligning_AnalChem_2012_preprint.pdf:2012/RascheEtAl_IdentifyingUnknownsAligning_AnalChem_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; fragmentation tree; FT; alignment; tree alignment; mass spectrometry; MS; LC-MS; de novo; fragmentation; tandem MS; idun},
    owner = {Sebastian},
    pmid = {22390817},
    timestamp = {2011.08.09},
    }
  • [DOI] Ludwig, M., Hufsky, F., Elshamy, S. & Böcker, S. (2012) Finding characteristic substructures for metabolite classes Schloss dagstuhl–leibniz-zentrum fuer informatik, 23–38.
    [Bibtex]
    @InProceedings{ludwig12finding,
    author = {Marcus Ludwig and Franziska Hufsky and Samy Elshamy and Sebastian B\"ocker},
    title = {Finding Characteristic Substructures for Metabolite Classes},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2012)},
    year = {2012},
    volume = {26},
    series = {OpenAccess Series in Informatics (OASIcs)},
    pages = {23--38},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    abstract = {We introduce a method to find a characteristic substructure for a set of molecular structures. Different from common approaches, such as computing the maximum common subgraph, the resulting substructure does not have to be contained in its exact form in all input molecules. Our approach is part of the identification pipeline for unknown metabolites using fragmentation trees. Searching databases using fragmentation tree alignment results in hit lists containing compounds with large structural similarity to the unknown metabolite. The characteristic substructure of the molecules in the hit list may be a key structural element of the unknown compound and might be used as starting point for structural elucidation. We evaluate our method on different data sets and find that it retrieves essential substructures if the input lists are not too heterogeneous. We apply our method to predict structural elements for five unknown samples from Icelandic poppy.},
    doi = {10.4230/OASIcs.GCB.2012.23},
    keywords = {jena; IDUN; fragmentation trees; FT-BLAST; molecular substructures},
    owner = {Sebastian},
    timestamp = {2012.07.13},
    url = {http://drops.dagstuhl.de/opus/volltexte/2012/3715},
    }
  • Kynast, P. (2012) Segmentierung von Massenspektrometriebildern. Master Thesis. .
    [Bibtex]
    @MastersThesis{kynast12segmentierung,
    author = {Kynast, Philipp},
    title = {{Segmentierung von Massenspektrometriebildern}},
    school = {Friedrich Schiller University Jena},
    year = {2012},
    owner = {Purva},
    publisher = {Friedrich-Schiller-Universit{\"{a}}t Jena},
    timestamp = {2016-12-26},
    url = {http://pubman.mpdl.mpg.de/pubman/item/escidoc:1719271:3},
    }
  • [DOI] Hufsky, F., Rempt, M., Rasche, F., Pohnert, G. & Böcker, S. (2012) De novo analysis of electron impact mass spectra using fragmentation trees. IN Anal chim acta, 739.67–76.
    [Bibtex]
    @Article{hufsky12de-novo,
    author = {Franziska Hufsky and Martin Rempt and Florian Rasche and Georg Pohnert and Sebastian B\"ocker},
    title = {De Novo Analysis of Electron Impact Mass Spectra Using Fragmentation Trees},
    journal = {Anal Chim Acta},
    year = {2012},
    volume = {739},
    pages = {67--76},
    abstract = {The automated fragmentation analysis of high resolution EI mass spectra based on a fragmentation tree algorithm is introduced. Fragmentation trees are constructed from EI spectra by automated signal extraction and evaluation. These trees explain relevant fragmentation reactions and assign molecular formulas to fragments. The method enables the identification of the molecular ion and the molecular formula of a metabolite if the molecular ion is present in the spectrum. These identifications are independent of existing library knowledge and, thus, support assignment and structural elucidation of unknown compounds. The method works even if the molecular ion is of very low abundance or hidden under contaminants with higher masses. We apply the algorithm to a selection of 50 derivatized and underivatized metabolites and demonstrate that in 78\% of cases the molecular ion can be correctly assigned. The automatically constructed fragmentation trees correspond very well to published mechanisms and allow the assignment of specific relevant fragments and fragmentation pathways even in the most complex EI-spectra in our dataset. This method will be very helpful in the automated analysis of metabolites that are not included in common libraries and it thus has the potential to support the explorative character of metabolomics studies.},
    data = {gcms_data},
    doi = {10.1016/j.aca.2012.06.021},
    file = {HufskyEtAl_DeNovoAnalysisElectron_AnalChimActa_2012.pdf:2012/HufskyEtAl_DeNovoAnalysisElectron_AnalChimActa_2012.pdf:PDF},
    keywords = {jena; IDUN; metabolite ms; gcms; ms; ft; fragmentation trees},
    owner = {Sebastian},
    pmid = {22819051},
    timestamp = {2011.09.17},
    }
  • [DOI] Hufsky, F. & Böcker, S. (2012) Comparing fragmentation trees from electron impact mass spectra with annotated fragmentation pathways Schloss dagstuhl–leibniz-zentrum fuer informatik, 12–22.
    [Bibtex]
    @InProceedings{hufsky12comparing,
    author = {Franziska Hufsky and Sebastian B\"ocker},
    title = {Comparing Fragmentation Trees from Electron Impact Mass Spectra with Annotated Fragmentation Pathways},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2012)},
    year = {2012},
    volume = {26},
    series = {OpenAccess Series in Informatics (OASIcs)},
    pages = {12--22},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    abstract = {Electron impact ionization (EI) is the most common form of ionization for GC-MS analysis of small molecules. This ionization method results in a mass spectrum not necessarily containing the molecular ion peak. The fragmentation of small compounds during EI is well understood, but manual interpretation of mass spectra is tedious and time-consuming. Methods for automated analysis are highly sought, but currently limited to database searching and rule-based approaches. With the computation of hypothetical fragmentation trees from high mass GC-MS data the high-throughput interpretation of such spectra may become feasible. We compare these trees with annotated fragmentation pathways. We find that fragmentation trees explain the origin of the ions found in the mass spectra in accordance to the literature. No peak is annotated with an incorrect fragment formula and 78.7\% of the fragmentation processes are correctly reconstructed.},
    doi = {10.4230/OASIcs.GCB.2012.12},
    keywords = {jena; IDUN; fragmentation trees; GC-MS; EI; Electron Impact Ionization;},
    owner = {Sebastian},
    timestamp = {2012.07.13},
    url = {http://drops.dagstuhl.de/opus/volltexte/2012/3714},
    }
  • [DOI] Böcker, S. (2012) A golden ratio parameterized algorithm for cluster editing. IN J discrete algorithms, 16.79–89.
    [Bibtex]
    @Article{boecker12golden,
    author = {Sebastian B\"ocker},
    title = {A golden ratio parameterized algorithm for Cluster Editing},
    journal = {J Discrete Algorithms},
    year = {2012},
    volume = {16},
    pages = {79--89},
    abstract = {The Cluster Editing problem asks to transform a graph by at most k edge modifications into a disjoint union of cliques. The problem is NP-complete, but several parameterized algorithms are known. We present a novel search tree algorithm for the problem, which improves running time from O*(1.76^k) to O*(1.62^k). In detail, we can show that we can always branch with branching vector (2,1) or better, resulting in the golden ratio as the base of the search tree size. Our algorithm uses a well-known transformation to the integer-weighted counterpart of the problem. To achieve our result, we combine three techniques: First, we show that zero-edges in the graph enforce structural features that allow us to branch more efficiently. Second, by repeatedly branching we can isolate vertices, releasing costs. Finally, we use a known characterization of graphs with few conflicts.},
    doi = {10.1016/j.jda.2012.04.005},
    file = {Boecker_GoldenRatioParameterizedAlgorithm_JDiscreteAlgorithms_2012_preprint.pdf:2012/Boecker_GoldenRatioParameterizedAlgorithm_JDiscreteAlgorithms_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; PABI; fpt; cluster editing},
    owner = {Sebastian},
    timestamp = {2011.01.29},
    }
  • Hölzer, M. (2012) Datenmanagement von Massenspektren und Fragmentierungsbäumen mit BExIS. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{hoelzer12datenmanagement,
    author = {Martin H\"olzer},
    title = {Datenmanagement von {Massenspektren} und {Fragmentierungsb\"aumen} mit {BExIS}},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    type = {Diplomarbeit},
    keywords = {jena; ms; ft;},
    owner = {bertram},
    timestamp = {2014.08.08},
    }
  • Griebel, T. (2012) Modular software for phylogenetic analysis connecting algorithms, applications and data. PhD Thesis. .
    [Bibtex]
    @PhdThesis{griebel12modular,
    author = {Thasso Griebel},
    title = {Modular Software for Phylogenetic Analysis Connecting Algorithms, Applications and Data},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    owner = {maltebrinkmeyer},
    timestamp = {2012.03.12},
    }
  • Fleischauer, M. (2012) Bessere Supertrees durch suboptimale Schnitte im FlipCut Graphen. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{fleischauer12bessere,
    author = {Markus Fleischauer},
    title = {Bessere {Supertrees} durch suboptimale {Schnitte} im {FlipCut Graphen}},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    type = {Diplomarbeit},
    keywords = {jena; phylogenetics; FlipCut; supertrees;},
    owner = {bertram},
    timestamp = {2014.08.08},
    }
  • Dührkop, K. (2012) A sparse dynamic programming algorithm for aligning fragmentation trees. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{duehrkop12sparse,
    author = {Kai D\"uhrkop},
    title = {A sparse dynamic programming algorithm for aligning fragmentation trees},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    type = {Diplomarbeit},
    keywords = {jena; ms},
    owner = {kaidu},
    timestamp = {2014.8.08},
    }
  • [DOI] Blin, G., Bonizzoni, P., Dondi, R. & Sikora, F. (2012) On the parameterized complexity of the repetition free longest common subsequence problem. IN Inform process lett, 112.272–276.
    [Bibtex]
    @Article{blin12parameterized,
    author = {Blin, Guillaume and Bonizzoni, Paola and Dondi, Riccardo and Sikora, Florian},
    title = {On the Parameterized Complexity of the Repetition Free Longest Common Subsequence Problem},
    journal = IPL,
    year = {2012},
    volume = {112},
    number = {7},
    pages = {272--276},
    abstract = {Longest common subsequence is a widely used measure to compare strings, in particular in computational biology. Recently, several variants of the longest common subsequence have been introduced to tackle the comparison of genomes. In particular, the Repetition Free Longest Common Subsequence (RFLCS) problem is a variant of the LCS problem that asks for a longest common subsequence of two input strings with no repetition of symbols. In this paper, we investigate the parameterized complexity of RFLCS. First, we show that the problem does not admit a polynomial kernel. Then, we present a randomized FPT algorithm for the RFLCS problem, improving the time complexity of the existent FPT algorithm.},
    doi = {10.1016/j.ipl.2011.12.009},
    keywords = {fpt; jena; PABI},
    owner = {florian},
    timestamp = {2011.12.20},
    url = {http://hal-univ-mlv.archives-ouvertes.fr/hal-00637255/en},
    }
  • [DOI] Blin, G., Bonizzoni, P., Dondi, R., Rizzi, R. & Sikora, F. (2012) Complexity insights of the minimum duplication problem Springer, berlin, 153–164.
    [Bibtex]
    @InProceedings{blin12complexity,
    author = {Blin, Guillaume and Bonizzoni, Paola and Dondi, Riccardo and Rizzi, Romeo and Sikora, Florian},
    title = {Complexity Insights of the Minimum Duplication Problem},
    booktitle = {Proc. of Current Trends in Theory and Practice of Computer Science (SOFSEM 2012)},
    year = {2012},
    volume = {7147},
    series = lncs,
    pages = {153--164},
    publisher = Springer,
    abstract = {The Minimum Duplication problem is a well-known problem in phylogenetics and comparative genomics. Given a set of gene trees, the Minimum Duplication problem asks for a species tree that induces the minimum number of gene duplications in the input gene trees. More recently, a variant of the Minimum Duplication problem, called Minimum Duplication Bipartite, has been introduced in [14], where the goal is to find all pre-duplications, that is duplications that precede, in the evolution, the first speciation with respect to a species tree. In this paper, we investigate the complexity of both Minimum Duplication and Minimum Duplication Bipartite problems. First of all, we prove that the Minimum Duplication problem is APX-hard, even when the input consists of five uniquely leaf-labelled gene trees (progressing on the complexity of the problem). Then, we show that the Minimum Duplication Bipartite problem can be solved efficiently by a randomized algorithm when the input gene trees have bounded depth.},
    doi = {10.1007/978-3-642-27660-6_13},
    keywords = {jena; fpt; PABI},
    opteditor = {M{\'a}ria Bielikov{\'a} and Gerhard Friedrich and Georg Gottlob and Stefan Katzenbeisser and Gy{\"o}rgy Tur{\'a}n},
    owner = {florian},
    timestamp = {2011.10.11},
    url = {http://hal-univ-mlv.archives-ouvertes.fr/hal-00629047/en/},
    }
  • Biering, A. (2012) Undisputed siblings in FlipCut supertrees. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{biering12undisputed,
    author = {Antje Biering},
    title = {Undisputed Siblings in {FlipCut} Supertrees},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2012},
    type = {Diplomarbeit},
    keywords = {jena; phylogenetics; FlipCut; supertrees;},
    owner = {bertram},
    timestamp = {2014.08.08},
    }
  • [DOI] Böcker, S. & Damaschke, P. (2012) A note on the parameterized complexity of unordered maximum tree orientation. IN Discrete appl math, 160.1634–1638.
    [Bibtex]
    @Article{boecker12note,
    author = {Sebastian B\"ocker and Peter Damaschke},
    title = {A Note on the Parameterized Complexity of Unordered Maximum Tree Orientation},
    journal = {Discrete Appl Math},
    year = {2012},
    volume = {160},
    number = {10-11},
    pages = {1634--1638},
    abstract = {In the Unordered Maximum Tree Orientation problem, a set P of paths in a tree and a parameter k is given, and we want to orient the edges in the tree such that all but at most k paths in P become directed paths. This is a more difficult variant of a well-studied problem in computational biology where the directions of paths in P are already given. We show that the parameterized complexity of the unordered version is between Edge Bipartization and Vertex Bipartization, and we give a characterization of orientable path sets in trees by forbidden substructures, which are cycles of a certain kind.},
    doi = {10.1016/j.dam.2012.02.017},
    file = {BoeckerDamaschke_NoteParameterizedComplexity_DAM_2012_preprint.pdf:2012/BoeckerDamaschke_NoteParameterizedComplexity_DAM_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; tree orientation; fpt; PABI},
    owner = {Sebastian},
    timestamp = {2011.11.18},
    }
  • [DOI] Böcker, S., Bui, Q. B. A. & Truss, A. (2012) Improved fixed-parameter algorithms for minimum-flip consensus trees. IN Acm trans algorithms, 8.17 pages.
    [Bibtex]
    @Article{boecker12improved,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {Improved Fixed-Parameter Algorithms for Minimum-Flip Consensus Trees},
    journal = {ACM Trans Algorithms},
    year = {2012},
    volume = {8},
    number = {1},
    pages = {17 pages},
    note = {Article~7},
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree for a given set of rooted input trees has frequently been addressed. In this paper we study the Minimum-Flip Problem: the input trees are transformed into a binary matrix, and we want to find a perfect phylogeny for this matrix using a minimum number of flips, that is, corrections of single entries in the matrix. The graph-theoretical formulation of the problem is as follows: Given a bipartite graph G = (V_t \cup V_c , E), the task is to find a minimum set of edge modifications such that the resulting graph has no induced path with four edges that starts and ends in V_t , where V_t corresponds to the taxa set and Vc corresponds to the character set. We present two fixed-parameter algorithms for the Minimum-Flip Problem, one with running time O(4.83^k + poly(m, n)) and another one with running time O(4.42^k + poly(m, n)) for n taxa, m characters, k flips, and poly(m, n) denotes a polynomial function in m and n. Additionally, we discuss several heuristic improvements. We also report computational results on phylogenetic data.},
    doi = {10.1145/2071379.2071386},
    file = {BoeckerEtAl_ImprovedFixedParameterAlgorithms_TALG_2012_preprint.pdf:2012/BoeckerEtAl_ImprovedFixedParameterAlgorithms_TALG_2012_preprint.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    optmonth = jan,
    owner = {baoanh},
    timestamp = {2009.06.02},
    }
  • Böcker, S. (2012) Ten times eighteen .
    [Bibtex]
    @TechReport{boecker12ten,
    author = {Sebastian B\"ocker},
    title = {Ten times eighteen},
    year = {2012},
    note = {arXiv:1209.1977v1},
    abstract = {We consider the following simple game: We are given a table with ten slots indexed one to ten. In each of the ten rounds of the game, three dice are rolled and the numbers are added. We then put this number into any free slot. For each slot, we multiply the slot index with the number in this slot, and add up the products. The goal of the game is to maximize this score. In more detail, we play the game many times, and try to maximize the sum of scores or, equivalently, the expected score. We present a strategy to optimally play this game with respect to the expected score. We then modify our strategy so that we need only polynomial time and space. Finally, we show that knowing all ten rolls in advance, results in a relatively small increase in score. Although the game has a random component and requires a non-trivial strategy to be solved optimally, this strategy needs only polynomial time and space.},
    keywords = {jena;},
    owner = {Sebastian},
    timestamp = {2010.08.15},
    url = {http://arxiv.org/abs/1209.1977},
    }
  • [DOI] (2012) German conference on bioinformatics 2012 Böcker, S., Hufsky, F., Scheubert, K., Schleicher, J. & Schuster, S. (Eds.), Schloss dagstuhl–leibniz-zentrum fuer informatik.
    [Bibtex]
    @Proceedings{boecker12german,
    title = {German Conference on Bioinformatics 2012},
    year = {2012},
    editor = {Sebastian B\"ocker and Franziska Hufsky and Kerstin Scheubert and Jana Schleicher and Stefan Schuster},
    volume = {26},
    series = {OpenAccess Series in Informatics (OASIcs)},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    isbn = {978-3-939897-44-6},
    doi = {10.4230/OASIcs.GCB.2012},
    issn = {2190-6807},
    keywords = {jena},
    owner = {Sebastian},
    timestamp = {2012.09.14},
    url = {http://drops.dagstuhl.de/portals/oasics/index.php?semnr=12011},
    }

2011

  • [DOI] Böcker, S. (2011) A golden ratio parameterized algorithm for cluster editing Springer, berlin, 85–95.
    [Bibtex]
    @InProceedings{boecker11golden,
    author = {Sebastian B\"ocker},
    title = {A golden ratio parameterized algorithm for Cluster Editing},
    booktitle = {Proc. of International Workshop on Combinatorial Algorithms (IWOCA 2011)},
    year = {2011},
    volume = {7056},
    series = lncs,
    pages = {85--95},
    publisher = Springer,
    abstract = {The Cluster Editing problem asks to transform a graph by at most k edge modifications into a disjoint union of cliques. The problem is NP-complete, but several parameterized algorithms are known. We present a novel search tree algorithm for the problem, which improves running time from O*(1.76^k) to O*(1.62^k). In detail, we can show that we can always branch with branching vector (2,1) or better, resulting in the golden ratio as the base of the search tree size. Our algorithm uses a well-known transformation to the integer-weighted counterpart of the problem. To achieve our result, we combine three techniques: First, we show that zero-edges in the graph enforce structural features that allow us to branch more efficiently. Second, by repeatedly branching we can isolate vertices, releasing costs. Finally, we use a known characterization of graphs with few conflicts.},
    doi = {10.1007/978-3-642-25011-8_7},
    file = {Boecker_GoldenRatioParameterized_IWOCA_2011_preprint.pdf:2011/Boecker_GoldenRatioParameterized_IWOCA_2011_preprint.pdf:PDF},
    journalref = {boecker12golden},
    keywords = {jena; linkpdf; PABI; FPT; cluster editing},
    opteditor = {C.S. Iliopoulos and W.F. Smyth},
    owner = {Sebastian},
    timestamp = {2011.01.29},
    }
  • [DOI] Dehof, A. K., Rurainski, A., Bui, Q. B. A., Böcker, S., Lenhof, H. & Hildebrandt, A. (2011) Automated bond order assignment as an optimization problem. IN Bioinformatics, 27.619–625.
    [Bibtex]
    @Article{dehof11automated,
    author = {Anna Katharina Dehof and Alexander Rurainski and Quang Bao Anh Bui and Sebastian B\"ocker and Hans-Peter Lenhof and Andreas Hildebrandt},
    title = {Automated Bond Order Assignment as an Optimization Problem},
    journal = {Bioinformatics},
    year = {2011},
    volume = {27},
    number = {5},
    pages = {619--625},
    abstract = {Numerous applications in Computational Biology process molecular structures and hence strongly rely not only on correct atomic coordinates, but also on correct bond order information. For proteins and nucleic acids, bond orders can be easily deduced but this does not hold for other types of molecules like ligands. For ligands bond order information is not always provided in molecular databases and thus a variety of approaches tackling this problem have been developed. In this work we extend an ansatz proposed by (Wang, 2006) that assigns connectivity based penalty scores and tries to heuristically approximate its optimum. In this work we present three efficient and exact solvers for the problem replacing the heuristic approximation scheme of the original approach: an A* scheme, an ILP formulation, and an FPT approach. We implemented and evaluated the original implementation, our A*, ILP, and FPT formulation on the MMFF94 validation suite and the KEGG Drug database. We show the benefit of computing exact solutions of the penalty minimization problem and the additional gain when computing all optimal (or even suboptimal) solutions. We close with a detailed comparison of our methods. The A* and ILP solution are integrated into the open-source C++ LGPL library BALL (Kohlbacher, 2000; Hildebrandt, 2010) and the molecular visualization and modelling tool BALLView and can be downloaded from our homepage www.ball-project.org. The FPT implementation can be downloaded from http://bio.informatik.uni-jena.de/software/.},
    doi = {10.1093/bioinformatics/btq718},
    keywords = {jena; linkpdf; FPT; bond orders; bond order assignment problem},
    owner = {baoanh},
    pmid = {21245051},
    timestamp = {2010.08.30},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btq718?ijkey=dFsXGECTjNzzMNQ&keytype=ref},
    }
  • [DOI] Wittkop, T., Emig, D., Truss, A., Albrecht, M., Böcker, S. & Baumbach, J. (2011) Comprehensive cluster analysis with transitivity clustering. IN Nat protocols, 6.285–295.
    [Bibtex]
    @Article{wittkop11comprehensive,
    author = {Tobias Wittkop and Dorothea Emig and Anke Truss and Mario Albrecht and Sebastian B\"ocker and Jan Baumbach},
    title = {Comprehensive cluster analysis with Transitivity Clustering},
    journal = {Nat Protocols},
    year = {2011},
    volume = {6},
    pages = {285--295},
    abstract = {Transitivity Clustering is a method for the partitioning of biological data, a long-standing challenge in computational biology. It is based on weighted transitive graph projection and provides integrated access to various functions, in order to addressing each step of point in a typical cluster analysis. To facilitate this, Transitivity Clustering offers three user-friendly interfaces: a powerful stand-alone version, a web interface, and a collection of Cytoscape plug-ins. Here, we describe three major workflows: (1) integrated protein sequence cluster analysis with Cytoscape; (2) semi-supervised protein family detection; and (3) clustering of gene expression data. This protocol guides the user through the most important features of Transitivity Clustering and takes one hour to complete.},
    doi = {10.1038/nprot.2010.197},
    keywords = {jena; cluster editing; transitivity clustering; TransClust},
    owner = {Sebastian},
    pmid = {21372810},
    timestamp = {2010.11.10},
    }
  • [DOI] Scheubert, K., Hufsky, F., Rasche, F. & Böcker, S. (2011) Computing fragmentation trees from metabolite multiple mass spectrometry data. IN J comput biol, 18.1383–1397.
    [Bibtex]
    @Article{scheubert11computing-jcb,
    author = {Kerstin Scheubert and Franziska Hufsky and Florian Rasche and Sebastian B\"ocker},
    title = {Computing Fragmentation Trees from Metabolite Multiple Mass Spectrometry Data},
    journal = {J Comput Biol},
    year = {2011},
    volume = {18},
    number = {11},
    pages = {1383--1397},
    abstract = {Since metabolites cannot be predicted from the genome sequence, high-throughput de novo identification of small molecules is highly sought. Mass spectrometry (MS) in combination with a fragmentation technique is commonly used for this task. Unfortunately, automated analysis of such data is in its infancy. Recently, fragmentation trees have been proposed as an analysis tool for such data. Additional fragmentation steps (MSn) reveal more information about the molecule. We propose to use MSn data for the computation of fragmentation trees, and present the Colorful Subtree Closure problem to formalize this task: There, we search for a colorful subtree inside a vertex-colored graph, such that the weight of the transitive closure of the subtree is maximal. We give several negative results regarding the tractability and approximability of this and related problems. We then present an exact dynamic programming algorithm, which is parameterized by the number of colors in the graph and is swift in practice. Evaluation of our method on a dataset of 45 reference compounds showed that the quality of constructed fragmentation trees is improved by using MSn instead of MS2 measurements.},
    doi = {10.1089/cmb.2011.0168},
    keywords = {jena; mass spectrometry; fragmentation tree; multiple MS; small molecules; metabolite ms; ft; idun; ms; MSn; TrACReview},
    pmid = {22035289},
    }
  • [DOI] Scheubert, K., Hufsky, F., Rasche, F. & Böcker, S. (2011) Computing fragmentation trees from metabolite multiple mass spectrometry data Springer, berlin, 377–391.
    [Bibtex]
    @InProceedings{scheubert11computing,
    author = {Kerstin Scheubert and Franziska Hufsky and Florian Rasche and Sebastian B\"ocker},
    title = {Computing fragmentation trees from metabolite multiple mass spectrometry data},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2011)},
    year = {2011},
    volume = {6577},
    series = lncs,
    pages = {377--391},
    publisher = Springer,
    abstract = {Since metabolites cannot be predicted from the genome sequence, high-throughput de-novo identification of small molecules is highly sought. Mass spectrometry (MS) in combination with a fragmentationtechnique is commonly used for this task. Unfortunately, automated analysis of such data is in its infancy. Recently, fragmentation trees have been proposed as an analysis tool for such data. Additional fragmentation steps (MS^n) reveal more information about the molecule. We propose to use MS^n data for the computation of fragmentation trees, and present the Colorful Subtree Closure problem to formalize this task: There, we search for a colorful subtree inside a vertex-colored graph, such that the weight of the transitive closure of the subtree is maximal. We give several negative results regarding the tractability and approximability of this and related problems. We then present an exact dynamic programming algorithm, which is parameterized by the number of colors in the graph and is swift in practice. Evaluation of our method on a dataset of 45 reference compounds showed that the quality of constructed fragmentation trees is improved by using MS^n instead of MS^2 measurements.},
    doi = {10.1007/978-3-642-20036-6_36},
    file = {:2011/ScheubertEtAl_ComputingFragmentationTrees_RECOMB_2011_preprint.pdf:PDF},
    journalref = {scheubert11computing-jcb},
    keywords = {jena; linkpdf; MS; Mass Spectrometry; metabolites; metabolite identification; multiple MS; MSn; metabolite ms; ft; idun;},
    owner = {Sebastian},
    timestamp = {2010.12.07},
    }
  • [DOI] Rasche, F., Svatoš, A., Maddula, R. K., Böttcher, C. & Böcker, S. (2011) Computing fragmentation trees from tandem mass spectrometry data. IN Anal chem, 83.1243–1251.
    [Bibtex]
    @Article{rasche11computing,
    author = {Florian Rasche and Ale\v{s} Svato\v{s} and Ravi Kumar Maddula and Christoph B\"ottcher and Sebastian B\"ocker},
    title = {Computing fragmentation trees from tandem mass spectrometry data},
    journal = {Anal Chem},
    year = {2011},
    volume = {83},
    number = {4},
    pages = {1243--1251},
    abstract = {The structural elucidation of organic compounds in complex biofluids and tissues remains a significant analytical challenge. For mass spectrometry, the manual interpretation of collision-induced dissociation (CID) mass spectra is cumbersome and requires expert-knowledge, as the fragmentation mechanisms of ions formed from small molecules are not completely understood. The automated identification of compounds is generally limited to searching in spectral libraries. Here, we present a method for interpreting the CID spectra of the organic compounds protonated ions by computing fragmentation trees that establish not only the molecular formula of the compound and all fragment ions, but also the dependencies between fragment ions. This is an important step towards the automated identification of unknowns from the CID spectra of compounds that are not in any database.},
    data = {tandemms_data},
    doi = {10.1021/ac101825k},
    erratum = {http://pubs.acs.org/doi/abs/10.1021/ac201785d},
    file = {RascheEtAl_ComputingFragmentationTrees_AnalChem_2011.pdf:2011/RascheEtAl_ComputingFragmentationTrees_AnalChem_2011.pdf:PDF},
    keywords = {jena; Mass Spectrometry; ms; metabolite ms; tandem ms; metabolite identification; ft; idun; TrACReview},
    owner = {Sebastian},
    pmid = {21182243},
    timestamp = {2009.09.30},
    }
  • Ludwig, M. (2011) Identifizierung repräsentativer teilstrukturen in metaboliten-klassen. bachelor thesis Master Thesis. .
    [Bibtex]
    @MastersThesis{ludwig11identifizierung,
    author = {Marcus Ludwig},
    title = {Identifizierung repr\"asentativer Teilstrukturen in Metaboliten-Klassen},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2011},
    type = {bachelor thesis},
    owner = {fhufsky},
    timestamp = {2011.11.07},
    }
  • Kreisel, C. (2011) Implementierung eines algorithmus zur analyse von tandem-massenspektren. bachelor thesis Master Thesis. .
    [Bibtex]
    @MastersThesis{kreisel11implementierung,
    author = {Christian Kreisel},
    title = {Implementierung eines Algorithmus zur Analyse von Tandem-Massenspektren},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2011},
    type = {bachelor thesis},
    file = {:2011/Kreisel_ImplementierungAlgorithmusPolymere_BachelorThesis_2011.pdf:PDF},
    owner = {kerstin},
    timestamp = {2011.11.07},
    }
  • [DOI] Hufsky, F., Kuchenbecker, L., Jahn, K., Stoye, J. & Böcker, S. (2011) Swiftly computing center strings. IN Bmc bioinf, 12.106.
    [Bibtex]
    @Article{hufsky11swiftly,
    author = {Franziska Hufsky and L\'eon Kuchenbecker and Katharina Jahn and Jens Stoye and Sebastian B\"ocker},
    title = {Swiftly computing center strings},
    journal = {BMC Bioinf},
    year = {2011},
    volume = {12},
    pages = {106},
    abstract = {Background. The center string (or closest string) problem is a classic computer science problem with important applications in computational biology. Given k input strings and a distance threshold d, we search for a string within Hamming distance at most d to each input string. This problem is NP complete. Results. In this paper, we focus on exact methods for the problem that are also swift in application. We first introduce data reduction techniques that allow us to infer that certain instances have no solution, or that a center string must satisfy certain conditions. We describe how to use this information to speed up two previously published search tree algorithms. Then, we describe a novel iterative search strategy that is efficient in practice, where some of our reduction techniques can also be applied. Finally, we present results of an evaluation study for two different data sets from a biological application. Conclusions. We find that the running time for computing the optimal center string is dominated by the subroutine calls for d=dopt-1 and d=dopt. Our data reduction is very effective for both, either rejecting unsolvable instances or solving trivial positions. We find that this speeds up computations considerably.},
    data = {center_strings},
    doi = {10.1186/1471-2105-12-106},
    keywords = {jena; 3AGC; center string, closest string, gene clusters},
    owner = {Sebastian},
    pmid = {21504573},
    timestamp = {2011.04.03},
    url = {http://www.biomedcentral.com/1471-2105/12/106},
    }
  • [DOI] Grützmann, K., Böcker, S. & Schuster, S. (2011) Combinatorics of aliphatic amino acids. IN Naturwissenschaften, 98.79–86.
    [Bibtex]
    @Article{gruetzmann11combinatorics,
    author = {Konrad Gr\"utzmann and Sebastian B\"ocker and Stefan Schuster},
    title = {Combinatorics of aliphatic amino acids},
    journal = {Naturwissenschaften},
    year = {2011},
    volume = {98},
    number = {1},
    pages = {79--86},
    abstract = {This study combines biology and mathematics, showing that a relatively simple question from molecular biology can lead to complicated mathematics. The question is how to calculate the number of theoretically possible aliphatic amino acids as a function of the number of carbon atoms in the side chain. The presented calculation is based on earlier results from theoretical chemistry concerning alkyl compounds. Mathematical properties of this number series are highlighted. We discuss which of the theoretically possible structures really occur in living organisms, such as leucine and isoleucine with a chain length of four. This is done both for a strict definition of aliphatic amino acids only involving carbon and hydrogen atoms in their side chain and for a less strict definition allowing sulphur, nitrogen and oxygen atoms. While the main focus is on proteinogenic amino acids, we also give several examples of non-proteinogenic aliphatic amino acids, playing a role, for instance, in signalling. The results are in agreement with a general phenomenon found in biology: Usually, only a small number of molecules are chosen as building blocks to assemble an inconceivable number of different macromolecules as proteins. Thus, natural biological complexity arises from the multifarious combination of building blocks.},
    doi = {10.1007/s00114-010-0743-2},
    keywords = {jena; counting trees},
    owner = {Sebastian},
    pmid = {21120449},
    timestamp = {2010.12.08},
    }
  • Engler, M. (2011) Analysing copolymer synthesis by fitting a stochastic model to mass spectrometry data. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{engler11analysing,
    author = {Martin Engler},
    title = {Analysing copolymer synthesis by fitting a stochastic model to mass spectrometry data},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2011},
    type = {Diplomarbeit},
    keywords = {jena; polymer ms},
    owner = {martin},
    timestamp = {2011.11.07},
    }
  • Elshamy, S. (2011) A puzzle tool to assemble molecular structures from their fragments. bachelor thesis Master Thesis. .
    [Bibtex]
    @MastersThesis{elshamy11puzzle,
    author = {Samy Elshamy},
    title = {A puzzle tool to assemble molecular structures from their fragments},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2011},
    type = {bachelor thesis},
    file = {:2011/Elshamy_PuzzleToolAssemble_BachelorThesis_2011.pdf:PDF},
    owner = {rasche},
    timestamp = {2011.11.07},
    }
  • Chimani, M., Woste, M. & Böcker, S. (2011) A closer look at the closest string and closest substring problem Siam, 13–24.
    [Bibtex]
    @InProceedings{chimani11closer,
    author = {Markus Chimani and Matthias Woste and Sebastian B\"ocker},
    title = {A Closer Look at the Closest String and Closest Substring Problem},
    booktitle = {Proc. of Algorithm Engineering \& Experiments (ALENEX 2011)},
    year = {2011},
    pages = {13--24},
    publisher = {SIAM},
    abstract = {Let $S$ be a set of $k$ strings over an alphabet $\Sigma$; each string has a length between $\ell$ and $n$. The \emph{Closest Substring Problem (CSSP)} is to find a minimal integer $d$ (and a corresponding string $t$ of length $\ell$) such that each string $s\in S$ has a substring of length $\ell$ with Hamming distance at most $d$ to $t$. We say $t$ is the \emph{closest substring} to $S$. For $\ell = n$, this problem is known as the \emph{Closest String Problem (CSP)}. Particularly in computational biology, the CSP and CSSP have found numerous practical applications such as identifying regulatory motifs and approximate gene clusters, and in degenerate primer design. We study ILP formulations for both problems. Our experiments show that a position-based formulation for the CSP performs very well on real-world instances emerging from biology. Even on randomly generated instances that are hard to solve to optimality, solving the root relaxation leads to solutions very close to the optimum. For the CSSP we give a new formulation that is polytope-wise stronger than a straightforward extension of the CSP formulation. Furthermore we propose a strengthening constraint class that speeds up the running time.},
    keywords = {jena; FPT; center string; closest string; ILP},
    owner = {Sebastian},
    timestamp = {2010.11.11},
    url = {http://www.siam.org/proceedings/alenex/2011/alx11_02_chimanim.pdf},
    }
  • Böcker, S. (2011) Towards a data reduction for the minimum-flip supertree problem .
    [Bibtex]
    @TechReport{boecker11towards,
    author = {Sebastian B\"ocker},
    title = {Towards a data reduction for the Minimum-Flip Supertree problem},
    year = {2011},
    note = {arXiv:1104.4471v1},
    abstract = {In computational phylogenetics, the problem of constructing a supertree of a given set of rooted input trees can be formalized in different ways, to cope with contradictory information in the input. We consider the Minimum Flip Supertree problem, where the input trees are transformed into a 0/1/?-matrix, such that each row represents a taxon, and each column represents an inner node of one of the input trees. Our goal is to find a perfect phylogeny for the input matrix requiring a minimum number of 0/1-flips, that is, corrections of 0/1-entries in the matrix. The problem is known to be NP-complete. Here, we present a parameterized data reduction with polynomial running time. The data reduction guarantees that the reduced instance has a solution if and only if the original instance has a solution. We then make our data reduction parameter-independent by using upper bounds. This allows us to preprocess an instance, and to solve the reduced instance with an arbitrary method. Different from an existing data reduction for the consensus tree problem, our reduction allows us to draw conclusions about certain entries in the matrix. We have implemented and evaluated our data reduction. Unfortunately, we find that the Minimum Flip Supertree problem is also hard in practice: The amount of information that can be derived during data reduction diminishes as instances get more "complicated", and running times for "complicated" instances quickly become prohibitive. Still, our method offers another route of attack for this relevant phylogenetic problem.},
    keywords = {jena; FlipCut; flip supertrees; supertrees},
    owner = {Sebastian},
    timestamp = {2010.08.15},
    url = {http://arxiv.org/abs/1104.4471},
    }
  • Bungart, M. (2011) Strukturanalyse von glykanen auf grundlage von tandem-massenspektrometriedaten im sirius\textsuperscript2-framework. bachelor thesis Master Thesis. .
    [Bibtex]
    @MastersThesis{bungart11strukturanalyse,
    author = {Marco Bungart},
    title = {Strukturanalyse von Glykanen auf Grundlage von Tandem-Massenspektrometriedaten im Sirius\textsuperscript{2}-Framework},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2011},
    type = {bachelor thesis},
    owner = {rasche},
    timestamp = {2011.11.07},
    }
  • [DOI] Brinkmeyer, M., Griebel, T. & Böcker, S. (2011) Polynomial supertree methods revisited. IN Adv bioinformatics, 2011.21 pages.
    [Bibtex]
    @Article{brinkmeyer11polynomial,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {Polynomial Supertree Methods Revisited},
    journal = {Adv Bioinformatics},
    year = {2011},
    volume = {2011},
    number = {Article ID 524182},
    pages = {21 pages},
    abstract = {Supertree methods allow to reconstruct large phylogenetic trees by combining smaller trees with overlapping leaf sets, into one, more comprehensive supertree. The most commonly used supertree method, matrix representation with parsimony (MRP), produces accurate supertrees but is rather slow due to the underlying hard optimization problem. In this paper, we present an extensive simulation study comparing the performance of MRP and the polynomial supertree methods MinCut Supertree, Modified MinCut Supertree, Build-with-distances, PhySIC, PhySIC IST and super distance matrix. We consider both quality and resolution of the reconstructed supertrees. Our findings illustrate the trade-off between accuracy and running time in supertree construction, as well as the pros and cons of voting- and veto-based supertree approaches. Based on our results, we make some general suggestions for supertree methods yet to come.},
    doi = {10.1155/2011/524182},
    journalref = {brinkmeyer13polynomial},
    keywords = {jena; supertrees; simulation study},
    owner = {Sebastian},
    pmid = {22229028},
    timestamp = {2011.10.04},
    url = {http://www.hindawi.com/journals/abi/2011/524182/},
    }
  • [DOI] Brinkmeyer, M., Griebel, T. & Böcker, S. (2011) FlipCut supertrees: towards matrix representation accuracy in polynomial time Springer, berlin, 37–48.
    [Bibtex]
    @InProceedings{brinkmeyer11flipcut,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {{FlipCut} Supertrees: Towards Matrix Representation Accuracy in Polynomial Time},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2011)},
    year = {2011},
    volume = {6842},
    series = lncs,
    pages = {37--48},
    publisher = Springer,
    abstract = {In computational phylogenetics, supertree methods provide a way to reconstruct larger clades of the Tree of Life. The problem of constructing a supertree of a given set of rooted input trees can be formalized in different ways, to cope with contradictory information in the input. In particular, there exist methods based on encoding the input trees in a matrix, and methods based on finding minimum cuts in some graph representing the incompatibility in the input. Evaluations have shown that the former matrix representation methods will compute supertrees of better quality but, unfortunately, the underlying problems are computationally hard, which can heavily constrain the size of reconstructed trees. In contrast, graph-based methods compute a supertree in polynomial time, but these supertrees are inferior in quality. In this paper, we present a novel approach for the computation of supertrees, called FlipCut supertree. Our method combines the computation of minimum cuts from graph-based methods with a matrix representation method, namely Minimum Flip Supertrees. Here, the input trees are encoded in a 0/1/?-matrix, and we present a heuristic to search for a minimum set of 0/1-flips such that the resulting matrix admits a directed perfect phylogeny. We then extend our approach, by using edge weights to weight the columns of the 0/1/?-matrix. In our evaluation, we show that our method is extremely swift in practice, and orders of magnitude faster than the runner up. Concerning supertree accuracy, our method outperforms most polynomial time supertree methods, and is sometimes on par with matrix representation with parsimony.},
    doi = {10.1007/978-3-642-22685-4_4},
    file = {:2011/BrinkmeyerEtAl_FlipCutSupertreesTowards_COCOON_2011_preprint.pdf:PDF},
    journalref = {brinkmeyer13flipcut},
    keywords = {jena; linkpdf; FlipCut; supertrees; supertree methods},
    opteditor = {Fu, Bin and Du, Ding-Zhu},
    owner = {Sebastian},
    timestamp = {2010.11.10},
    }
  • [DOI] Baumgaertel, A., Scheubert, K., Pietsch, B., Kempe, K., Crecelius, A. C., Böcker, S. & Schubert, U. S. (2011) Analysis of different synthetic homopolymers by the use of a new calculation software for tandem mass spectra. IN Rapid commun mass spectrom, 25.1765–1778.
    [Bibtex]
    @Article{baumgaertel11analysis,
    author = {Anja Baumgaertel and Kerstin Scheubert and Bernhard Pietsch and Kristian Kempe and Anna C. Crecelius and Sebastian B\"ocker and Ulrich S. Schubert},
    title = {Analysis of different synthetic homopolymers by the use of a new calculation software for tandem mass spectra},
    journal = {Rapid Commun Mass Spectrom},
    year = {2011},
    volume = {25},
    number = {12},
    pages = {1765--1778},
    abstract = {The manual interpretation of tandem mass spectra of synthetic polymers is very time-consuming. Therefore, a new software tool was developed to accelerate the interpretation of spectra obtained without requiring any further knowledge about the polymer class or the fragmentation behavior under high-energy collision-induced dissociation (CID) conditions. The software only requires an alphabetical list of elements and a peak list of the measured substance as an xml file for the evaluation of the chosen mass spectrum. Tandem mass spectra of different homopolymers, like poly(2-oxazoline)s, poly(ethylene glycol) and poly(styrene), were interpreted by the new software tool. This contribution describes a fast and automated software tool for the rapid analysis of homopolymers.},
    doi = {10.1002/rcm.5019},
    keywords = {jena; polymer MS; schubert},
    owner = {kerstin},
    pmid = {21598337},
    timestamp = {2011.05.25},
    }
  • Böcker, S. & Nicolas, F. (2011) Various complexity results for computational mass spectrometry problems .
    [Bibtex]
    @TechReport{boecker11various,
    author = {Sebastian B\"ocker and Fran\c{c}ois Nicolas},
    title = {Various complexity results for computational mass spectrometry problems},
    year = {2011},
    note = {arXiv:1109.3367v4},
    abstract = {Define Minimum Soapy Union (MinSU) as the following optimization problem: given a $k$-tuple $(X_1, X_2, \dotsc, X_k)$ of finite integer sets, find a $k$-tuple $(t_1, t_2, \dotsc, t_k)$ of integers that minimizes the cardinality of ${(X_1 + t_1)} \cup {(X_2 + t_2)} \cup \dotsb \cup {(X_n + t_k)}$. We show that MinSU is NP-complete, APX-hard, and polynomial for fixed $k$. MinSU appears naturally in the context of protein shotgun sequencing: Here, the protein is cleaved into short and overlapping peptides, which are then analyzed by tandem mass spectrometry. To improve the quality of such spectra, one then asks for the mass of the unknown prefix (the shift) of the spectrum, such that the resulting shifted spectra show a maximum agreement. For real-world data the problem is even more complicated than our definition of MinSU; but our intractability results clearly indicate that it is unlikely to find a polynomial time algorithm for shotgun protein sequencing.},
    keywords = {jena; PABI; MS; complexity; intractability; protein sequencing; protein shotgun sequencing},
    owner = {Sebastian},
    timestamp = {2011.11.24},
    url = {http://arxiv.org/abs/1109.3367},
    }
  • [DOI] Böcker, S., Kehr, B. & Rasche, F. (2011) Determination of glycan structure from tandem mass spectra. IN Ieee/acm trans comput biology bioinform, 8.976–986.
    [Bibtex]
    @Article{boecker11determination,
    author = {Sebastian B\"ocker and Birte Kehr and Florian Rasche},
    title = {Determination of glycan structure from tandem mass spectra},
    journal = TCBB,
    year = {2011},
    volume = {8},
    number = {4},
    pages = {976--986},
    abstract = {Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications, and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. In this paper, we consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry data. We study how to generate glycan tree candidates that suffciently match the sample mass spectrum, avoiding the combinatorial explosion of glycan structures. Unfortunately, the resulting problem is known to be computationally hard. We present an effcient exact algorithm for this problem based on fixed-parameter algorithmics, that can process a spectrum in a matter of seconds. We also report some preliminary results of our method on experimental data, combining it with a preliminary candidate evaluation scheme. We show that our approach is fast in applications, and that we can reach very good de novo identification results. Finally, we show how to count the number of glycan topologies for a fixed size or a fixed mass. We generalize this result to count the number of (labeled) trees with bounded out-degree, improving on results obtained using Polya's enumeration theorem.},
    doi = {10.1109/TCBB.2010.129},
    file = {:2011/BoeckerEtAl_DeterminationGlycanStructure_TCBB_2011.pdf:PDF},
    keywords = {fpt; jena; linkpdf; glycan ms; ms; PABI},
    owner = {Sebastian},
    pmid = {21173459},
    publisher = Springer,
    series = lncs,
    timestamp = {2009.04.06},
    }
  • [DOI] Böcker, S. & Damaschke, P. (2011) Even faster parameterized cluster deletion and cluster editing. IN Inform process lett, 111.717–721.
    [Bibtex]
    @Article{boecker11even,
    author = {Sebastian B\"ocker and Peter Damaschke},
    title = {Even faster parameterized cluster deletion and cluster editing},
    journal = IPL,
    year = {2011},
    volume = {111},
    number = {14},
    pages = {717--721},
    issn = {0020-0190},
    abstract = {Cluster Deletion and Cluster Editing ask to transform a graph by at most $k$ edge deletions or edge edits, respectively, into a cluster graph, i.e., disjoint union of cliques. Equivalently, a cluster graph has no conflict triples, i.e., two incident edges without a transitive edge. We solve the two problems in time $O^*(1.415^k)$ and $O^*(1.76^k)$, respectively. These results round off our earlier work by considerably improved time bounds. For Cluster Deletion we use a technique that cuts away small connected components that do no longer contribute to the exponential part of the time complexity. As this idea is simple and versatile, it may lead to improvements for several other parameterized graph problems. The improvement for Cluster Editing is achieved by using the full power of an earlier structure theorem for graphs where no edge is in three conflict triples.},
    doi = {10.1016/j.ipl.2011.05.003},
    file = {:2011/BoeckerDamaschke_EvenFasterParameterized_IPL_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; fpt; PABI; cluster editing},
    owner = {Sebastian},
    timestamp = {2011.05.25},
    url = {http://www.sciencedirect.com/science/article/pii/S0020019011001232},
    }
  • [DOI] Böcker, S., Bui, Q. B. A. & Truss, A. (2011) Computing bond orders in molecule graphs. IN Theor comput sci, 412.1184–1195.
    [Bibtex]
    @Article{boecker11computing,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {Computing Bond Orders in Molecule Graphs},
    journal = TCS,
    year = {2011},
    volume = {412},
    number = {12--14},
    pages = {1184--1195},
    abstract = {In this paper, we deal with restoring missing information in molecule databases: Many data formats only store the atoms configuration but omit bond multiplicities. As this information is essential for various applications in chemistry, we consider the problem of recovering bond type information using a scoring function for the possible valences of each atom---the Bond Order Assignment problem. We show that Bond Order Assignment is NP-hard, and its maximization version is MAX SNP-hard. We then give two exact fixed-parameter algorithms for the problem, where bond orders are computed via dynamic programming on a tree decomposition of the molecule graph. We evaluate our algorithm on a set of real molecule graphs and find that it works fast in practice.},
    doi = {10.1016/j.tcs.2010.12.063},
    file = {BoeckerEtAl_ComputingBondOrders_TheorComputSci_2011_preprint.pdf:2011/BoeckerEtAl_ComputingBondOrders_TheorComputSci_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; PABI; FPT; bond orders; bond order assignment problem},
    owner = {baoanh},
    timestamp = {2010.08.29},
    }
  • Böcker, S., Bui, B., Nicolas, F. & Truss, A. (2011) Intractability of the minimum flip supertree problem and its variants .
    [Bibtex]
    @TechReport{boecker11intractability,
    author = {Sebastian B\"ocker and Bao Bui and Fran\c{c}ois Nicolas and Anke Truss},
    title = {Intractability of the Minimum Flip Supertree problem and its variants},
    institution = {arXiv preprint arXiv:1112.4536},
    year = {2011},
    note = {arXiv:1112.4536v1},
    abstract = {Computing supertrees is a central problem in phylogenetics. The supertree method that is by far the most widely used today was introduced in 1992 and is called Matrix Representation with Parsimony analysis (MRP). Matrix Representation using Flipping (MRF), which was introduced in 2002, is an interesting variant of MRP: MRF is arguably more relevant that MRP and various efficient implementations of MRF have been presented. From a theoretical point of view, implementing MRF or MRP is solving NP-hard optimization problems. The aim of this paper is to study the approximability and the fixed-parameter tractability of the optimization problem corresponding to MRF, namely Minimum-Flip Supertree. We prove strongly negative results.},
    keywords = {jena; PABI; flip supertrees; supertrees; complexity; intractability},
    owner = {Sebastian},
    timestamp = {2011.11.24},
    url = {http://arxiv.org/abs/1112.4536},
    }
  • [DOI] Böcker, S., Briesemeister, S. & Klau, G. W. (2011) Exact algorithms for cluster editing: evaluation and experiments. IN Algorithmica, 60.316–334.
    [Bibtex]
    @Article{boecker11exact,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {Exact Algorithms for Cluster Editing: Evaluation and Experiments},
    journal = {Algorithmica},
    year = {2011},
    volume = {60},
    number = {2},
    pages = {316--334},
    abstract = {The Cluster Editing problem is defined as follows: Given an undirected, loopless graph, we want to find a set of edge modifications (insertions and deletions) of minimum cardinality, such that the modified graph consists of disjoint cliques. We present empirical results for this problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of $\lvert V\rvert$ , where V is the vertex set of the input graph. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying $k\leq25\lvert V\rvert$ . In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques.},
    data = {cluster_editing_data},
    doi = {10.1007/s00453-009-9339-7},
    file = {BoeckerEtAl_ExactAlgorithmsClusterEditing_Algorithmica_2011_preprint.pdf:2011/BoeckerEtAl_ExactAlgorithmsClusterEditing_Algorithmica_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; FPT; PABI},
    owner = {Sebastian},
    timestamp = {2009.03.08},
    }
  • [DOI] Wittkop, T., Rahmann, S., Böcker, S. & Baumbach, J. (2011) Extension and robustness of transitivity clustering for protein-protein interaction network analysis. IN Internet math, 7.255–273.
    [Bibtex]
    @Article{wittkop11extension,
    author = {Tobias Wittkop and Sven Rahmann and Sebastian B\"ocker and Jan Baumbach},
    title = {Extension and robustness of Transitivity Clustering for protein-protein interaction network analysis},
    journal = {Internet Math},
    year = {2011},
    volume = {7},
    number = {4},
    pages = {255--273},
    abstract = {Partitioning biological data objects into groups, such that the objects within the groups share common traits, is a long-standing challenge in computational biology. Recently, we developed and established Transitivity Clustering, a partitioning approach based on Weighted Transitive Graph Projection that utilizes a single similarity threshold as density parameter. In previous publications, we concentrated on the graphical user interface and on concrete biomedical application protocols. Here, we contribute the following theoretical considerations: (1) We provide proofs that the average similarity between objects from the same cluster is above the user-given threshold and the average similarity between objects from different clusters is below the threshold. (2) We extend Transitivity Clustering to an overlappingclustering tool by integrating two new approaches. (3) We demonstrate the power of Transitivity Clustering for protein complex detection. We evaluate our approaches against others by utilizing gold standard data that was previously used by Broh\'ee et al. for reviewing existing bioinformatics clustering tools. The extended version of Transitivity Clustering is available online at http://transclust.cebitec.uni-bielefeld.de.},
    doi = {10.1080/15427951.2011.604559},
    keywords = {jena; cluster editing},
    optmonth = may,
    owner = {Sebastian},
    timestamp = {2010.11.16},
    url = {http://transclust.cebitec.uni-bielefeld.de},
    }

2010

  • [DOI] Brinkmeyer, M., Griebel, T. & Böcker, S. (2010) Polynomial supertree methods revisited Springer, berlin, 183–194.
    [Bibtex]
    @InProceedings{brinkmeyer10polynomial,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {Polynomial Supertree Methods Revisited},
    booktitle = {Proc. of Pattern Recognition in Bioinformatics (PRIB 2010)},
    year = {2010},
    volume = {6282},
    series = lncs,
    pages = {183--194},
    publisher = Springer,
    abstract = {Supertree methods allow to reconstruct large phylogenetic trees by combining smaller trees with overlapping leaf sets, into one, more comprehensive supertree. The most commonly used supertree method, matrix representation with parsimony (MRP), produces accurate supertrees but is rather slow due to the underlying hard optimization problem. In this paper, we present an extensive simulation study comparing the performance of MRP and the polynomial supertree methods MinCut Supertree, Modified MinCut Supertree, Build-with-distances, PhySIC, and PhySIC IST. We consider both quality and resolution of the reconstructed supertrees. Our findings illustrate the trade-off between accuracy and running time in supertree construction, as well as the pros and cons of voting- and veto-based supertree approaches.},
    doi = {10.1007/978-3-642-16001-1_16},
    file = {:2010/BrinkmeyerEtAl_PolynomialSupertreeMethodsRevisited_PRIB_2010.pdf:PDF},
    journalref = {brinkmeyer11polynomial},
    keywords = {jena; linkpdf; phylogenetics, supertrees},
    owner = {Sebastian},
    timestamp = {2010.06.27},
    }
  • Bui, Q. B. A. (2010) Fixed-parameter algorithms for some combinatorial problems in bioinformatics. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{bui10fixed-parameter,
    author = {Quang Bao Anh Bui},
    title = {Fixed-parameter algorithms for some combinatorial problems in bioinformatics},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2010},
    address = {Jena; Germany},
    file = {:2011/Bui_FixedParameterAlgorithmsCombinatorialProblems_phdthesis_2011.pdf:PDF},
    keywords = {jena; linkpdf; fpt},
    owner = {anke},
    timestamp = {2011.03.21},
    url = {http://www.db-thueringen.de/servlets/DocumentServlet?id=18295},
    }
  • [DOI] Chimani, M., Rahmann, S. & Böcker, S. (2010) Exact ILP solutions for phylogenetic minimum flip problems Acm press, new york, 147–153.
    [Bibtex]
    @InProceedings{chimani10exact,
    author = {Markus Chimani and Sven Rahmann and Sebastian B\"ocker},
    title = {Exact {ILP} Solutions for Phylogenetic Minimum Flip Problems},
    booktitle = {Proc. of ACM Conf. on Bioinformatics and Computational Biology (ACM-BCB 2010)},
    year = {2010},
    pages = {147--153},
    publisher = {ACM press, New York},
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree or supertree of a given set of rooted input trees can be formalized in different ways. We consider the Minimum Flip Consensus Tree and Minimum Flip Supertree problem, where input trees are transformed into a 0/1/?-matrix, such that each row represents a taxon, and each column represents a subtree membership. For the consensus tree problem, all input trees contain the same set of taxa, and no ?-entries occur. For the supertree problem, the input trees may contain different subsets of the taxa, and unrepresented taxa are coded with ?-entries. In both cases, the goal is to find a perfect phylogeny for the input matrix requiring a minimum number of 0/1-flips, i.e., matrix entry corrections. Both optimization problems are NP-hard. We present the first efficient Integer Linear Programming (ILP) formulations for both problems, using three distinct characterizations of a perfect phylogeny. Although these three formulations seem to differ considerably at first glance, we show that they are in fact polytope-wise equivalent. Introducing a novel column generation scheme, it turns out that the simplest, purely combinatorial formulation is the most efficient one in practice. Using our framework, it is possible to find exact solutions for instances with ~100 taxa.},
    doi = {10.1145/1854776.1854800},
    file = {:2010/ChimaniEtAl_ExactILPSolutionsMinFlip_ACMBCB_2010.pdf:PDF},
    keywords = {jena; linkpdf; FlipCut; exact methods, ilp, flip supertrees},
    owner = {Sebastian},
    timestamp = {2010.02.21},
    }
  • [DOI] Dom, M., Guo, J., Hüffner, F., Niedermeier, R. & Truss, A. (2010) Fixed-parameter tractability results for feedback set problems in tournaments. IN J discrete algorithms, 8.76–86.
    [Bibtex]
    @Article{dom10fixed-parameter,
    author = {Michael Dom and Jiong Guo and Falk H\"uffner and Rolf Niedermeier and Anke Truss},
    journal = {J Discrete Algorithms},
    title = {Fixed-parameter tractability results for feedback set problems in tournaments},
    year = {2010},
    number = {1},
    pages = {76--86},
    volume = {8},
    abstract = {Complementing recent progress on classical complexity and polynomial-time approximability of feedback set problems in (bipartite) tournaments, we extend and improve fixed-parameter tractability results for these problems. We show that Feedback Vertex Set in tournaments (FVST) is amenable to the novel iterative compression technique, and we provide a depth-bounded search tree for Feedback Arc Set in bipartite tournaments based on a new forbidden subgraph characterization. Moreover, we apply the iterative compression technique to d-Hitting Set, which generalizes Feedback Vertex Set in tournaments, and obtain improved upper bounds for the time needed to solve 4-Hitting Set and 5-Hitting Set. Using our parameterized algorithm for Feedback Vertex Set in tournaments, we also give an exact (not parameterized) algorithm for it running in O(1.709^n) time, where n is the number of input graph vertices, answering a question of Woeginger [G.J. Woeginger, Open problems around exact algorithms, Discrete Appl. Math. 156 (3) (2008) 397?405].},
    doi = {10.1016/j.jda.2009.08.001},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {anke},
    timestamp = {2009.09.21},
    }
  • [DOI] Govind, G., Mittapalli, O., Griebel, T., Allmann, S., Böcker, S. & Baldwin, I. T. (2010) Unbiased transcriptional comparisons of generalist and specialist herbivores feeding on progressively defenseless \emphNicotiana attenuata plants. IN Plos one, 5.e8735.
    [Bibtex]
    @Article{govind10unbiased,
    author = {Govind, Geetha and Mittapalli, Omprakash and Griebel, Thasso and Allmann, Silke and B\"ocker, Sebastian and Baldwin, Ian Thomas},
    title = {Unbiased Transcriptional Comparisons of Generalist and Specialist Herbivores Feeding on Progressively Defenseless \emph{{Nicotiana} attenuata} Plants},
    journal = {PLoS One},
    year = {2010},
    volume = {5},
    number = {1},
    pages = {e8735},
    abstract = {Background Herbivore feeding elicits dramatic increases in defenses, most of which require jasmonate (JA) signaling, and against which specialist herbivores are thought to be better adapted than generalist herbivores. Unbiased transcriptional analyses of how neonate larvae cope with these induced plant defenses are lacking. Methodology/Principal Findings We created cDNA microarrays for Manduca sexta and Heliothis virescens separately, by spotting normalized midgut-specific cDNA libraries created from larvae that fed for 24 hours on MeJA-elicited wild-type (WT) Nicotiana attenuata plants. These microarrays were hybridized with labeled probes from neonates that fed for 24 hours on WT and isogenic plants progressively silenced in JA-mediated defenses (N: nicotine; N/PI: N and trypsin protease inhibitors; JA: all JA-mediated defenses). H. virescens neonates regulated 16 times more genes than did M. sexta neonates when they fed on plants silenced in JA-mediated defenses, and for both species, the greater the number of defenses silenced in the host plant (JA > N/PI > N), the greater were the number of transcripts regulated in the larvae. M. sexta larvae tended to down-regulate while H. virescens larvae up- and down-regulated transcripts from the same functional categories of genes. M. sexta larvae regulated transcripts in a diet-specific manner, while H. virescens larvae regulated a similar suite of transcripts across all diet types. Conclusions/Significance The observations are consistent with the expectation that specialists are better adapted than generalist herbivores to the defense responses elicited in their host plants by their feeding. While M. sexta larvae appear to be better adapted to N. attenuata's defenses, some of the elicited responses remain effective defenses against both herbivore species. The regulated genes provide novel insights into larval adaptations to N. attenuata's induced defenses, and represent potential targets for plant-mediated RNAi to falsify hypotheses about the process of adaptation.},
    doi = {10.1371/journal.pone.0008735},
    file = {:2010/GodvindEtAl_UnbiasedTranscriptionalComparisons_PLoSone_2010.pdf:PDF},
    keywords = {jena; microarray, cDNA, EST},
    pmid = {20090945},
    publisher = {Public Library of Science},
    }
  • [DOI] Guillemot, S. & Mnich, M. (2010) Kernel and fast algorithm for Dense Triplet Inconsistency Springer, berlin, 247–257.
    [Bibtex]
    @InProceedings{guillemot10kernel,
    author = {Sylvain Guillemot and Matthias Mnich},
    title = {Kernel and Fast Algorithm for {Dense Triplet Inconsistency}},
    booktitle = {Proc. of Theory and Applications of Models of Computation (TAMC 2010)},
    year = {2010},
    volume = {6108},
    series = lncs,
    pages = {247--257},
    publisher = Springer,
    abstract = {We study the parameterized complexity of inferring supertrees from sets of rooted triplets, an important problem in phylogenetics. For a set L of labels and a dense set $\mathcal R$ of triplets distinctly leaf-labeled by 3-subsets of L we seek a tree distinctly leaf-labeled by L and containing all but at most p triplets from $\mathcal R$ as homeomorphic subtree. Our results are the first polynomial kernel for this problem, with O(p 2) labels, and a subexponential fixed-parameter algorithm running in time $2^{O(p^{1/3} \log p)} + O(n^4)$.},
    doi = {10.1007/978-3-642-13562-0_23},
    file = {:2010/GuillemotMnich_KernelFastAlgorithm_TAMC_2010.pdf:PDF},
    keywords = {jena},
    owner = {Sebastian},
    timestamp = {2010.04.13},
    }
  • Guillemot, S., Paul, C. & Perez, A. (2010) On the (non-)existence of polynomial kernels for $P_l$-free edge modification problems .
    [Bibtex]
    @TechReport{guillemot10nonexistence,
    author = {Sylvain Guillemot and Christophe Paul and Anthony Perez},
    title = {On the (non-)existence of polynomial kernels for $P_l$-free edge modification problems},
    year = {2010},
    note = {arXiv:1007.4011},
    abstract = {Given a graph $G=(V,E)$ and an integer $k$, an edge modification problem for a graph property $\Pi$ consists in deciding whether there exists a set of edges $F$ of size at most $k$ such that the graph $H=(V,E\vartriangle F)$ satisfies the property $\Pi$. In the $\Pi$ edge-completion problem, the set $F$ of edges is constrained to be disjoint from $E$; in the $\Pi$ edge-deletion problem, $F$ is a subset of $E$; no constraint is imposed on $F$ in the $\Pi$ edge-edition problem.
    A number of optimization problems can be expressed in terms of graph modification problems which have been extensively studied in the context of parameterized complexity. When parameterized by the size $k$ of the edge set $F$, it has been proved that if $\Pi$ is an hereditary property characterized by a finite set of forbidden induced subgraphs, then the three $\Pi$ edge-modification problems are FPT. It was then natural to ask whether these problems also admit a polynomial size kernel.
    Using recent lower bound techniques, Kratsch and Wahlstr\"om answered this question negatively. However, the problem remains open on many natural graph classes characterized by forbidden induced subgraphs.
    Kratsch and Wahlstr\"om asked whether the result holds when the forbidden subgraphs are paths or cycles and pointed out that the problem is already open in the case of $P_4$-free graphs (i.e. cographs). This paper provides positive and negative results in that line of research. We prove that parameterized cograph edge modification problems have cubic vertex kernels whereas polynomial kernels are unlikely to exist for the $P_l$-free and $C_l$-free edge-deletion problems for large enough $l$.},
    file = {GuillemotEtAl_NonExistencePolynomialKernels_CoRR_2010.pdf:2010/GuillemotEtAl_NonExistencePolynomialKernels_CoRR_2010.pdf:PDF},
    keywords = {jena;},
    owner = {marcusL},
    timestamp = {Mon, 05 Dec 2011 18:04:25 +0100},
    url = {http://arxiv.org/abs/1007.4011},
    }
  • [DOI] Guillemot, S. & Sikora, F. (2010) Finding and counting vertex-colored subtrees Springer, berlin, 405–416.
    [Bibtex]
    @InProceedings{guillemot10finding,
    author = {Sylvain Guillemot and Florian Sikora},
    title = {Finding and counting vertex-colored subtrees},
    booktitle = {Proc. of Mathematical Foundations of Computer Science (MFCS 2010)},
    year = {2010},
    volume = {6281},
    series = lncs,
    pages = {405--416},
    publisher = Springer,
    abstract = {The problems studied in this article originate from the Graph Motif problem introduced by Lacroix et al. [17] in the context of biological networks. The problem is to decide if a vertex-colored graph has a connected subgraph whose colors equal a given multiset of colors M. Using an algebraic framework recently introduced by Koutis et al. [15,16], we obtain new FPT algorithms for Graph Motif and variants, with improved running times. We also obtain results on the counting versions of this problem, showing that the counting problem is FPT if M is a set, but becomes \#W[1]-hard if M is a multiset with two colors.},
    doi = {10.1007/978-3-642-15155-2_36},
    keywords = {fpt; jena; linkpdf; PABI},
    owner = {imran},
    timestamp = {2010.12.23},
    url = {http://arxiv.org/abs/1002.1880},
    }
  • Hufsky, F. (2010) Algorithm engineering for the center string problem. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{hufsky10algorithm,
    author = {Franziska Hufsky},
    title = {Algorithm Engineering for the Center String Problem},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2010},
    type = {Diplomarbeit},
    keywords = {jena; center string;},
    owner = {bertram},
    timestamp = {2014.08.08},
    }
  • [DOI] Hufsky, F., Kuchenbecker, L., Jahn, K., Stoye, J. & Böcker, S. (2010) Swiftly computing center strings Springer, berlin, 325–336.
    [Bibtex]
    @InProceedings{hufsky10swiftly,
    author = {Franziska Hufsky and L\'eon Kuchenbecker and Katharina Jahn and Jens Stoye and Sebastian B\"ocker},
    title = {Swiftly computing center strings},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2010)},
    year = {2010},
    volume = {6293},
    series = lncs,
    pages = {325--336},
    publisher = Springer,
    abstract = {The center string (or closest string) problem is a classical computer science problem with important applications in computational biology. Given $k$ input strings and a distance threshold $d$, we search for a string within Hamming distance $d$ to each input string. This problem is NP-complete. In this paper, we focus on exact methods for the problem that are also fast in application. First, we introduce data reduction techniques that allow us to infer that certain instances have no solution, or that a center string must satisfy certain conditions. Then, we describe a novel search tree strategy that is very efficient in practice. Finally, we present results of an evaluation study for instances from a biological application. We find that data reduction is mandatory for the notoriously difficult case $d=\dopt-1$.},
    data = {center_strings},
    doi = {10.1007/978-3-642-15294-8_27},
    file = {:2010/HufskyEtAl_SwiftlyComputingCenterStrings_WABI_2010.pdf:PDF},
    journalref = {hufsky11swiftly},
    keywords = {jena; 3AGC; center string, closest string, gene clusters},
    owner = {Sebastian},
    timestamp = {2010.05.29},
    }
  • [DOI] Neumann, S. & Böcker, S. (2010) Computational mass spectrometry for metabolomics – a review. IN Anal bioanal chem, 398.2779–2788.
    [Bibtex]
    @Article{neumann10computational,
    author = {Steffen Neumann and Sebastian B\"ocker},
    title = {Computational Mass Spectrometry for Metabolomics -- A Review},
    journal = {Anal Bioanal Chem},
    year = {2010},
    volume = {398},
    number = {7},
    pages = {2779--2788},
    abstract = {The identification of compounds from mass spectrometry (MS) data is still seen as a major bottleneck in the interpretation of MS data. This is particularly the case for the identification of small compounds such as metabolites, where until recently little progress has been made. Here we review the available approaches to annotation and identification of chemical compounds based on electrospray ionization (ESI-MS) data. The methods are not limited to metabolomics applications, but are applicable to any small compounds amenable to MS analysis. Starting with the definition of identification, we focus on the analysis of tandem mass and $MS^n$ spectra, which can provide a wealth of structural information. Searching in libraries of reference spectra provides the most reliable source of identification, especially if measured on comparable instruments. We review several choices for the distance functions. The identification without reference spectra is even more challenging, because it requires approaches to interpret tandem mass spectra with regard to the molecular structure. Both commercial and free tools are capable of mining general-purpose compound libraries, and identifying candidate compounds. The holy grail of computational mass spectrometry is the de novo deduction of structure hypotheses for compounds, where method development has only started thus far. In a case study, we apply several of the available methods to the three compounds, kaempferol, reserpine, and verapamil, and investigate whether this results in reliable identifications.},
    doi = {10.1007/s00216-010-4142-5},
    file = {NeumannBoecker_ComputationalMassSpectrometry_AnalBioanalChem_2010_reprint.pdf:2010/NeumannBoecker_ComputationalMassSpectrometry_AnalBioanalChem_2010_reprint.pdf:PDF},
    keywords = {jena; linkpdf; MS; Mass Spectrometry; metabolites; metabolite MS; isotope patterns; TrACReview},
    owner = {Sebastian},
    pmid = {20936272},
    timestamp = {2010.08.23},
    }
  • Vidoudez, C. (2010) Diatom metabolomics. PhD Thesis. .
    [Bibtex]
    @PhdThesis{vidoudez10diatom,
    author = {Charles Vidoudez},
    title = {Diatom metabolomics},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2010},
    owner = {fhufsky},
    timestamp = {2013.05.14},
    }
  • [DOI] Wittkop, T., Emig, D., Lange, S., Rahmann, S., Albrecht, M., Morris, J. H., Böcker, S., Stoye, J. & Baumbach, J. (2010) Partitioning biological data with transitivity clustering. IN Nat methods, 7.419–420.
    [Bibtex]
    @Article{wittkop10partitioning,
    author = {Tobias Wittkop and Dorothea Emig and Sita Lange and Sven Rahmann and Mario Albrecht and John H. Morris and Sebastian B\"ocker and Jens Stoye and Jan Baumbach},
    title = {Partitioning biological data with transitivity clustering},
    journal = {Nat Methods},
    year = {2010},
    volume = {7},
    number = {6},
    pages = {419--420},
    abstract = {Clustering is a common computational technique for data analysis in the life sciences. Essentially one defines clustering as a partitioning of arbitrary data objects into groups, such that the objects in each group, or cluster, have common traits, with respect to a similarity function. Ideally, objects from the same cluster are more similar to each other than to objects from different clusters. A density parameter controls the size and the number of resulting clusters. Here we present `transitivity clustering', an alternative way to partition biological data, with easy-to-use interfaces: (i) a web interface for a quick analysis of medium-sized datasets, (ii) a powerful stand-alone Java implementation for large-scale data clustering, and (iii) a collection of Cytoscape plug-ins that also provide methods to answer typical follow-up questions.},
    doi = {10.1038/nmeth0610-419},
    file = {:2010/WittkopEtAl_PartitioningBiologicalData_NatMethods_2010.pdf:PDF},
    keywords = {jena; linkpdf; cluster editing, clustering, fpt},
    owner = {Sebastian},
    pmid = {20508635},
    timestamp = {2010.04.27},
    }

2009

  • [DOI] Böcker, S., Briesemeister, S., Bui, Q. B. A. & Truss, A. (2009) Going weighted: parameterized algorithms for cluster editing. IN Theor comput sci, 410.5467–5480.
    [Bibtex]
    @Article{boecker09going,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {Going Weighted: Parameterized Algorithms for Cluster Editing},
    journal = TCS,
    year = {2009},
    volume = {410},
    number = {52},
    pages = {5467--5480},
    abstract = {The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper, we present a number of surprisingly simple search tree algorithms for Weighted Cluster Editing assuming that edge insertion and deletion costs are positive integers. We show that the smallest search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm, both for this problem and its unweighted counterpart. We have implemented and compared our algorithms, and achieved promising results.},
    data = {cluster_editing_data},
    doi = {10.1016/j.tcs.2009.05.006},
    file = {:2009/BoeckerEtAl_GoingWeightedParameterized_TheorComputSci_2009.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    }
  • [DOI] Böcker, S., Briesemeister, S. & Klau, G. W. (2009) On optimal comparability editing with applications to molecular diagnostics. IN Bmc bioinf, 10.S61.
    [Bibtex]
    @Article{boecker09optimal,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {On optimal comparability editing with applications to molecular diagnostics},
    journal = {BMC Bioinf},
    year = {2009},
    volume = {10},
    number = {Suppl 1},
    pages = {S61},
    note = {Proc. of \emph{Asia-Pacific Bioinformatics Conference} (APBC 2009)},
    abstract = {The COMPARABILITY EDITING problem appears in the context of hierarchical disease classification based on noisy data. We are given a directed graph G representing hierarchical relationships between patient subgroups. The task is to identify the minimum number of edge insertions or deletions to transform G into a transitive graph, that is, if edges (u, v) and (v, w) are present then edge (u, w) must be present, too. We present two new approaches for the problem based on fixed-parameter algorithmics and integer linear programming. In contrast to previously used heuristics, our approaches compute provably optimal solutions. Our computational results demonstrate that our exact algorithms are by far more efficient in practice than a previously used heuristic approach. In addition to the superior running time performance, our algorithms are capable of enumerating all optimal solutions, and naturally solve the weighted version of the problem.},
    doi = {10.1186/1471-2105-10-S1-S61},
    keywords = {jena; FPT; linkpdf; PABI; transitivity editing;},
    owner = {Sebastian},
    pmid = {19208165},
    timestamp = {2007.10.17},
    url = {http://www.biomedcentral.com/1471-2105/10/S1/S61},
    }
  • [DOI] Böcker, S., Bui, Q. B. A., Seeber, P. & Truss, A. (2009) Computing bond types in molecule graphs Springer, berlin, 297–306.
    [Bibtex]
    @InProceedings{boecker09computing,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Patrick Seeber and Anke Truss},
    title = {Computing Bond Types in Molecule Graphs},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {297--306},
    publisher = Springer,
    abstract = {In this paper we deal with restoring missing information in molecule databases: Some data formats only store the atoms' configuration but omit bond multiplicities. As this information is essential for various applications in chemistry, we consider the problem of recovering bond type information using a scoring function for the possible valences of each atom---the Bond Type Assignment problem. We prove the NP-hardness of Bond Type Assignment and give an exact fixed-parameter algorithm for the problem where bond types are computed via dynamic programming on a tree decomposition of the molecule graph. We evaluate our algorithm on a set of real molecule graphs and find that it works fast and accurately.},
    doi = {10.1007/978-3-642-02882-3_28},
    file = {BoeckerEtAl_ComputingBondTypesMoleculeGraphs_Cocoon_2009.pdf:2009/BoeckerEtAl_ComputingBondTypesMoleculeGraphs_Cocoon_2009.pdf:PDF},
    journalref = {boecker11computing},
    keywords = {jena; FPT; linkpdf; tree decomposition; PABI},
    owner = {baoanh},
    timestamp = {2009.04.06},
    }
  • [DOI] Böcker, S., Hüffner, F., Truss, A. & Wahlström, M. (2009) A faster fixed-parameter approach to drawing binary tanglegrams Springer, berlin, 38–49.
    [Bibtex]
    @InProceedings{boecker09faster,
    author = {Sebastian B\"ocker and Falk H\"uffner and Anke Truss and Magnus Wahlstr\"om},
    title = {A faster fixed-parameter approach to drawing binary tanglegrams},
    booktitle = {Proc. of International Workshop on Parameterized and Exact Computation (IWPEC 2009)},
    year = {2009},
    volume = {5917},
    series = lncs,
    pages = {38--49},
    publisher = Springer,
    abstract = {Given two binary phylogenetic trees covering the same n species, it is useful to compare them by drawing them with leaves arranged side-by-side. To facilitate comparison, we would like to arrange the trees to minimize the number of crossings k induced by connecting pairs of identical species. This is the NP-hard Tanglegram Layout problem. By providing a fast transformation to the Balanced Subgraph problem, we show that the problem admits an O(2^k n^4) algorithm, improving upon a previous fixed-parameter approach with running time O(c^k n^O(1)) where c \approx 1000. We enhance a Balanced Subgraph implementation based on data reduction and iterative compression with improvements tailored towards these instances, and run experiments with real-world data to show the practical applicability of this approach. All practically relevant (k <= 1000) Tanglegram Layout instances can be solved exactly within seconds. Additionally, we provide a kernel-like bound by showing how to reduce the Balanced Subgraph instances for Tanglegram Layout on complete binary trees to a size of O(k log k).},
    doi = {10.1007/978-3-642-11269-0_3},
    file = {:2009/BoeckerEtAl_FasterFixedParameterApproach_IWPEC_2009.pdf:PDF},
    keywords = {jena; linkpdf; fpt; PABI},
    owner = {anke},
    timestamp = {2009.07.08},
    }
  • [DOI] Böcker, S., Jahn, K., Mixtacki, J. & Stoye, J. (2009) Computation of median gene clusters. IN J comput biol, 16.1085–1099.
    [Bibtex]
    @Article{boecker09computation,
    author = {Sebastian B\"ocker and Katharina Jahn and Julia Mixtacki and Jens Stoye},
    title = {Computation of median gene clusters},
    journal = {J Comput Biol},
    year = {2009},
    volume = {16},
    number = {8},
    pages = {1085--1099},
    abstract = {Whole genome comparison based on gene order has become a popular approach in comparative genomics. An important task in this field is the detection of gene clusters, i.e., sets of genes that occur co-localized in several genomes. For most applications, it is preferable to extend this definition to allow for small deviations in the gene content of the cluster occurrences. However, relaxing the equality constraint increases the computational complexity of gene cluster detection drastically. Existing approaches deal with this problem by using simplifying constraints on the cluster definition and/or allowing only pairwise genome comparison. In this article, we introduce a cluster concept named median gene clusters that improves over existing models, present efficient algorithms for their computation and show experimental results on the detection of approximate gene clusters in multiple genomes.},
    comment = {JV von RECOMB 2008},
    doi = {10.1089/cmb.2009.0098},
    file = {:2009/BoeckerEtAl_ComputationMedianGeneClusters_JCB_2009.pdf:PDF},
    keywords = {jena; linkpdf; 3AGC; gene clusters, genome rearrangements},
    owner = {Sebastian},
    pmid = {19689215},
    timestamp = {2009.10.10},
    }
  • [DOI] Böcker, S., Kehr, B. & Rasche, F. (2009) Determination of glycan structure from tandem mass spectra Springer, berlin, 258–267.
    [Bibtex]
    @InProceedings{boecker09determination,
    author = {Sebastian B\"ocker and Birte Kehr and Florian Rasche},
    title = {Determination of glycan structure from tandem mass spectra},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {258--267},
    publisher = Springer,
    abstract = {Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications, and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. We consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry data. We want to generate glycan tree candidates that sufficiently match the sample mass spectrum. Unfortunately, the resulting problem is known to be computationally hard. We present an efficient exact algorithm for this problem based on fixedparameter algorithmics, that can process a spectrum in a matter of seconds. We also report some preliminary results of our method on experimental data. We show that our approach is fast enough in applications, and that we can reach very good de novo identification results. Finally, we show how to compute the number of glycan topologies of a given size.},
    doi = {10.1007/978-3-642-02882-3_26},
    file = {BoeckerEtAl_DeterminationGlycanStructure_COCOON_2009.pdf:2009/BoeckerEtAl_DeterminationGlycanStructure_COCOON_2009.pdf:PDF},
    journalref = {boecker11determination},
    keywords = {fpt; jena; linkpdf; glycan ms; ms; PABI},
    owner = {Sebastian},
    timestamp = {2010.02.16},
    }
  • [DOI] Böcker, S., Letzel, M., relax Zs & Pervukhin, A. (2009) SIRIUS: decomposing isotope patterns for metabolite identification. IN Bioinformatics, 25.218–224.
    [Bibtex]
    @Article{boecker09sirius,
    author = {Sebastian B\"ocker and Matthias Letzel and {\relax Zs}uzsanna Lipt{\'a}k and Anton Pervukhin},
    title = {{SIRIUS}: Decomposing isotope patterns for metabolite identification},
    journal = {Bioinformatics},
    year = {2009},
    volume = {25},
    number = {2},
    pages = {218--224},
    abstract = {Motivation: High-resolution mass spectrometry (MS) is among the most widely used technologies in metabolomics. Metabolites participate in almost all cellular processes, but most metabolites still remain uncharacterized. Determination of the sum formula is a crucial step in the identification of an unknown metabolite, as it reduces its possible structures to a hopefully manageable set. Results: We present a method for determining the sum formula of a metabolite solely from its mass and the natural distribution of its isotopes. Our input is a measured isotope pattern from a high resolution mass spectrometer, and we want to find those molecules that best match this pattern. Our method is computationally efficient, and results on experimental data are very promising: For orthogonal time-of-flight mass spectrometry, we correctly identify sum formulas for more than 90\% of the molecules, ranging in mass up to 1000 Da. Availability: SIRIUS is available under the LGPL license at http://bio.informatik.uni-jena.de/sirius/. Contact: },
    doi = {10.1093/bioinformatics/btn603},
    file = {:2009/BoeckerEtAl_SiriusDecomposingIsotopePatterns_Bioinf_2009.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; ms},
    owner = {apervukh},
    pmid = {19015140},
    timestamp = {2008.11.06},
    url = {http://bioinformatics.oxfordjournals.org/cgi/content/full/25/2/218},
    }
  • [DOI] Böcker, S. & Pervukhin, A. (2009) Inferring peptide composition from molecular formulas Springer, berlin, 277–286.
    [Bibtex]
    @InProceedings{boecker09inferring,
    author = {Sebastian B\"ocker and Anton Pervukhin},
    title = {Inferring peptide composition from molecular formulas},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {277--286},
    publisher = Springer,
    abstract = {With the advent of novel mass spectrometry techniques such as Orbitrap MS, it is possible to determine the exact molecular formula of an unknown molecule solely from its isotope pattern. But for protein mass spectrometry, one is facing the problem that many peptides have exactly the same molecular formula even when ignoring the order of amino acids. In this work, we present an efficient method to determine the amino acid composition of an unknown peptide solely from its molecular formula. Our solution is based on efficiently enumerating all solutions of the multi-dimensional equality constrained integer knapsack problem.},
    doi = {10.1007/978-3-642-02882-3},
    file = {:2009/BoeckerPervukhin_InferringPeptideCompositionMolecularFormulas_Cocoon_2009.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {apervukh},
    timestamp = {2009.02.19},
    }
  • [DOI] Böcker, S., Rasche, F. & Steijger, T. (2009) Annotating fragmentation patterns Springer, berlin, 13–24.
    [Bibtex]
    @InProceedings{boecker09annotating,
    author = {Sebastian B\"ocker and Florian Rasche and Tamara Steijger},
    title = {Annotating fragmentation patterns},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI~2009)},
    year = {2009},
    volume = {5724},
    series = lncs,
    pages = {13--24},
    publisher = Springer,
    abstract = {Mass spectrometry is one of the key technologies in metabolomics for the identification and quantification of molecules in small concentrations. For identification, these molecules are fragmented by, e.g., tandem mass spectrometry, and masses and abundances of the resulting fragments are measured. Recently, methods for de novo interpretation of tandem mass spectra and the automated inference of fragmentation patterns have been developed. If the correct structural formula is known, then peaks in the fragmentation pattern can be annotated by substructures of the underlying compound. To determine the structure of these fragments manually is tedious and time-consuming. Hence, there is a need for automated identification of the generated fragments. In this work, we consider the problem of annotating fragmentation patterns. Our input are fragmentation trees, representing tandem mass spectra where each peak has been assigned a molecular formula, and fragmentation dependencies are known. Given a fixed structural formula and any fragment molecular formula, we search for all structural fragments that satisfy elemental multiplicities. Ultimately, we search for a fragmentation pattern annotation with minimum total cleavage costs. We discuss several algorithmic approaches for this problem, including a randomized and a tree decomposition-based algorithm. We find that even though the problem of identifying structural fragments is NP-hard, instances based on molecular structures can be efficiently solved with a classical branch-and-bound algorithm.},
    doi = {10.1007/978-3-642-04241-6},
    file = {BoeckerEtAl_AnnotatingFragmentationPatterns_WABI_2009.pdf:2009/BoeckerEtAl_AnnotatingFragmentationPatterns_WABI_2009.pdf:PDF},
    keywords = {jena; linkpdf; tandem ms; metabolite ms; ft; ms; idun; PABI; fpt; TrACReview},
    owner = {Sebastian},
    timestamp = {2009.05.31},
    }
  • [DOI] Bertsch, A., Leinenbach, A., Pervukhin, A., Lubeck, M., Hartmer, R., Baessmann, C., Elnakady, Y. A., Müller, R., Böcker, S., Huber, C. G. & Kohlbacher, O. (2009) De novo peptide sequencing by tandem MS using complementary CID and electron transfer dissociation. IN Electrophoresis, 30.3736–3747.
    [Bibtex]
    @Article{bertsch09de-novo,
    author = {Andreas Bertsch and Andreas Leinenbach and Anton Pervukhin and Markus Lubeck and Ralf Hartmer and Carsten Baessmann and Yasser Abbas Elnakady and Rolf M\"uller and Sebastian B\"ocker and Christian G Huber and Oliver Kohlbacher},
    title = {De novo peptide sequencing by tandem {MS} using complementary {CID} and electron transfer dissociation},
    journal = {Electrophoresis},
    year = {2009},
    volume = {30},
    number = {21},
    pages = {3736--3747},
    abstract = {De novo sequencing of peptides using tandem MS is difficult due to missing fragment ions in the spectra commonly obtained after CID of peptide precursor ions. Complementing CID spectra with spectra obtained in an ion-trap mass spectrometer upon electron transfer dissociation (ETD) significantly increases the sequence coverage with diagnostic ions. In the de novo sequencing algorithm CompNovo presented here, a divide-and-conquer approach was combined with an efficient mass decomposition algorithm to exploit the complementary information contained in CID and ETD spectra. After optimizing the parameters for the algorithm on a well-defined training data set obtained for peptides from nine known proteins, the CompNovo algorithm was applied to the de novo sequencing of peptides derived from a whole protein extract of Sorangium cellulosum bacteria. To 2406 pairs of CID and ETD spectra contained in this data set, 675 fully correct sequences were assigned, which represent a success rate of 28.1\%. It is shown that the CompNovo algorithm yields significantly improved sequencing accuracy as compared with published approaches using only CID spectra or combined CID and ETD spectra.},
    doi = {10.1002/elps.200900332},
    file = {:2009/BertschEtAl_DeNovoPeptideSequencing_Electrophoresis_2009.pdf:PDF},
    keywords = {ms, peptide sequencing, jena},
    owner = {Sebastian},
    pmid = {19862751},
    timestamp = {2009.10.31},
    }
  • [DOI] Dondrup, M., Albaum, S. P., Griebel, T., Henckel, K., Jünemann, S., Kahlke, T., Kleindt, C. K., Küster, H., Linke, B., Mertens, D., Mittard-Runte, V., Neuweger, H., Runte, K. J., Tauch, A., Tille, F., Pühler, A. & Goesmann, A. (2009) EMMA 2 - a MAGE-compliant system for the collaborative analysis and integration of microarray data. IN Bmc bioinf, 10.50.
    [Bibtex]
    @Article{dondrup09emma,
    author = {Michael Dondrup and Stefan P Albaum and Thasso Griebel and Kolja Henckel and Sebastian J\"unemann and Tim Kahlke and Christiane K Kleindt and Helge K\"uster and Burkhard Linke and Dominik Mertens and Virginie Mittard-Runte and Heiko Neuweger and Kai J Runte and Andreas Tauch and Felix Tille and Alfred P\"uhler and Alexander Goesmann},
    title = {{EMMA 2} - A {MAGE}-compliant system for the collaborative analysis and integration of microarray data},
    journal = {BMC Bioinf},
    year = {2009},
    volume = {10},
    pages = {50},
    abstract = {Understanding transcriptional regulation by genome-wide microarray studies can contribute to unravel complex relationships between genes. Attempts to standardize the annotation of microarray data include the Minimum Information About a Microarray Experiment (MIAME) recommendations, the MAGE-ML format for data interchange, and the use of controlled vocabularies or ontologies. The existing software systems for microarray data analysis implement the mentioned standards only partially and are often hard to use and extend. Integration of genomic annotation data and other sources of external knowledge using open standards is therefore a key requirement for future integrated analysis systems. The EMMA 2 software has been designed to resolve shortcomings with respect to full MAGE-ML and ontology support and makes use of modern data integration techniques. We present a software system that features comprehensive data analysis functions for spotted arrays, and for the most common synthesized oligo arrays such as Agilent, Affymetrix and NimbleGen. The system is based on the full MAGE object model. Analysis functionality is based on R and Bioconductor packages and can make use of a compute cluster for distributed services. Our model-driven approach for automatically implementing a full MAGE object model provides high flexibility and compatibility. Data integration via SOAP-based web-services is advantageous in a distributed client-server environment as the collaborative analysis of microarray data is gaining more and more relevance in international research consortia. The adequacy of the EMMA 2 software design and implementation has been proven by its application in many distributed functional genomics projects. Its scalability makes the current architecture suited for extensions towards future transcriptomics methods based on high-throughput sequencing approaches which have much higher computational requirements than microarrays.},
    doi = {doi:10.1186/1471-2105-10-50},
    keywords = {jena; linkpdf;},
    owner = {thasso},
    pmid = {19200358},
    timestamp = {2009.02.24},
    url = {http://www.biomedcentral.com/1471-2105/10/50},
    }
  • Pervukhin, A. (2009) Molecular formula identification using high resolution mass spectrometry. PhD Thesis. Jena; Germany, .
    [Bibtex]
    @PhdThesis{pervukhin09molecular,
    author = {Anton Pervukhin},
    title = {Molecular Formula Identification using High Resolution Mass Spectrometry},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2009},
    address = {Jena; Germany},
    file = {:2009/Pervukhin_MolecularFormulaIdentification_phdthesis_2009.pdf:PDF},
    keywords = {jena; linkpdf; high-resolution ms; metabolite ms; TrACReview},
    owner = {apervukh},
    timestamp = {2009.10.17},
    url = {http://www.db-thueringen.de/servlets/DocumentServlet?id=15123},
    }
  • [DOI] Scherbart, A., Timm, W., Böcker, S. & Nattkemper, T. W. (2009) Improved mass spectrometry peak intensity prediction by adaptive feature weighting Springer, berlin, 513–520.
    [Bibtex]
    @InProceedings{scherbart09improved,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {Improved Mass Spectrometry Peak Intensity Prediction by Adaptive Feature Weighting},
    booktitle = {Proc. of International Conference on Neural Information Processing of the Asia-Pacific Neural Network Assembly (ICONIP 2008)},
    year = {2009},
    volume = {5506},
    series = lncs,
    pages = {513--520},
    publisher = Springer,
    abstract = {Mass spectrometry (MS) is a key technique for the analysis and identification of proteins. A prediction of spectrum peak intensities from pre-computed molecular features would pave the way to a better understanding of spectrometry data and improved spectrum evaluation. The goal is to model the relationship between peptides and peptide peak heights in MALDI-TOF mass spectra, only using the peptide's sequence information and the chemical properties. To cope with this high dimensional data, we propose a regression based combination of feature weightings and a linear predictor to focus on relevant features. This offers simpler models, scalability, and better generalization. We show that the overall performance utilizing the estimation of feature relevance and re-training compared to using the entire feature space can be improved.},
    doi = {10.1007/978-3-642-02490-0_63},
    file = {:2008/ScherbartEtAl_ImprovedMassSpectrometry_ICONIP_2008.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2008.10.07},
    }
  • Steijger, T. (2009) Automated annotation of fragmentation patterns. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{steijger09automated,
    author = {Tamara Steijger},
    title = {Automated Annotation of Fragmentation Patterns},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2009},
    type = {Diplomarbeit},
    keywords = {jena; tandem ms; metabolite ms; ft; ms; idun; PABI; fpt},
    owner = {Sebastian},
    timestamp = {2009.10.15},
    }
  • Zichner, T. (2009) Analysis of unknown metabolites by aligning MS fragmentation patterns. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{zichner09analysis,
    author = {Thomas Zichner},
    title = {Analysis of Unknown Metabolites by Aligning {MS} Fragmentation Patterns},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2009},
    type = {Diplomarbeit},
    keywords = {jena; tandem ms; metabolite ms; ft; tree alignment; idun; ms},
    owner = {Sebastian},
    timestamp = {2009.10.15},
    }

2008

  • Böcker, S., Briesemeister, S., Bui, Q. B. A. & Truss, A. (2008) A fixed-parameter approach for weighted cluster editing Imperial college press, 211–220.
    [Bibtex]
    @InProceedings{boecker08fixed-parameter,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {A fixed-parameter approach for Weighted Cluster Editing},
    booktitle = {Proc. of Asia-Pacific Bioinformatics Conference (APBC 2008)},
    year = {2008},
    volume = {5},
    series = {Series on Advances in Bioinformatics and Computational Biology},
    pages = {211--220},
    publisher = {Imperial College Press},
    abstract = {Clustering objects with respect to a given similarity or distance measure is a problem often encountered in computational biology. Several well-known clustering algorithms are based on transforming the input matrix into a weighted graph although the resulting Weighted Cluster Editing problem is computationally hard: here, we transform the input graph into a disjoint union of cliques such that the sum of weights of all modified edges is minimized. We present fixed-parameter algorithms for this problem which guarantee to find an optimal solution in provable worst-case running time. We introduce a new data reduction operation (merging vertices) that has no counterpart in the unweighted case and strongly cuts down running times in practice. We have applied our algorithms to both artificial and biological data. Despite the complexity of the problem, our method often allows exact computation of optimal solutions in reasonable running time.},
    data = {cluster_editing_data},
    file = {BoeckerEtAl_FixedParameterApproachWeightedClusterEditing_APBC_2008.pdf:2008/BoeckerEtAl_FixedParameterApproachWeightedClusterEditing_APBC_2008.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    opteditor = {Alvia Brazma and Satoru Miyano and Tatsuya Akutsu},
    owner = {Sebastian},
    timestamp = {2008.10.06},
    url = {http://ebooks.worldscinet.com/ISBN/9781848161092/9781848161092_0023.html},
    }
  • [DOI] Böcker, S., Briesemeister, S., Bui, Q. B. A. & Truss, A. (2008) Going weighted: parameterized algorithms for cluster editing Springer, berlin, 1–12.
    [Bibtex]
    @InProceedings{boecker08going,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {Going Weighted: Parameterized Algorithms for Cluster Editing},
    booktitle = {Proc. of Conference on Combinatorial Optimization and Applications (COCOA 2008)},
    year = {2008},
    volume = {5165},
    series = lncs,
    pages = {1--12},
    publisher = Springer,
    abstract = {The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper we present a surprisingly simple branching strategy for Cluster Editing. We generalize the problem assuming that edge insertion and deletion costs are positive integers. We show that the resulting search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm for this problem. We have implemented and evaluated our approach, and find that it outperforms other parametrized algorithms for the problem.},
    data = {cluster_editing_data},
    doi = {10.1007/978-3-540-85097-7_1},
    file = {BoeckerEtAl_GoingWeightedParameterized_COCOA_2008.pdf:2008/BoeckerEtAl_GoingWeightedParameterized_COCOA_2008.pdf:PDF},
    journalref = {boecker09going},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    timestamp = {2009.03.08},
    }
  • [DOI] Böcker, S., Briesemeister, S. & Klau, G. W. (2008) Exact algorithms for cluster editing: evaluation and experiments Springer, berlin, 289–302.
    [Bibtex]
    @InProceedings{boecker08exact,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {Exact Algorithms for Cluster Editing: Evaluation and Experiments},
    booktitle = {Proc. of Workshop on Experimental Algorithms (WEA 2008)},
    year = {2008},
    volume = {5038},
    series = lncs,
    pages = {289--302},
    publisher = Springer,
    abstract = {We present empirical results for the Cluster Editing problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of |V|. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying k <= 25 |V|. In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques.},
    data = {cluster_editing_data},
    doi = {10.1007/978-3-540-68552-4_22},
    file = {BoeckerEtAl_ExactAlgorithmsClusterEditing_WEA_2008.pdf:2008/BoeckerEtAl_ExactAlgorithmsClusterEditing_WEA_2008.pdf:PDF},
    journalref = {boecker11exact},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    timestamp = {2008.03.03},
    }
  • [DOI] Böcker, S., Bui, Q. B. A. & Truss, A. (2008) An improved fixed-parameter algorithm for minimum-flip consensus trees IN Grohe, M. & Niedermeier, R. (Eds.), .Springer-verlag, 43–54.
    [Bibtex]
    @InProceedings{boecker08improved,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {An Improved Fixed-Parameter Algorithm for Minimum-Flip Consensus Trees},
    booktitle = {Proc. of International Workshop on Parameterized and Exact Computation (IWPEC 2008)},
    year = {2008},
    editor = {Martin Grohe and Rolf Niedermeier},
    volume = {5018},
    series = lncs,
    pages = {43--54},
    publisher = SV,
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree for a given set of input trees has frequently been addressed. In this paper we study the \textsc{Minimum-Flip Problem}: the input trees are transformed into a binary matrix, and we want to find a perfect phylogeny for this matrix using a minimum number of flips, that is, corrections of single entries in the matrix. In its graph-theoretical formulation, the problem is as follows: Given a bipartite graph G=(V_t \cup V_c, E), the problem is to find a minimum set of edge modifications such that the resulting graph has no induced path with four edges which starts and ends in V_t. We present a fixed-parameter algorithm for the Minimum-Flip Problem with running time O(4.83^k (m+n) + mn) for n taxa, m characters, and k flips. Additionally, we discuss several heuristic improvements. We also report computational results on phylogenetic data.},
    doi = {10.1007/978-3-540-79723-4},
    file = {BoeckerEtAl_ImprovedFixedParameterAlgorithmMinimumFlipConsensusTrees_IWPEC_2008.pdf:2008/BoeckerEtAl_ImprovedFixedParameterAlgorithmMinimumFlipConsensusTrees_IWPEC_2008.pdf:PDF},
    journalref = {boecker12improved},
    keywords = {jena; FPT; linkpdf; phylogenetics; PABI},
    owner = {baoanh},
    timestamp = {2008.02.15},
    }
  • [DOI] Böcker, S., Jahn, K., Mixtacki, J. & Stoye, J. (2008) Computation of median gene clusters Springer, berlin, 331–345.
    [Bibtex]
    @InProceedings{boecker08computation,
    author = {Sebastian B\"ocker and Katharina Jahn and Julia Mixtacki and Jens Stoye},
    title = {Computation of Median Gene Clusters},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2008)},
    year = {2008},
    volume = {4955},
    series = lncs,
    pages = {331--345},
    publisher = Springer,
    abstract = {Whole genome comparison based on gene order has become a popular approach in comparative genomics. An important task in this field is the detection of gene clusters, i.e. sets of genes that occur colocalized in several genomes. For most applications it is preferable to extend this definition to allow for small deviations in the gene content of the cluster occurrences. However, relaxing the equality constraint increases the computational complexity of gene cluster detection drastically. Existing approaches deal with this problem by using simplifying constraints on the cluster definition and/or allowing only pairwise genome comparison. In this paper we introduce a cluster concept named median gene clusters that improves over existing models and present efficient algorithms for their computation that allow for the detection of approximate gene clusters in multiple genomes.},
    doi = {10.1007/978-3-540-78839-3_28},
    file = {BoeckerEtAl_ComputationMedianGeneClusters_RECOMB_2008.pdf:2008/BoeckerEtAl_ComputationMedianGeneClusters_RECOMB_2008.pdf:PDF},
    journalref = {boecker09computation},
    keywords = {jena; linkpdf; 3AGC; gene clusters, genome rearrangements},
    owner = {Sebastian},
    timestamp = {2007.12.11},
    }
  • [DOI] Böcker, S., relax Zs, Martin, M., Pervukhin, A. & Sudek, H. (2008) DECOMP–from interpreting mass spectrometry peaks to solving the Money Changing Problem. IN Bioinformatics, 24.591–593.
    [Bibtex]
    @Article{boecker08decomp,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k and Marcel Martin and Anton Pervukhin and Henner Sudek},
    title = {{DECOMP}--from interpreting Mass Spectrometry peaks to solving the {Money Changing Problem}},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    number = {4},
    pages = {591--593},
    abstract = {We introduce DECOMP, a tool that computes the sum formula of all molecules whose mass equals the input mass. This problem arises frequently in biochemistry and mass spectrometry (MS), when we know the molecular mass of a protein, DNA, or metabolite fragment but have no other information. A closely related problem is known as the Money Changing Problem (MCP), where all masses are positive integers. Recently, efficient algorithms have been developed for the MCP, which DECOMP applies to real valued MS data. The excellent performance of this method on proteomic and metabolomic MS data has recently been demonstrated. DECOMP has an easy-to-use graphical interface, which caters for both types of users: those interested in solving MCP instances and those submitting MS data.},
    doi = {10.1093/bioinformatics/btm631},
    file = {BoeckerEtAl_DecompInterpretingMassSpectrometry_Bioinf_2008.pdf:2008/BoeckerEtAl_DecompInterpretingMassSpectrometry_Bioinf_2008.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; metabolite ms; TrACReview},
    owner = {Sebastian},
    pmid = {18174179},
    timestamp = {2007.11.05},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/24/4/591?ijkey=1lM50Bkzz4SCLsa&keytype=ref},
    }
  • [DOI] Böcker, S. & Mäkinen, V. (2008) Combinatorial approaches for mass spectra recalibration. IN Ieee/acm trans comput biology bioinform, 5.91–100.
    [Bibtex]
    @Article{boecker08combinatorial,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Combinatorial Approaches for Mass Spectra Recalibration},
    journal = TCBB,
    year = {2008},
    volume = {5},
    number = {1},
    pages = {91--100},
    issn = {1545-5963},
    abstract = {Mass spectrometry has become one of the most popular analysis techniques in Proteomics and Systems Biology. With the creation of larger datasets, the automated recalibration of mass spectra becomes important to ensure that every peak in the sample spectrum is correctly assigned to some peptide and protein. Algorithms for recalibrating mass spectra have to be robust with respect to wrongly assigned peaks, as well as efficient due to the amount of mass spectrometry data. The recalibration of mass spectra leads us to the problem of finding an optimal matching between mass spectra under measurement errors.We have developed two deterministic methods that allow robust computation of such a matching: The first approach uses a computational geometry interpretation of the problem, and tries to find two parallel lines with constant distance that stab a maximal number of points in the plane. The second approach is based on finding a maximal common approximate subsequence, and improves existing algorithms by one order of magnitude exploiting the sequential nature of the matching problem. We compare our results to a computational geometry algorithm using a topological line-sweep.},
    doi = {10.1109/tcbb.2007.1077},
    file = {BoeckerMaekinen_CombinatorialApproachesMassSpectra_TCBB_2008.pdf:2008/BoeckerMaekinen_CombinatorialApproachesMassSpectra_TCBB_2008.pdf:PDF},
    keywords = {jena; linkpdf; ms, recalibration, computational geometry},
    owner = {Sebastian},
    pmid = {18245878},
    timestamp = {2007.09.22},
    }
  • [DOI] Böcker, S. & Rasche, F. (2008) Towards de novo identification of metabolites by analyzing tandem mass spectra. IN Bioinformatics, 24.I49-I55.
    [Bibtex]
    @Article{boecker08towards,
    author = {Sebastian B\"ocker and Florian Rasche},
    title = {Towards de novo identification of metabolites by analyzing tandem mass spectra},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    pages = {I49-I55},
    note = {Proc. of \emph{European Conference on Computational Biology} (ECCB 2008)},
    doi = {10.1093/bioinformatics/btn270},
    file = {BoeckerRasche_TowardsDeNovoIdentification_Bioinf_2008.pdf:2008/BoeckerRasche_TowardsDeNovoIdentification_Bioinf_2008.pdf:PDF},
    keywords = {jena; linkpdf; metabolite ms; ms; idun; ft; TrACReview},
    owner = {rasche},
    pmid = {18689839},
    timestamp = {08.04.2008},
    }
  • [DOI] Griebel, T., Brinkmeyer, M. & Böcker, S. (2008) EPoS: a modular software framework for phylogenetic analysis. IN Bioinformatics, 24.2399–2400.
    [Bibtex]
    @Article{griebel08epos,
    author = {Thasso Griebel and Malte Brinkmeyer and Sebastian B\"ocker},
    title = {{EPoS}: A modular software framework for phylogenetic analysis},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    number = {20},
    pages = {2399--2400},
    doi = {10.1093/bioinformatics/btn364},
    keywords = {jena; phylogenetics},
    owner = {thasso},
    pmid = {18632748},
    timestamp = {2008.07.16},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btn364?ijkey=08ZjFqzfs4WCjFn&keytype=ref},
    }
  • Kehr, B. (2008) Structure determination of glycans by analyzing tandem mass spectra. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{kehr08structure,
    author = {Birte Kehr},
    title = {Structure Determination of Glycans by Analyzing Tandem Mass Spectra},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2008},
    type = {Diplomarbeit},
    file = {Kehr_StructureDeterminationGlycans_Diplomarbeit_2008.pdf:2008/Kehr_StructureDeterminationGlycans_Diplomarbeit_2008.pdf:PDF},
    keywords = {fpt; jena; linkpdf; glycan ms; ms; PABI},
    owner = {rasche},
    timestamp = {2009.01.22},
    }
  • Rasche, F. (2008) Analysis of metabolite tandem mass spectra. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{rasche08analysis,
    author = {Florian Rasche},
    title = {Analysis of Metabolite Tandem Mass Spectra},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2008},
    type = {Diplomarbeit},
    keywords = {jena; tandem ms; metabolite ms; idun; ft},
    owner = {rasche},
    timestamp = {13.02.2008},
    }
  • [DOI] Timm, W., Scherbart, A., Böcker, S., Kohlbacher, O. & Nattkemper, T. W. (2008) Peak intensity prediction in MALDI-TOF mass spectrometry: a machine learning study to support quantitative proteomics. IN Bmc bioinf, 9.443.
    [Bibtex]
    @Article{timm08peak,
    author = {Wiebke Timm and Alexandra Scherbart and Sebastian B\"ocker and Oliver Kohlbacher and Tim W. Nattkemper},
    title = {Peak Intensity Prediction in {MALDI-TOF} Mass Spectrometry: A Machine Learning Study to Support Quantitative Proteomics},
    journal = {BMC Bioinf},
    year = {2008},
    volume = {9},
    pages = {443},
    abstract = {Background. Mass spectrometry is a key technique in proteomics and can be used to analyze complex samples quickly. One key problem with the mass spectrometric analysis of peptides and proteins, however, is the fact that absolute quantification is severely hampered by the unclear relationship between the observed peak intensity and the peptide concentration in the sample. While there are numerous approaches to circumvent this problem experimentally (e.g. labeling techniques), reliable prediction of the peak intensities from peptide sequences could provide a peptide-specific correction factor. Thus, it would be a valuable tool towards label-free absolute quantification. Results. In this work we present machine learning techniques for peak intensity prediction for MALDI mass spectra. Features encoding the peptides' physico-chemical properties as well as string-based features were extracted. A feature subset was obtained from multiple forward feature selections on the extracted features. Based on these features, two advanced machine learning methods (support vector regression and local linear maps) are shown to yield good results for this problem (Pearson correlation of 0.68 in a ten-fold cross validation). Conclusions. The techniques presented here are a useful first step going beyond the binary prediction of proteotypic peptides towards a more quantitative prediction of peak intensities. These predictions in turn will turn out to be beneficial for mass spectrometry-based quantitative proteomics.},
    doi = {10.1186/1471-2105-9-443},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    pmid = {18937839},
    timestamp = {2008.10.06},
    url = {http://www.biomedcentral.com/1471-2105/9/443},
    }

2007

  • [DOI] Böcker, S. (2007) Simulating multiplexed SNP discovery rates using base-specific cleavage and mass spectrometry. IN Bioinformatics, 23.e5-e12.
    [Bibtex]
    @Article{boecker07simulating,
    author = {Sebastian B\"ocker},
    title = {Simulating multiplexed {SNP} discovery rates using base-specific cleavage and mass spectrometry},
    journal = {Bioinformatics},
    year = {2007},
    volume = {23},
    number = {2},
    pages = {e5-e12},
    note = {Proc. of \emph{European Conference on Computational Biology} (ECCB 2006)},
    abstract = {Motivation: Single Nucleotide Polymorphisms (SNPs) are believed to contribute strongly to the genetic variability in living beings, and SNP and mutation discovery are of great interest in today's Life Sciences. A comparatively new method to discover such polymorphisms is based on base-specific cleavage, where resulting cleavage products are analyzed by mass spectrometry (MS). One particular advantage of this method is the possibility of multiplexing the biochemical reactions, i.e. examining multiple genomic regions in parallel. Simulations can help estimating the performance of a method for polymorphism discovery, and allow us to evaluate the influence of method parameters on the discovery rate, and also to investigate whether the method is well suited for a certain genomic region. Results: We show how to efficiently conduct such simulations for polymorphism discovery using base-specific cleavage and MS. Simulating multiplexed polymorphism discovery leads us to the problem of uniformly drawing a multiplex. Given a multiset of natural numbers we want to uniformly draw a subset of fixed cardinality so that the elements sum up to some fixed total length. We show how to enumerate multiplex layouts using dynamic programming, which allows us to uniformly draw a multiplex.},
    doi = {10.1093/bioinformatics/btl291},
    file = {Boecker_SimulatingMultiplexedSNPDiscoveryRates_ECCB_2006.pdf:2006/Boecker_SimulatingMultiplexedSNPDiscoveryRates_ECCB_2006.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    owner = {Sebastian},
    pmid = {17237104},
    timestamp = {2006.10.17},
    }
  • [DOI] Böcker, S. & Kaltenbach, H. (2007) Mass spectra alignments and their significance. IN J discrete algorithms, 5.714–728.
    [Bibtex]
    @Article{boecker07mass,
    author = {Sebastian B\"ocker and Hans-Michael Kaltenbach},
    title = {Mass Spectra Alignments and Their Significance},
    journal = {J Discrete Algorithms},
    year = {2007},
    volume = {5},
    number = {4},
    pages = {714--728},
    abstract = {Mass Spectrometry has become one of the most popular analysis techniques in Genomics and Systems Biology. We investigate a general framework that allows the alignment (or matching) of any two mass spectra. In particular, we examine the alignment of a reference mass spectrum generated in silico from a database, with a measured sample mass spectrum. In this context, we assess the significance of alignment scores for character-specific cleavage experiments, such as tryptic digestion of amino acids. We present an efficient approach to estimate this significance, with runtime linear in the number of detected peaks. In this context, we investigate the probability that a random string over a weighted alphabet contains a substring of some given weight.},
    comment = {Definition of Mass Spectra Alignments, Combinatorial derivation of fragment, mass-occurrence probabilities, estimation of moments of score distribution},
    doi = {10.1016/j.jda.2006.11.003},
    file = {BoeckerKaltenbach_MassSpectraAlignments_JDiscAlgor_2007.pdf:2007/BoeckerKaltenbach_MassSpectraAlignments_JDiscAlgor_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Standard},
    timestamp = {2006.12.04},
    }
  • [DOI] Böcker, S. & relax Zs (2007) A fast and simple algorithm for the Money Changing Problem. IN Algorithmica, 48.413–432.
    [Bibtex]
    @Article{boecker07fast,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {A fast and simple algorithm for the {Money Changing Problem}},
    journal = {Algorithmica},
    year = {2007},
    volume = {48},
    number = {4},
    pages = {413--432},
    abstract = {The Money Changing Problem (MCP) can be stated as follows: Given $k$ positive integers $a_1< \ldots < a_k$ and a query integer $M$, is there a linear combination $\sum_i c_ia_i = M$ with non-negative integers $c_i$, a \emph{decomposition} of $M$? If so, produce one or all such decompositions. The largest integer without such a decomposition is called the \emph{Frobenius number} $g(a_1,\ldots,a_k)$. A data structure called {\em residue table} of $a_1$ words can be used to compute the Frobenius number in time $O(a_1)$. We present an intriguingly simple algorithm for computing the residue table which runs in time $O(ka_1)$, with no additional memory requirements, outperforming the best previously known algorithm. Simulations show that it performs well even on 'hard' instances from the literature. In addition, we can employ the residue table to answer MCP decision instances in constant time, and a slight modification of size $O(a_1)$ to compute one decomposition for a query $M$. Note that since both, computing the Frobenius number and MCP (decision) are NP-hard, one cannot expect to find an algorithm that is polynomial in the size of the input, i.e., in $k,\log a_k$, and $\log M$. We then give an algorithm which, using a modification of the residue table, also constructible in $O(ka_1)$ time, computes all decompositions of a query integer $M$. Its worst-case running time is $O(ka_1)$ for each decomposition, thus the total runtime depends only on the output size and is independent of the size of query $M$ itself. We apply our latter algorithm to interpreting mass spectrometry (MS) peaks: Due to its high speed and accuracy, MS is now the method of choice in protein identification. Interpreting individual peaks is one of the recurring subproblems in analyzing MS data; the task is to identify sample molecules whose mass the peak possibly represents. This can be stated as an MCP instance, with the masses of the individual amino acids as the $k$ integers $a_1,\ldots, a_k$. Our simulations show that our algorithm is fast on real data and is well suited for generating candidates for peak interpretation.},
    doi = {10.1007/s00453-007-0162-8},
    file = {BoeckerLiptak_FastSimpleAlgorithmMoneyChanging_Algorithmica_2007.pdf:2007/BoeckerLiptak_FastSimpleAlgorithmMoneyChanging_Algorithmica_2007.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; frobenius},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }
  • Briesemeister, S. (2007) Weighted cluster editing for clustering biological data. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{briesemeister07weighted,
    author = {Sebastian Briesemeister},
    title = {Weighted Cluster Editing for clustering biological data},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2007},
    type = {Diplomarbeit},
    keywords = {jena; fpt; PABI},
    owner = {m2brse},
    timestamp = {2007.12.16},
    }
  • Hüffner, F. (2007) Algorithms and experiments for parameterized approaches to hard graph problems. PhD Thesis. .
    [Bibtex]
    @PhdThesis{hueffner07algorithms,
    author = {Falk H\"uffner},
    title = {Algorithms and Experiments for Parameterized Approaches to Hard Graph Problems},
    school = {Institut f{\"u}r Informatik, Friedrich-Schiller-Universit{\"a}t Jena},
    year = {2007},
    keywords = {fpt},
    owner = {anke},
    timestamp = {2009.03.09},
    url = {http://theinf1.informatik.uni-jena.de/publications/diss-hueffner07.pdf},
    }
  • [DOI] Kaltenbach, H., Wilke, A. & Böcker, S. (2007) SAMPI: protein identification with mass spectra alignments. IN Bmc bioinf, 8.102.
    [Bibtex]
    @Article{kaltenbach07sampi,
    author = {Hans-Michael Kaltenbach and Andreas Wilke and Sebastian B\"ocker},
    title = {{SAMPI}: Protein identification with Mass Spectra Alignments},
    journal = {BMC Bioinf},
    year = {2007},
    volume = {8},
    pages = {102},
    abstract = {Background: Mass spectrometry based peptide mass fingerprints (PMFs) offer a fast, efficient, and robust method for protein identification. A protein is digested (usually by trypsin) and its mass spectrum is compared to simulated spectra for protein sequences in a database. However, existing tools for analyzing PMFs often suffer from missing or heuristic analysis of the significance of search results and insufficient handling of missing and additional peaks. Results: We present an unified framework for analyzing Peptide Mass Fingerprints that offers a number of advantages over existing methods: First, comparison of mass spectra is based on a scoring function that can be custom-designed for certain applications and explicitly takes missing and additional peaks into account. The method is able to simulate almost every additive scoring scheme. Second, we present an efficient deterministic method for assessing the significance of a protein hit, independent of the underlying scoring function and sequence database. We prove the applicability of our approach using biological mass spectrometry data and compare our results to the standard software Mascot. Conclusion: The proposed framework for analyzing Peptide Mass Fingerprints shows performance comparable to Mascot on small peak lists. Introducing more noise peaks, we are able to keep identification rates at a similar level by using the flexibility introduced by scoring schemes.},
    doi = {10.1186/1471-2105-8-102},
    file = {KaltenbachEtAl_SampiProteinIdentification_BMCBioinf_2007.pdf:2007/KaltenbachEtAl_SampiProteinIdentification_BMCBioinf_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    pmid = {17386090},
    timestamp = {2007.09.22},
    }
  • [DOI] Kaltenbach, M., Böcker, S. & Rahmann, S. (2007) Markov additive chains and applications to fragment statistics for peptide mass fingerprinting Springer, berlin, 29–41.
    [Bibtex]
    @InProceedings{kaltenbach07markov,
    author = {Michael Kaltenbach and Sebastian B\"ocker and Sven Rahmann},
    title = {{Markov} Additive Chains and Applications to Fragment Statistics for Peptide Mass Fingerprinting},
    booktitle = {Proc. of RECOMB Satellite Conference on Systems Biology and Computational Proteomics 2006},
    year = {2007},
    volume = {4532},
    series = lncs,
    pages = {29--41},
    publisher = Springer,
    doi = {10.1007/978-3-540-73060-6_3},
    file = {KaltenbachEtAl_MarkovAdditiveChains_RECOMBSatellite_2006.pdf:2006/KaltenbachEtAl_MarkovAdditiveChains_RECOMBSatellite_2006.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.01.02},
    }
  • Klein, J. D. (2007) Problem kernels for genome rearrangement problems. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{klein07problem,
    author = {Juliane Damaris Klein},
    title = {Problem Kernels For Genome Rearrangement Problems},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2007},
    type = {Diplomarbeit},
    abstract = {Genome rearrangement problems are motivated from phylogenetic reconstruction. While there already exist a plentitude of reconstruction methods for sequence data new methods for gene order data are required. One of these new approaches is developed by Sankoff and Blanchette [4]. For their reconstruction method an exact solution of the median problem or the center problem is essential. In this thesis we examine the fixed-parameter tractability of median and center problems. First, we consider the Reversal Median Problem which is shown to be NP-hard in [3]. We show that it is fixed-parameter tractable with the sum of distances between median and input permutations as parameter and under the assumption that the number of input permutations is constant. We show that the Reversal Median Problem has a problem kernel of linear size. Furthermore, we introduce and examine two center problems: Reversal Center Problem and Breakpoint Center Problem. For the center problems we show that they are fixed-parameter tractable with the maximum distance between center and a input permutation as parameter if we assume that the number of input permutations is constant. The main focus of this work are Induced Breakpoint Median Problem and Exemplar Breakpoint Median Problem. These median problems are more relevant in molecular biology since sequences are given instead of permutations. They are both NP-hard [1, 2]. We present for each of them a preprocessing. Furthermore, we show, for the Induced Breakpoint Median Problem, that it is fixed-parameter tractable with the sum of distance between median and input permutations or sequences as parameter if the number of input permutations or sequences is three. For the Exemplar Breakpoint Median Problem we show that it is fixed-parameter tractable with two parameters and for the case that three permutations or sequences are given. For each of these problems we show that it admits a problem kernel of linear size.},
    file = {Klein_ProblemKernelsGenomeRearrangement_diplomarbeit_2007.pdf:2007/Klein_ProblemKernelsGenomeRearrangement_diplomarbeit_2007.pdf:PDF},
    keywords = {fpt jena},
    owner = {Sebastian},
    timestamp = {2008.02.04},
    }
  • Rahmann, S., Wittkop, T., Baumbach, J., Martin, M., Truss, A. & Böcker, S. (2007) Exact and heuristic algorithms for weighted cluster editing , 391–401.
    [Bibtex]
    @InProceedings{rahmann07exact,
    author = {Sven Rahmann and Tobias Wittkop and Jan Baumbach and Marcel Martin and Anke Truss and Sebastian B\"ocker},
    title = {Exact and Heuristic Algorithms for Weighted Cluster Editing},
    booktitle = {Proc. of Computational Systems Bioinformatics (CSB 2007)},
    year = {2007},
    volume = {6},
    pages = {391--401},
    abstract = {Clustering objects according to given similarity or distance values is a ubiquitous problem in computational biology with diverse applications, e.g., in defining families of orthologous genes, or in the analysis of microarray experiments. While there exists a plenitude of methods, many of them produce clusterings that can be further improved. "Cleaning up" initial clusterings can be formalized as projecting a graph on the space of transitive graphs; it is also known as the cluster editing or cluster partitioning problem in the literature. In contrast to previous work on cluster editing, we allow arbitrary weights on the similarity graph. To solve the so-defined weighted transitive graph projection problem, we present (1) the first exact fixed-parameter algorithm, (2) a polynomial-time greedy algorithm that returns the optimal result on a well-defined subset of "close-to-transitive" graphs and works heuristically on other graphs, and (3) a fast heuristic that uses ideas similar to those from the Fruchterman-Reingold graph layout algorithm. We compare quality and running times of these algorithms on both artificial graphs and protein similarity graphs derived from the 66 organisms of the COG dataset.},
    data = {cluster_editing_data},
    file = {RahmannEtAl_ExactHeuristicAlgorithms_CSB_2007.pdf:2007/RahmannEtAl_ExactHeuristicAlgorithms_CSB_2007.pdf:PDF},
    keywords = {jena; FPT; linkpdf; clustering method; PABI},
    owner = {Sebastian},
    pmid = {17951842},
    timestamp = {2007.09.09},
    url = {http://www.lifesciencessociety.org/CSB2007/toc/391.2007.html},
    }
  • [DOI] Scherbart, A., Timm, W., Böcker, S. & Nattkemper, T. W. (2007) Neural network approach for mass spectrometry prediction by peptide prototyping Springer, berlin, 90–99.
    [Bibtex]
    @InProceedings{scherbart07neural,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {Neural network approach for mass spectrometry prediction by peptide prototyping},
    booktitle = {Proc. of International Conference on Artificial Neural Networks (ICANN 2007)},
    year = {2007},
    volume = {4669},
    series = lncs,
    pages = {90--99},
    publisher = Springer,
    abstract = {In todays bioinformatics, Mass spectrometry (MS) is the key technique for the identification of proteins. A prediction of spectrum peak intensities from pre computed molecular features would pave the way to better understanding of spectrometry data and improved spectrum evaluation. We propose a neural network architecture of Local Linear Map (LLM)-type for peptide prototyping and learning locally tuned regression functions for peak intensity prediction in MALDI-TOF mass spectra. We obtain results comparable to those obtained by nu-Support Vector Regression and show how the LLM learning architecture provides a basis for peptide feature profiling and visualisation.},
    doi = {10.1007/978-3-540-74695-9},
    file = {ScherbartEtAl_NeuralNetworkApproach_ICANN_2007.pdf:2007/ScherbartEtAl_NeuralNetworkApproach_ICANN_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.09.10},
    }
  • [DOI] Scherbart, A., Timm, W., Böcker, S. & Nattkemper, T. W. (2007) SOM-based peptide prototyping for mass spectrometry peak intensity prediction .
    [Bibtex]
    @InProceedings{scherbart07som-based,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {{SOM}-based peptide prototyping for mass spectrometry peak intensity prediction},
    booktitle = {Proc. of Workshop on Self-Organizing Maps (WSOM 2007)},
    year = {2007},
    note = {Doi 10.2390/biecoll-wsom2007-157.},
    doi = {10.2390/biecoll-wsom2007-157},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.09.10},
    }
  • Steinbrück, L. (2007) Methoden des Maschinellen Lernens zur Vorhersage von Peak-Intensitäten in PMF-Massenspektren. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{steinbrueck07methoden,
    author = {Lars Steinbr\"uck},
    title = {Methoden des {Maschinellen Lernens} zur {Vorhersage} von {Peak-Intensit\"aten} in {PMF-Massenspektren}},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2007},
    type = {Diplomarbeit},
    file = {Steinbrueck_MethodenMaschinellenLernens_diplomarbeit_2007.pdf:2007/Steinbrueck_MethodenMaschinellenLernens_diplomarbeit_2007.pdf:PDF},
    keywords = {jena; linkpdf; pmf, ms},
    optmonth = sep,
    owner = {Sebastian},
    timestamp = {2008.02.04},
    }
  • Wernicke, S. & Rasche, F. (2007) Simple and fast alignment of metabolic pathways by exploiting local diversity Imperial college press, 353–362.
    [Bibtex]
    @InProceedings{wernicke07simple,
    author = {Sebastian Wernicke and Florian Rasche},
    title = {Simple and fast alignment of metabolic pathways by exploiting local diversity},
    booktitle = {Proc. of Asia-Pacific Bioinformatic Conference (APBC 2007)},
    year = {2007},
    series = {Advances in Bioinformatics and Computational Biology},
    pages = {353--362},
    publisher = {Imperial College Press},
    file = {WernickeRasche_SimpleFastAlignmentMetabolicPathways_APBC_2007.pdf:2007/WernickeRasche_SimpleFastAlignmentMetabolicPathways_APBC_2007.pdf:PDF},
    keywords = {jena; PABI; linkpdf; fpt},
    owner = {Sebastian},
    timestamp = {2007.02.06},
    url = {http://theinf1.informatik.uni-jena.de/publications/graphmatching-apbc07.pdf},
    }
  • Wernicke, S. & Rasche, F. (2007) Simple and fast alignment of metabolic pathways by exploiting local diversity. IN Bioinformatics, 23.1978–1985.
    [Bibtex]
    @Article{wernicke07simpleextended,
    author = {Sebastian Wernicke and Florian Rasche},
    title = {Simple and fast alignment of metabolic pathways by exploiting local diversity},
    journal = {Bioinformatics},
    year = {2007},
    volume = {23},
    number = {15},
    pages = {1978--1985},
    keywords = {jena; PABI; linkpdf; fpt},
    owner = {rasche},
    pmid = {17540683},
    timestamp = {2007.12.14},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btm279?ijkey=zIJ3uj3Xs736C28&keytype=ref},
    }

2006

  • [DOI] Böcker, S. (2006) Sequencing from compomers: the puzzle. IN Theory comput syst, 39.455–471.
    [Bibtex]
    @Article{boecker06sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from Compomers: The Puzzle},
    journal = TCSyst,
    year = {2006},
    volume = {39},
    number = {3},
    pages = {455--471},
    abstract = {The board game FragmindTM poses the following problem: The player has to reconstruct an (unknown) string s over the alphabet $\Sigma$. To this end, the game reports the following information to the player, for every character $x \in \Sigma$: First, the string s is cleaved wherever the character x is found in s. Second, every resulting fragment y is scrambled by a random permutation so that the only information left is how many times y contains each character $\sigma \in \Sigma$. These scrambled fragments are then reported to the player. Clearly, distinct strings can show identical cleavage patterns for all cleavage characters. In fact, even short strings of length 30+ usually have non-unique cleavage patterns. To this end, we introduce a generalization of the game setup called Sequencing from Compomers. We also generate those fragments of s that contain up to k uncleaved characters x, for some small and fixed threshold k. This modification dramatically increases the length of strings that can be uniquely reconstructed. We show that it is NP-hard to decide whether there exists some string compatible with the input data, but we also present a branch-and-bound runtime heuristic to find all such strings: The input data are transformed into subgraphs of the de Bruijn graph, and we search for walks in these subgraphs simultaneously. The above problem stems from the analysis of mass spectrometry data from base-specific cleavage of DNA sequences, and gives rise to a completely new approach to DNA de-novo sequencing.},
    doi = {10.1007/s00224-005-1238-y},
    file = {Boecker_SequencingFromCompomersPuzzle_TheorComputSyst_2006.pdf:2006/Boecker_SequencingFromCompomersPuzzle_TheorComputSyst_2006.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    owner = {Sebastian},
    timestamp = {2006.05.10},
    }
  • [DOI] Böcker, S., Letzel, M., relax Zs & Pervukhin, A. (2006) Decomposing metabolomic isotope patterns Springer, berlin, 12–23.
    [Bibtex]
    @InProceedings{boecker06decomposing,
    author = {Sebastian B\"ocker and Matthias Letzel and {\relax Zs}uzsanna Lipt{\'a}k and Anton Pervukhin},
    title = {Decomposing metabolomic isotope patterns},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2006)},
    year = {2006},
    volume = {4175},
    series = lncs,
    pages = {12--23},
    publisher = Springer,
    abstract = {We present a method for determining the sum formula of metabolites solely from their mass and isotope pattern. Metabolites, such as sugars or lipids, participate in almost all cellular processes, but the majority still remains uncharacterized. Our input is a measured isotope pattern from a high resolution mass spectrometer, and we want to find those molecules that best match this pattern. Determination of the sum formula is a crucial step in the identification of an unknown metabolite, as it reduces its possible structures to a hopefully manageable set. Our method is computationally efficient, and first results on experimental data indicate good identification rates for chemical compounds up to 700 Dalton. Above 1000 Dalton, the number of molecules with a certain mass increases rapidly. To efficiently analyze mass spectra of such molecules, we define several additive invariants extracted from the input and then propose to solve a joint decomposition problem.},
    doi = {10.1007/11851561_2},
    file = {BoeckerEtAl_DecomposingMetabolomicIsotopePatterns_WABI_2006.pdf:2006/BoeckerEtAl_DecomposingMetabolomicIsotopePatterns_WABI_2006.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; metabolite ms; TrACReview},
    opteditor = {Philipp B\"ucher and Bernard M. E. Moret},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }
  • Böcker, S. & Mäkinen, V. (2006) Combinatorial approaches for mass spectra recalibration IN Huber, C., Kohlbacher, O. & Reinert, K. (Eds.), .Internationales begegnungs- und forschungszentrum (ibfi), schloss dagstuhl, germany.
    [Bibtex]
    @InProceedings{boecker06combinatorial,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Combinatorial Approaches for Mass Spectra Recalibration},
    booktitle = {Computational Proteomics},
    year = {2006},
    editor = {Christian Huber and Oliver Kohlbacher and Knut Reinert},
    number = {05471},
    series = {Dagstuhl Seminar Proceedings},
    publisher = {Internationales Begegnungs- und Forschungszentrum (IBFI), Schloss Dagstuhl, Germany},
    issn = {1862-4405},
    keywords = {jena; ms, recalibration, computational geometry},
    optaddress = {Dagstuhl, Germany},
    }
  • Dom, M., Guo, J., Hüffner, F., Niedermeier, R. & Truss, A. (2006) Fixed-parameter tractability results for feedback set problems in tournaments Springer, berlin, 320–331.
    [Bibtex]
    @InProceedings{dom06fixed-parameter,
    author = {Michael Dom and Jiong Guo and Falk H\"uffner and Rolf Niedermeier and Anke Truss},
    title = {Fixed-parameter tractability results for feedback set problems in tournaments},
    booktitle = {Proc. of Conference on Algorithms and Complexity (CIAC 2006)},
    year = {2006},
    volume = {3998},
    series = lncs,
    pages = {320--331},
    publisher = Springer,
    abstract = {Complementing recent progress on classical complexity and polynomial-time approximability of feedback set problems in (bipartite) tournaments, we extend and partially improve fixed-parameter tractability results for these problems. We show that Feedback Vertex Set in tournaments is amenable to the novel iterative compression technique. Moreover, we provide data reductions and problem kernels for Feedback Vertex Set and Feedback Arc Set in tournaments, and a depth-bounded search tree for Feedback Arc Set in bipartite tournaments based on a new forbidden subgraph characterization.},
    file = {DomEtAl_Fixed-ParameterTractabilityResults_CIAC_2006.pdf:2006/DomEtAl_Fixed-ParameterTractabilityResults_CIAC_2006.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    timestamp = {2007.02.06},
    }
  • [DOI] Sammeth, M., Griebel, T., Tille, F. & Stoye, J. (2006) Panta rhei (QAlign2): an open graphical environment for sequence analysis. IN Bioinformatics, 22.889–890.
    [Bibtex]
    @Article{sammeth06pantarhei,
    author = {Michael Sammeth and Thasso Griebel and Felix Tille and Jens Stoye},
    title = {Panta rhei ({QAlign2}): An open graphical environment for sequence analysis},
    journal = {Bioinformatics},
    year = {2006},
    volume = {22},
    number = {7},
    pages = {889--890},
    abstract = {MOTIVATION: The first version of the graphical multiple sequence alignment environment QAlign was published in 2003. Heavy response from the molecular-biological user community clearly demonstrated the need for such a platform. RESULTS: Panta rhei extends QAlign by several features. Major redesigns on the user interface, for instance, allow users to flexibily compose views for multiple projects. The new sequence viewer handles datasets with arbitrarily many and arbitrarily large sequences that may still be edited by guided block moving. More distance-based algorithms are available to interactively reconstruct phylogenetic trees which can now also be zoomed and navigated graphicaly. AVAILABILITY: Executables and the JAVA source code are available under the Apache license at http://gi.cebitec.uni-bielefeld.de/qalign CONTACT: .},
    doi = {10.1093/bioinformatics/btl007},
    keywords = {phylogenetics jena; linkpdf;},
    optmonth = apr,
    owner = {thasso},
    pmid = {16418234},
    timestamp = {2008.07.15},
    }
  • Timm, W., Böcker, S., Twellmann, T. & Nattkemper, T. W. (2006) Peak intensity prediction for PMF mass spectra using support vector regression , 565–572.
    [Bibtex]
    @InProceedings{timm06peak,
    author = {Wiebke Timm and Sebastian B\"ocker and Thorsten Twellmann and Tim W. Nattkemper},
    title = {Peak Intensity Prediction for {PMF} Mass Spectra using Support Vector Regression},
    booktitle = {Proc. of International FLINS Conference on Applied Artificial Intelligence (FLINS 2006)},
    year = {2006},
    pages = {565--572},
    file = {TimmEtAl_PeakIntensityPrediction_FLINS_2006.pdf:2006/TimmEtAl_PeakIntensityPrediction_FLINS_2006.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }
  • Wernicke, S. (2006) Combinatorial algorithms to cope with the complexity of biological networks. PhD Thesis. .
    [Bibtex]
    @PhdThesis{wernicke06combinatorial,
    author = {Sebastian Wernicke},
    title = {Combinatorial Algorithms to Cope With the Complexity of Biological Networks},
    school = {Friedrich-Schiller-Universit\"at Jena},
    year = {2006},
    keywords = {fpt},
    owner = {Sebastian},
    timestamp = {2006.12.18},
    }
  • Wernicke, S. & Rasche, F. (2006) FANMOD: a tool for fast network motif detection. IN Bioinformatics, 22.1152–1153.
    [Bibtex]
    @Article{wernicke06fanmod,
    author = {Sebastian Wernicke and Florian Rasche},
    title = {{FANMOD:} a tool for fast network motif detection},
    journal = {Bioinformatics},
    year = {2006},
    volume = {22},
    number = {9},
    pages = {1152--1153},
    keywords = {jena; PABI; linkpdf; fpt},
    owner = {Sebastian},
    pmid = {16455747},
    timestamp = {2007.02.06},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btl038?ijkey=EGtmBvBvjGHy7Fi&keytype=ref},
    }

2005

  • Böcker, S. & Kaltenbach, H. (2005) Mass spectra alignments and their significance Springer, berlin, 429–441.
    [Bibtex]
    @InProceedings{boecker05mass,
    author = {Sebastian B\"ocker and Hans-Michael Kaltenbach},
    title = {Mass Spectra Alignments and Their Significance},
    booktitle = {Proc. of Combinatorial Pattern Matching Symposium (CPM 2005)},
    year = {2005},
    volume = {3537},
    series = lncs,
    pages = {429--441},
    publisher = Springer,
    comment = {Definition of Mass Spectra Alignments, Combinatorial derivation of fragment, mass-occurrence probabilities, estimation of moments of score distribution},
    file = {BoeckerKaltenbach_MSAlignmentsSignificance_CPM_2005.pdf:2005/BoeckerKaltenbach_MassSpectraAlignmentsSignificance_CPM_2005.pdf:PDF},
    journalref = {boecker07mass},
    keywords = {jena; linkpdf;},
    }
  • Böcker, S. & relax Zs (2005) Efficient mass decomposition Acm press, new york, 151–157.
    [Bibtex]
    @InProceedings{boecker05efficient,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {Efficient Mass Decomposition},
    booktitle = {Proc. of ACM Symposium on Applied Computing (ACM SAC 2005)},
    year = {2005},
    pages = {151--157},
    publisher = {ACM press, New York},
    file = {BoeckerLiptak_EfficientMassDecomposition_ACMSAC_2005.pdf:2005/BoeckerLiptak_EfficientMassDecomposition_ACMSAC_2005.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; frobenius},
    }
  • Böcker, S. & relax Zs (2005) The Money Changing Problem revisited: computing the Frobenius number in time $O(k \, a_1)$ Springer, berlin, 965–974.
    [Bibtex]
    @InProceedings{boecker05money,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {The {Money Changing Problem} revisited: Computing the {Frobenius} number in time {$O(k \, a_1)$}},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2005)},
    year = {2005},
    volume = {3595},
    series = lncs,
    pages = {965--974},
    publisher = Springer,
    alteditor = {Lusheng Wang},
    file = {BoeckerLiptak_MoneyChangingProblem_COCOON_2005.pdf:2005/BoeckerLiptak_MoneyChangingProblem_COCOON_2005.pdf:PDF},
    journalref = {boecker07fast},
    keywords = {jena; IDUN; linkpdf; frobenius},
    }
  • Böcker, S. & Mäkinen, V. (2005) Maximum line-pair stabbing problem and its variations Eindhoven, Netherlands, , 183–186.
    [Bibtex]
    @InProceedings{boecker05maximum,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Maximum Line-Pair Stabbing Problem and its Variations},
    booktitle = {Proc. of European Workshop on Computational Geometry (EWCG 2005)},
    year = {2005},
    pages = {183--186},
    address = {Eindhoven, Netherlands},
    keywords = {jena; linkpdf;},
    url = {http://www.win.tue.nl/EWCG2005/Proceedings/47.pdf},
    }
  • Böcker, S. & Stoye, J. (2005) Informatische Methoden zur Protein- und Metaboliten-Identifikation mittels Massenspektrometrie. IN Laborpraxis, 10.24–26.
    [Bibtex]
    @Article{boecker05informatische,
    author = {Sebastian B\"ocker and Jens Stoye},
    title = {Informatische {Methoden} zur {Protein}- und {Metaboliten-Identifikation} mittels {Massenspektrometrie}},
    journal = {LaborPraxis},
    year = {2005},
    volume = {10},
    pages = {24--26},
    note = {German},
    keywords = {jena; IDUN; metabolite ms},
    }
  • [DOI] Ehrich, M., Böcker, S. & van den Boom, D. (2005) Multiplexed discovery of sequence polymorphisms using base-specific cleavage and MALDI-TOF MS. IN Nucleic acids res, 33.e38.
    [Bibtex]
    @Article{ehrich05multiplexed,
    author = {Ehrich, Mathias and B\"ocker, Sebastian and van den Boom, Dirk},
    title = {Multiplexed discovery of sequence polymorphisms using base-specific cleavage and {MALDI-TOF MS}},
    journal = {Nucleic Acids Res},
    year = {2005},
    volume = {33},
    number = {4},
    pages = {e38},
    doi = {10.1093/nar/gni038},
    file = {EhrichEtAl_MultiplexedDiscoverySequence_NucleicAcidsRes_2005.pdf:2005/EhrichEtAl_MultiplexedDiscoverySequence_NucleicAcidsRes_2005.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    pmid = {15731331},
    url = {http://nar.oxfordjournals.org/cgi/content/abstract/33/4/e38},
    }
  • Kaltenbach, H., Sudek, H., Böcker, S. & Rahmann, S. (2005) Statistics of cleavage fragments in random weighted strings .
    [Bibtex]
    @TechReport{kaltenbach05statistics,
    author = {Hans-Michael Kaltenbach and Henner Sudek and Sebastian B\"ocker and Sven Rahmann},
    title = {Statistics of cleavage fragments in random weighted strings},
    year = {2005},
    number = {TR-2005-06},
    file = {KaltenbachEtAl_StatisticsCleavageFragments_TechReport_2005.pdf:2005/KaltenbachEtAl_StatisticsCleavageFragments_TechReport_2005.pdf:PDF},
    keywords = {jena; linkpdf; whmm, statistics, combinatoris, implementation, swissprot, ms},
    owner = {michael},
    timestamp = {2005.11.11},
    }
  • Truss, A. (2005) Parametrisierte Algorithmen für Feedback-Set-Probleme auf Turniergraphen. Diplomarbeit Master Thesis. .
    [Bibtex]
    @MastersThesis{truss05parametrisierte,
    author = {A. Truss},
    title = {Parametrisierte {Algorithmen} f\"ur {Feedback-Set-Probleme} auf {Turniergraphen}},
    school = {Friedrich-Schiller-Universit\"at Jena; Institut f\"ur Informatik},
    year = {2005},
    type = {Diplomarbeit},
    abstract = {Wir besch\"aftigen uns in dieser Arbeit mit Feedback Vertex Set und Feedback Arc Set auf gerichteten Graphen im Kontext parametrisierter Algorithmen. Das Feedback-Vertex-Set-Problem besteht darin, in einem gegebenen Graphen eine Knotenmenge mit h\"ochstens k Knoten zu finden, deren L\"oschen alle Kreise des Graphen zerst\"ort. Feedback Arc Set ist das analoge Problem f\"ur Kantenl\"oschungen. Beide Probleme sind auf allgemeinen gerichteten Graphen NP-vollst\"andig. Wir geben einen \"Uberblick \"uber Komplexit\"ats- und Approximationsergebnisse f\"ur Feedback Vertex Set und Feedback Arc Set auf verschiedenen Klassen gerichteter Graphen. Zudem pr\"asentieren wir einen Polynomialzeitalgorithmus zur Berechnung eines Problemkerns der Gr\"o\"se k^2 + 2k f\"ur Feedback Arc Set auf Turniergraphen und stellen zwei neue parametrisierte Algorithmen vor: - Eine Polynomialzeitalgorithmus zur Berechnung eines Problemkerns der Gr\"o\"se k^2 + 2k f\"ur Feedback Arc Set auf Turniergraphen. - Einen O(2^k n^2(log n + k))-Algorithmus f\"ur Feedback Vertex Set auf Turniergraphen. - Einen O(3,38^k n^6)-Algorithmus f\"ur Feedback Arc Set auf bipartiten Turniergraphen.},
    keywords = {fpt},
    owner = {anke},
    timestamp = {2009.10.01},
    }

2004

  • Böcker, S. (2004) Unrooted supertrees: limitations, traps, and phylogenetic patchworks. IN Bininda-Emonds, O. R. P. (Ed.), Phylogenetic supertrees: combining information to reveal the tree of life..
    [Bibtex]
    @InCollection{boecker04unrooted,
    author = {Sebastian B\"ocker},
    title = {Unrooted Supertrees: Limitations, Traps, and Phylogenetic Patchworks},
    booktitle = {Phylogenetic supertrees: Combining information to reveal the Tree of Life},
    publisher = {Kluwer Academic},
    year = {2004},
    editor = {Olaf R.P. Bininda-Emonds},
    volume = {4},
    series = {Computational Biology Book Series},
    chapter = {15},
    pages = {331--351},
    file = {Boecker_UnrootedSupertrees_CompBiolBookSeries_2004.pdf:2004/Boecker_UnrootedSupertrees_CompBiolBookSeries_2004.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics; supertrees;},
    }
  • [DOI] Böcker, S. (2004) Sequencing from compomers: using mass spectrometry for DNA de-novo sequencing of 200+~nt. IN J comput biol, 11.1110–1134.
    [Bibtex]
    @Article{boecker04sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: Using mass spectrometry for {DNA} de-novo sequencing of 200+~nt},
    journal = {J Comput Biol},
    year = {2004},
    volume = {11},
    number = {6},
    pages = {1110--1134},
    doi = {10.1089/cmb.2004.11.1110},
    file = {Boecker_SequencingFromCompomers_JCB_2004.pdf:2004/Boecker_SequencingFromCompomers_JCB_2004.pdf:PDF},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    pmid = {15662201},
    }
  • Böcker, S. (2004) Sequencing from compomers: the puzzle Tuscany, Italy, , 132–146.
    [Bibtex]
    @InProceedings{boecker04sequencing-fun,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: The puzzle},
    booktitle = {Proc. of {FUN} with Algorithms 2004},
    year = {2004},
    pages = {132--146},
    address = {Tuscany, Italy},
    file = {Boecker_SequencingFromCompomers_FUN_2004.pdf:2004/Boecker_SequencingFromCompomers_FUN_2004.pdf:PDF},
    journalref = {boecker06sequencing},
    keywords = {jena; linkpdf; dna sequencing, dna ms},
    }
  • Böcker, S. (2004) Sequencing from compomers is NP-hard .
    [Bibtex]
    @TechReport{boecker04sequencing-hard-techrep,
    author = {Sebastian B\"ocker},
    title = {Sequencing From Compomers is {NP}-hard},
    year = {2004},
    number = {TR-2004-01},
    keywords = {jena; dna sequencing, dna ms},
    }
  • Böcker, S. (2004) Weighted sequencing from compomers: DNA de-novo sequencing from mass spectrometry data in the presence of false negative peaks , 13–23.
    [Bibtex]
    @InProceedings{boecker04weighted,
    author = {Sebastian B\"ocker},
    title = {Weighted Sequencing from Compomers: {DNA} de-novo sequencing from mass spectrometry data in the presence of false negative peaks},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2004)},
    year = {2004},
    volume = {P-53},
    series = {Lecture Notes in Informatics},
    pages = {13--23},
    file = {Boecker_WeightedSequencingFromCompomers_GCB_2004.pdf:2004/Boecker_WeightedSequencingFromCompomers_GCB_2004.pdf:PDF},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    url = {http://subs.emis.de/LNI/Proceedings/Proceedings53/article2724.html},
    }
  • Böcker, S. & relax Zs (2004) The money changing problem revisited: computing the Frobenius number in time $O(ka_1)$ .
    [Bibtex]
    @TechReport{boecker04money,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {The Money Changing Problem Revisited: Computing the {Frobenius} Number in Time {$O(ka_1)$}},
    year = {2004},
    type = {Technical Report},
    number = {TR-2004-02},
    keywords = {jena; frobenius},
    optmonth = jun,
    }
  • [DOI] Lefmann, M., Honisch, C., Böcker, S., Storm, N., von Wintzingerode, F., Schlötelburg, C., Moter, A., van den Boom, D. & Göbel, U. B. (2004) A novel mass spectrometry based tool for genotypic identification of mycobacteria. IN J clin microbiol, 42.339–346.
    [Bibtex]
    @Article{lefmann04novel,
    author = {Michael Lefmann and Christiane Honisch and Sebastian B\"ocker and Niels Storm and Friedrich {von Wintzingerode} and Cord Schl\"otelburg and Annette Moter and Dirk {van den Boom} and Ulf B. G\"obel},
    title = {A novel mass spectrometry based tool for genotypic identification of mycobacteria},
    journal = {J Clin Microbiol},
    year = {2004},
    volume = {42},
    number = {1},
    pages = {339--346},
    doi = {http://jcm.asm.org/cgi/reprint/42/1/339},
    keywords = {jena; linkpdf; dna ms},
    pmid = {14715774},
    }
  • Stanssens, P., Zabeau, M., Meersseman, G., Remes, G., Gansemans, Y., Storm, N., Hartmer, R., Honisch, C., Rodi, C. P., Böcker, S. & van den Boom, D. (2004) High-throughput MALDI-TOF discovery of genomic sequence polymorphisms. IN Genome res, 14.126–133.
    [Bibtex]
    @Article{stanssens04high-throughput,
    author = {Patrick Stanssens and Marc Zabeau and Geert Meersseman and Gwen Remes and Yannick Gansemans and Niels Storm and Ralf Hartmer and Christiane Honisch and Charles P. Rodi and Sebastian B\"ocker and Dirk {van den Boom}},
    title = {High-throughput {MALDI-TOF} Discovery of Genomic Sequence Polymorphisms},
    journal = {Genome Res},
    year = {2004},
    volume = {14},
    pages = {126--133},
    keywords = {jena; linkpdf; dna ms},
    url = {http://www.genome.org/cgi/content/full/14/1/126},
    }

2003

  • [DOI] Böcker, S. (2003) Sequencing from compomers: using mass spectrometry for DNA de-novo sequencing of 200+~nt (extended abstract) Springer, berlin, 476–497.
    [Bibtex]
    @InProceedings{boecker03sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: Using mass spectrometry for {DNA} de-novo sequencing of 200+~nt (extended abstract)},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2003)},
    year = {2003},
    volume = {2812},
    series = lncs,
    pages = {476--497},
    publisher = Springer,
    doi = {10.1007/b13243},
    file = {:2003/Boecker_SequencingFromCompomers_WABI_2003.pdf:PDF},
    journalref = {boecker04sequencing},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    }
  • Böcker, S. (2003) Sequencing from compomers in the presence of false negative peaks .
    [Bibtex]
    @TechReport{boecker03sequencing-fn-techrep,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers in the presence of false negative peaks},
    year = {2003},
    number = {TR-2003-07},
    file = {Boecker_SequencingFromCompomersWeighted_techrep_2003.pdf:2003/Boecker_SequencingFromCompomersWeighted_techrep_2003.pdf:PDF},
    keywords = {jena; linkpdf; dna sequencing, dna ms},
    }
  • Böcker, S. (2003) Sequencing from compomers: using mass spectrometry for DNA de-novo sequencing of 200+~nt .
    [Bibtex]
    @TechReport{boecker03sequencing-techrep,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: Using mass spectrometry for {DNA} de-novo sequencing of 200+~nt},
    year = {2003},
    number = {TR-2003-04},
    file = {Boecker_SequencingFromCompomers_techrep_2003.pdf:2003/Boecker_SequencingFromCompomers_techrep_2003.pdf:PDF},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    }
  • [DOI] Böcker, S. (2003) SNP and mutation discovery using base-specific cleavage and MALDI-TOF mass spectrometry. IN Bioinformatics, 19.i44–i53.
    [Bibtex]
    @Article{boecker03snp,
    author = {Sebastian B\"ocker},
    title = {{SNP} and mutation discovery using base-specific cleavage and {MALDI-TOF} mass spectrometry},
    journal = {Bioinformatics},
    year = {2003},
    volume = {19},
    pages = {i44--i53},
    note = {Proc. of \emph{Intelligent Systems for Molecular Biology} (ISMB 2003)},
    doi = {10.1093/bioinformatics/btg1004},
    keywords = {jena; linkpdf; dna ms},
    pmid = {12855436},
    url = {http://www.iscb.org/ismb2003/paperAbstracts/btg1004.pdf},
    }
  • Böcker, S. (2003) SNP and mutation discovery using base-specific cleavage and MALDI-TOF mass spectrometry .
    [Bibtex]
    @TechReport{boecker03snp-techrep,
    author = {Sebastian B\"ocker},
    title = {{SNP} and mutation discovery using base-specific cleavage and {MALDI-TOF} mass spectrometry},
    year = {2003},
    number = {TR-2003-02},
    file = {Boecker_SNPMutationDiscovery_techrep_2003.pdf:2003/Boecker_SNPMutationDiscovery_techrep_2003.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    }
  • [DOI] Hartmer, R., Storm, N., Boecker, S., Rodi, C. P., Hillenkamp, F., Jurinke, C. & van den Boom, D. (2003) RNAse T1 mediated base-specific cleavage and MALDI-TOF MS for high-throughput comparative sequence analysis. IN Nucleic acids res, 31.e47.
    [Bibtex]
    @Article{hartmer03rnase,
    author = {Ralf Hartmer and Niels Storm and Sebastian Boecker and Charles P. Rodi and Franz Hillenkamp and Christian Jurinke and Dirk {van den Boom}},
    title = {{RNAse T1} mediated base-specific cleavage and {MALDI-TOF MS} for high-throughput comparative sequence analysis},
    journal = {Nucleic Acids Res},
    year = {2003},
    volume = {31},
    number = {9},
    pages = {e47},
    doi = {10.1093/nar/gng047},
    file = {HartmerEtAl_RNaseT1Mediated_NucleicAcidsRes_2003.pdf:2003/HartmerEtAl_RNaseT1Mediated_NucleicAcidsRes_2003.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    pmid = {12711692},
    }

2002

  • [DOI] Böcker, S. (2002) Exponentially many supertrees. IN Appl math let, 15.861–865.
    [Bibtex]
    @Article{boecker02exponentially,
    author = {Sebastian B\"ocker},
    title = {Exponentially many supertrees},
    journal = {Appl Math Let},
    year = {2002},
    volume = {15},
    number = {7},
    pages = {861--865},
    doi = {10.1016/S0893-9659(02)00054-X},
    file = {Boecker_ExponentiallyManySupertrees_ApplMathLetters_2002.pdf:2002/Boecker_ExponentiallyManySupertrees_ApplMathLetters_2002.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }
  • [DOI] von Wintzingerode, F., Böcker, S., Schlötelburg, C., Chiu, N. H. L., Storm, N., Jurinke, C., Cantor, C. R., Göbel, U. B. & van den Boom, D. (2002) Base-specific fragmentation of amplified 16S rRNA genes and mass spectrometry analysis: a novel tool for rapid bacterial identification. IN Proc natl acad sci u s a, 99.7039–7044.
    [Bibtex]
    @Article{von-wintzingerode02base-specific,
    author = {Friedrich {von Wintzingerode} and Sebastian B\"ocker and Cord Schl\"otelburg and Norman H.L. Chiu and Niels Storm and Christian Jurinke and Charles R. Cantor and Ulf B. G\"obel and Dirk {van den Boom}},
    title = {Base-specific fragmentation of amplified {16{S} rRNA} genes and mass spectrometry analysis: A novel tool for rapid bacterial identification},
    journal = {Proc Natl Acad Sci U S A},
    year = {2002},
    volume = {99},
    number = {10},
    pages = {7039--7044},
    doi = {10.1073/pnas.102165899},
    file = {vonWintzingerodeEtAl_BaseSpecificFragmentation16SrRNA_PNAS_2002.pdf:2002/vonWintzingerodeEtAl_BaseSpecificFragmentation16SrRNA_PNAS_2002.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    }

2001

  • [DOI] Böcker, S. & Dress, A. W. M. (2001) Patchworks. IN Adv math, 157.1–21.
    [Bibtex]
    @Article{boecker01patchworks,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {Patchworks},
    journal = {Adv Math},
    year = {2001},
    volume = {157},
    number = {1},
    pages = {1--21},
    doi = {doi:10.1006/aima.1999.1912},
    file = {BoeckerDress_Patchworks_AdvMath_2001.pdf:2001/BoeckerDress_Patchworks_AdvMath_2001.pdf:PDF},
    keywords = {jena; linkpdf;},
    }

2000

  • [DOI] Böcker, S., Bryant, D., Dress, A. W. M. & Steel, M. A. (2000) Algorithmic aspects of tree amalgamation. IN J algorithms, 37.522–537.
    [Bibtex]
    @Article{boecker00algorithmic,
    author = {Sebastian B\"ocker and David Bryant and Andreas W.M. Dress and Michael A. Steel},
    title = {Algorithmic aspects of tree amalgamation},
    journal = {J Algorithms},
    year = {2000},
    volume = {37},
    pages = {522--537},
    doi = {10.1006/jagm.2000.1116},
    file = {BoeckerEtAl_AlgorithmicAspectsTreeAmalgamation_JAlgorithms_2000.pdf:2000/BoeckerEtAl_AlgorithmicAspectsTreeAmalgamation_JAlgorithms_2000.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }
  • [DOI] Böcker, S. & Dress, A. W. M. (2000) A note on maximal hierarchies. IN Adv math, 151.270–282.
    [Bibtex]
    @Article{boecker00note,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {A Note on Maximal Hierarchies},
    journal = {Adv Math},
    year = {2000},
    volume = {151},
    number = {2},
    pages = {270--282},
    doi = {doi:10.1006/aima.1999.1891},
    file = {BoeckerDress_NoteMaximalHierarchies_AdvMath_2000.pdf:2000/BoeckerDress_NoteMaximalHierarchies_AdvMath_2000.pdf:PDF},
    keywords = {jena; linkpdf;},
    }
  • Steel, M. A., Dress, A. W. M. & Böcker, S. (2000) Simple but fundamental limitations on supertree and consensus tree methods. IN Syst biol, 49.363–368.
    [Bibtex]
    @Article{steel00simple,
    author = {Michael A. Steel and Andreas W.M. Dress and Sebastian B\"ocker},
    title = {Simple but fundamental limitations on supertree and consensus tree methods},
    journal = {Syst Biol},
    year = {2000},
    volume = {49},
    number = {2},
    pages = {363--368},
    file = {SteelEtAl_SimpleFundamentalLimitations_SystematicBiology_2000.pdf:2000/SteelEtAl_SimpleFundamentalLimitations_SystematicBiology_2000.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    pmid = {12118411},
    }

1999

  • Böcker, S., Dress, A. W. M. & Steel, M. A. (1999) Patching up $X$-trees. IN Ann comb, 3.1–12.
    [Bibtex]
    @Article{boecker99patching,
    author = {Sebastian B\"ocker and Andreas W.M. Dress and Michael A. Steel},
    title = {Patching up {$X$}-Trees},
    journal = {Ann Comb},
    year = {1999},
    volume = {3},
    pages = {1--12},
    file = {BoeckerEtAl_PatchingUpXtrees_AnnalComb_1999.pdf:1999/BoeckerEtAl_PatchingUpXtrees_AnnalComb_1999.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }

1998

  • [DOI] Böcker, S. & Dress, A. W. M. (1998) Recovering symbolically dated, rooted trees from symbolic ultrametrics. IN Adv math, 138.105–125.
    [Bibtex]
    @Article{boecker98recovering,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {Recovering Symbolically Dated, Rooted Trees from Symbolic Ultrametrics},
    journal = {Adv Math},
    year = {1998},
    volume = {138},
    number = {1},
    pages = {105--125},
    doi = {doi:10.1006/aima.1998.1743},
    file = {BoeckerDress_RecoveringSymbolicallyDated_AdvMath_1998.pdf:1998/BoeckerDress_RecoveringSymbolicallyDated_AdvMath_1998.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }