Publications

Authors: Type:

2019

  • [DOI] K. Dührkop, M. Fleischauer, M. Ludwig, A. A. Aksenov, A. V. Melnik, M. Meusel, P. C. Dorrestein, J. Rousu, and S. Böcker, “Sirius 4: a rapid tool for turning tandem mass spectra into metabolite structure information,” Nat methods, 2019.
    [Bibtex]
    @Article{duehrkop19sirius4,
    author = {Kai D\"uhrkop and Markus Fleischauer and Marcus Ludwig and Alexander A. Aksenov and Alexey V. Melnik and Marvin Meusel and Pieter C. Dorrestein and Juho Rousu and Sebastian B\"ocker},
    title = {SIRIUS 4: a rapid tool for turning tandem mass spectra into metabolite structure information},
    journal = {Nat Methods},
    year = {2019},
    note = {Doi 10.1038/s41592-019-0344-8},
    abstract = {Mass spectrometry is a predominant experimental technique in metabolomics and related fields, but metabolite structural elucidation remains highly challenging. We report SIRIUS 4 (https://bio.informatik.uni-jena.de/sirius/), which provides a fast computational approach for molecular structure identification. SIRIUS 4 integrates CSI:FingerID for searching in molecular structure databases. Using SIRIUS 4, we achieved identification rates of more than 70% on challenging metabolomics datasets.},
    doi = {10.1038/s41592-019-0344-8},
    keywords = {jena},
    owner = {Sebastian},
    timestamp = {2019-03-18},
    }

2018

  • [DOI] P. Kulkarni, M. Dost, Ö. D. Bulut, A. Welle, S. Böcker, W. Boland, and A. Svatoš, “Secondary ion mass spectrometry imaging and multivariate data analysis reveal co-aggregation patterns of populus trichocarpa leaf surface compounds on a micrometer scale,” The plant journal, vol. 93, iss. 1, pp. 193-206, 2018.
    [Bibtex]
    @Article{kulkarni18secondary,
    author = {Kulkarni, Purva and Dost, Mina and Bulut, \"Ozg\"ul Demir and Welle, Alexander and B\"ocker, Sebastian and Boland, Wilhelm and Svato{\v{s}}, Ale{\v{s}}},
    title = {Secondary ion mass spectrometry imaging and multivariate data analysis reveal co-aggregation patterns of Populus trichocarpa leaf surface compounds on a micrometer scale},
    journal = {The Plant Journal},
    year = {2018},
    volume = {93},
    number = {1},
    pages = {193-206},
    abstract = {Spatially resolved analysis of a multitude of compound classes has become feasible with the rapid advancement in mass spectrometry imaging strategies. In this study, we present a protocol that combines high lateral resolution TOF-SIMS imaging with a multivariate data analysis (MVA) approach to probe the complex leaf surface chemistry of Populus trichocarpa. Here, epicuticular waxes (EWs) found on the adaxial leaf surface of Populus trichocarpa were blotted on silicon wafers and imaged using time-of-flight secondary ion mass spectrometry (TOF-SIMS) at 10 m and 1 m lateral resolution. Intense M+ and M- molecular ions were clearly visible, which made it possible to resolve the individual compound classes present in EWs. Series of long-chain aliphatic saturated alcohols (C21-C30), hydrocarbons (C25-C33) and wax esters (C44-C48) were clearly observed. These data correlated with the 7Li-chelation matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) analysis, which yielded mostly molecular adduct ions of the analyzed compounds.
    Subsequently, MVA was used to interrogate the TOF-SIMS dataset for identifying hidden patterns on the leafs surface based on its chemical profile. After the application of principal component analysis (PCA), a small number of principal components (PCs) were found to be sufficient to explain maximum variance in the data. To further confirm the contributions from pure components, a 5-factor multivariate curve resolution (MCR) model was applied. Two distinct patterns of small islets, here termed crystals were apparent from the resulting score plots. Based on PCA and MCR results, the crystals were found to be formed by C23 or C29 alcohols. Other less obvious patterns observed in the PCs revealed that the adaxial leaf surface is coated with a relatively homogenous layer of alcohols, hydrocarbons and wax esters.
    The ultra-high resolution TOF-SIMS imaging combined with MVA approach helped to highlight the diverse patterns underlying the leafs surface. Currently, the methods available to analyze the surface chemistry of waxes in conjunction with the spatial information related to the distribution of compounds are limited. This study uses tools that may provide important biological insights into the composition of the wax layer, how this layer is repaired after mechanical damage or insect feeding, and which transport mechanisms are involved in deploying wax constituents to specific regions on the leaf surface.},
    doi = {10.1111/tpj.13763},
    keywords = {jena; SIMS imaging},
    owner = {Purva},
    pmid = {29117637},
    timestamp = {2017-10-24},
    }
  • [DOI] M. Ludwig, K. Dührkop, and S. Böcker, “Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints,” Bioinformatics, vol. 34, iss. 13, p. i333-i340, 2018.
    [Bibtex]
    @Article{ludwig18bayesian,
    author = {Marcus Ludwig and Kai D\"uhrkop and Sebastian B\"ocker},
    title = {Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints},
    journal = {Bioinformatics},
    year = {2018},
    volume = {34},
    number = {13},
    pages = {i333-i340},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2018).},
    abstract = {Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually rely on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Recently, we presented CSI:FingerID for searching in molecular structure databases using tandem mass spectrometry data. CSI:FingerID predicts a molecular fingerprint that encodes the structure of the query compound, then uses this to search a molecular structure database such as PubChem. Scoring of the predicted query fingerprint and deterministic target fingerprints is carried out assuming independence between the molecular properties constituting the fingerprint.
    We present a scoring that takes into account dependencies between molecular properties. As before, we predict posterior probabilities of molecular properties using machine learning. Dependencies between molecular properties are modeled as a Bayesian tree network; the tree structure is estimated on the fly from the instance data. For each edge, we also estimate the expected covariance between the two random variables. For fixed marginal probabilities, we then estimate conditional probabilities using the known covariance. Now, the corrected posterior probability of each candidate can be computed, and candidates are ranked by this score. Modeling dependencies improves identification rates of CSI:FingerID by 2.85 percentage points.},
    doi = {10.1093/bioinformatics/bty245},
    keywords = {MS; jena; fingerprints; CSI;},
    owner = {Sebastian},
    pmid = {29949965},
    timestamp = {2018.03.12},
    }
  • [DOI] T. Alexandrov, S. Böcker, P. Dorrestein, and E. Schymanski, “Computational metabolomics: identification, interpretation, imaging (Dagstuhl Seminar 17491),” Dagstuhl reports, vol. 7, iss. 12, p. 1–17, 2018.
    [Bibtex]
    @Article{alexandrov18computational,
    author = {Theodore Alexandrov and Sebastian B{\"o}cker and Pieter Dorrestein and Emma Schymanski},
    title = {Computational Metabolomics: Identification, Interpretation, Imaging ({Dagstuhl} {Seminar} 17491)},
    journal = {Dagstuhl Reports},
    year = {2018},
    volume = {7},
    number = {12},
    pages = {1--17},
    issn = {2192-5283},
    abstract = {Metabolites are key players in almost all biological processes, and play various functional roles providing energy, building blocks, signaling, communication, and defense. Metabolites serve as clinical biomarkers for detecting medical conditions such as cancer; small molecule drugs account for 90% of prescribed therapeutics. Complete understanding of biological systems requires detecting and interpreting the metabolome in time and space. Following in the steps of high-throughput sequencing, mass spectrometry (MS) has become established as a key analytical technique for large-scale studies of complex metabolite mixtures. MS-based experiments generate datasets of increasing complexity and size. The Dagstuhl Seminar on Computational Metabolomics brought together leading experts from the experimental (analytical chemistry and biology) and the computational (computer science and bioinformatics) side, to foster the exchange of expertise needed to advance computational metabolomics. The focus was on a dynamic schedule with overview talks followed by break-out sessions, selected by the participants, covering the whole experimental-computational continuum in mass spectrometry. Particular focus in this seminar was given to imaging mass spectrometry techniques that integrate a spacial component into the analysis, ranging in scale from single cells to organs and organisms. },
    address = {Dagstuhl, Germany},
    doi = {10.4230/DagRep.7.12.1},
    editor = {Theodore Alexandrov and Sebastian B{\"o}cker and Pieter Dorrestein and Emma Schymanski},
    keywords = {jena; Dagstuhl;},
    owner = {Sebastian},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    timestamp = {2018.04.09},
    url = {http://drops.dagstuhl.de/opus/volltexte/2018/8674},
    urn = {urn:nbn:de:0030-drops-86740},
    }
  • [DOI] M. Fleischauer and S. Böcker, “BCD Beam Search: considering suboptimal partial solutions in Bad Clade Deletion supertrees,” Peerj, vol. 6, p. e4987, 2018.
    [Bibtex]
    @Article{fleischauer18bcd,
    author = {Markus Fleischauer and Sebastian B\"ocker},
    title = {{BCD} {Beam} {Search}: Considering suboptimal partial solutions in {Bad} {Clade} {Deletion} supertrees},
    journal = {PeerJ},
    year = {2018},
    volume = {6},
    pages = {e4987},
    issn = {2167-8359},
    abstract = {
    Supertree methods enable the reconstruction of large phylogenies. The supertree problem
    can be formalized in different ways in order to cope with contradictory information in
    the input. Some supertree methods are based on encoding the input trees in a matrix;
    other methods try to find minimum cuts in some graph. Recently, we introduced {Bad} {Clade}
    {Deletion} (BCD) supertrees which combines the graph-based computation of minimum cuts
    with optimizing a global objective function on the matrix representation of the input
    trees. The BCD supertree method has guaranteed polynomial running time and is very swift
    in practice. The quality of reconstructed supertrees was superior to matrix
    representation with parsimony (MRP) and usually on par with SuperFine for simulated
    data; but particularly for biological data, quality of BCD supertrees could not keep up
    with SuperFine supertrees. Here, we present a beam search extension for the BCD
    algorithm that keeps alive a constant number of partial solutions in each top-down
    iteration phase. The guaranteed worst-case running time of the new algorithm is still
    polynomial in the size of the input. We present an exact and a randomized subroutine to
    generate suboptimal partial solutions. Both beam search approaches consistently improve
    supertree quality on all evaluated datasets when keeping 25 suboptimal solutions alive.
    Supertree quality of the BCD Beam Search algorithm is on par with MRP and SuperFine even
    for biological data. This is the best performance of a polynomial-time supertree
    algorithm reported so far.
    },
    doi = {10.7717/peerj.4987},
    keywords = {jena; supertrees; bcd supertrees; MRC; Matrix representation with parsimony; Split fit; Phylogeny; Phylogenetics; Supermatrix; Supertree; Divide-and-conquer; Minimum cut},
    owner = {Sebastian},
    url = {https://doi.org/10.7717/peerj.4987},
    }
  • [DOI] E. Bach, S. Szedmak, C. Brouard, S. Böcker, and J. Rousu, “Liquid-chromatography retention order prediction for metabolite identification,” Bioinformatics, vol. 34, iss. 17, p. i875-i883, 2018.
    [Bibtex]
    @Article{bach18liquid-chromatography,
    author = {Eric Bach and Sandor Szedmak and C\'eline Brouard and Sebastian B\"ocker and Juho Rousu},
    title = {Liquid-Chromatography Retention Order Prediction for Metabolite Identification},
    journal = {Bioinformatics},
    year = {2018},
    volume = {34},
    number = {17},
    pages = {i875-i883},
    note = {Proc.\ of \emph{European Conference on Computational Biology} (ECCB 2018)},
    abstract = {Motivation: Liquid Chromatography (LC) followed by tandem Mass Spectrometry (MS/MS) is one of the predominant methods for metabolite identification. In recent years, machine learning has started to transform the analysis of tandem mass spectra and the identification of small molecules. In contrast, LC data is rarely used to improve metabolite identification, despite numerous published methods for retention time prediction using machine learning.
    Results: We present a machine learning method for predicting the retention order of molecules; that is, the order in which molecules elute from the LC column. Our method has important advantages over previous approaches: We show that retention order is much better conserved between instruments than retention time. To this end, our method can be trained using retention time measurements from different LC systems and configurations without tedious preprocessing, significantly increasing the amount of available training data. Our experiments demonstrate that retention order prediction is an effective way to learn retention behaviour of molecules from heterogeneous retention time data. Finally, we demonstrate how retention order prediction and MS/MS-based scores can be combined for more accurate metabolite identifications when analyzing a complete LC-MS/MS run.},
    doi = {10.1093/bioinformatics/bty590},
    keywords = {jena; retention time; RT; machine learning; ML; compound identification; mass spectrometry; MS;},
    owner = {Sebastian},
    timestamp = {2018.06.07},
    }
  • [DOI] K. Peters, A. Worrich, A. Weinhold, O. Alka, G. Balcke, C. Birkemeyer, H. Bruelheide, O. W. Calf, S. Dietz, K. Dührkop, E. Gaquerel, U. Heinig, M. Kücklich, M. Macel, C. Müller, Y. Poeschl, G. Pohnert, C. Ristok, V. M. Rodr’i{}guez, C. Ruttkies, M. Schuman, R. Schweiger, N. Shahaf, C. Steinbeck, M. Tortosa, H. Treutler, N. Ueberschaar, P. Velasco, B. M. Weiß{}, A. Widdig, S. Neumann, and N. M. {van Dam}, “Current challenges in plant eco-metabolomics.,” Int j mol sci, vol. 19, iss. 5, 2018.
    [Bibtex]
    @Article{peters18current,
    author = {Peters, Kristian and Worrich, Anja and Weinhold, Alexander and Alka, Oliver and Balcke, Gerd and Birkemeyer, Claudia and Bruelheide, Helge and Calf, Onno W. and Dietz, Sophie and D\"uhrkop, Kai and Gaquerel, Emmanuel and Heinig, Uwe and K\"ucklich, Marlen and Macel, Mirka and M\"uller, Caroline and Poeschl, Yvonne and Pohnert, Georg and Ristok, Christian and Rodr\'\i{}guez, Victor M. and Ruttkies, Christoph and Schuman, Meredith and Schweiger, Rabea and Shahaf, Nir and Steinbeck, Christoph and Tortosa, Maria and Treutler, Hendrik and Ueberschaar, Nico and Velasco, Pablo and Wei\ss{}, Brigitte M. and Widdig, Anja and Neumann, Steffen and {van Dam}, Nicole M.},
    title = {Current challenges in plant eco-metabolomics.},
    journal = {Int J Mol Sci},
    year = {2018},
    volume = {19},
    number = {5},
    abstract = {The relatively new research discipline of Eco-Metabolomics is the application of metabolomics techniques to ecology with the aim to characterise biochemical interactions of organisms across different spatial and temporal scales. Metabolomics is an untargeted biochemical approach to measure many thousands of metabolites in different species, including plants and animals. Changes in metabolite concentrations can provide mechanistic evidence for biochemical processes that are relevant at ecological scales. These include physiological, phenotypic and morphological responses of plants and communities to environmental changes and also interactions with other organisms. Traditionally, research in biochemistry and ecology comes from two different directions and is performed at distinct spatiotemporal scales. Biochemical studies most often focus on intrinsic processes in individuals at physiological and cellular scales. Generally, they take a bottom-up approach scaling up cellular processes from spatiotemporally fine to coarser scales. Ecological studies usually focus on extrinsic processes acting upon organisms at population and community scales and typically study top-down and bottom-up processes in combination. Eco-Metabolomics is a transdisciplinary research discipline that links biochemistry and ecology and connects the distinct spatiotemporal scales. In this review, we focus on approaches to study chemical and biochemical interactions of plants at various ecological levels, mainly plant-organismal interactions, and discuss related examples from other domains. We present recent developments and highlight advancements in Eco-Metabolomics over the last decade from various angles. We further address the five key challenges: (1) complex experimental designs and large variation of metabolite profiles; (2) feature extraction; (3) metabolite identification; (4) statistical analyses; and (5) bioinformatics software tools and workflows. The presented solutions to these challenges will advance connecting the distinct spatiotemporal scales and bridging biochemistry and ecology.},
    doi = {10.3390/ijms19051385},
    keywords = {jena; review;},
    pmid = {29734799},
    }
  • [DOI] K. Dührkop, M. A. Lataretu, T. W. J. White, and S. Böcker, “Heuristic algorithms for the maximum colorful subtree problem,” in Proc. of workshop on algorithms in bioinformatics (wabi 2018), Dagstuhl, Germany, 2018, p. 23:1–23:14.
    [Bibtex]
    @InProceedings{duehrkop18heuristic,
    author = {Kai D\"uhrkop and Marie A. Lataretu and W. Timothy J. White and Sebastian B\"ocker},
    title = {Heuristic algorithms for the Maximum Colorful Subtree problem},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2018)},
    year = {2018},
    volume = {113},
    series = {Leibniz International Proceedings in Informatics (LIPIcs)},
    pages = {23:1--23:14},
    address = {Dagstuhl, Germany},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    abstract = {In metabolomics, small molecules are structurally elucidated using tandem mass spectrometry (MS/MS); this computational task can be formulated as the Maximum
    Colorful Subtree problem, which is NP-hard. Unfortunately, data from a single metabolite requires us to solve hundreds or thousands of instances of this problem---and in a single Liquid Chromatography MS/MS run, hundreds or thousands of metabolites are measured.
    Here, we comprehensively evaluate the performance of several heuristic algorithms for the problem. Unfortunately, as is often the case in bioinformatics, the structure of the (chemically) true solution is not known to us; therefore we can only evaluate against the optimal solution of an instance. Evaluating the quality of a heuristic based on scores can be misleading: Even a slightly suboptimal solution can be structurally very different from the optimal solution, but it is the structure of a solution and not its score that is relevant for the downstream analysis. To this end, we propose a different evaluation setup: Given a set of candidate instances of
    which exactly one is known to be correct, the heuristic in question solves each instance to the best of its ability, producing a score for each instance, which is then used to rank the instances. We then evaluate whether the correct instance is ranked highly by the heuristic.
    We find that one particular heuristic consistently ranks the correct instance in a top position. We also find that the scores of the best heuristic solutions are very close to the optimal score; in contrast, the structure of the solutions can deviate significantly from the optimal structures. Integrating the heuristic allowed us to speed up computations in practice by a factor of 100-fold.
    },
    doi = {10.4230/LIPIcs.WABI.2018.23},
    isbn = {978-3-95977-082-8},
    issn = {1868-8969},
    keywords = {jena; Maximum Color Subtree problem; heuristics;},
    owner = {Sebastian},
    timestamp = {2018.06.10},
    url = {http://drops.dagstuhl.de/opus/volltexte/2018/9325},
    }
  • [DOI] E. W. Deutsch, Y. Perez-Riverol, R. J. Chalkley, M. Wilhelm, S. Tate, T. Sachsenberg, M. Walzer, L. Käll, B. Delanghe, S. Böcker, E. L. Schymanski, P. Wilmes, V. Dorfer, B. Kuster, P. Volders, N. Jehmlich, J. P. C. Vissers, D. W. Wolan, A. Y. Wang, L. Mendoza, J. Shofstahl, A. W. Dowsey, J. Griss, R. M. Salek, S. Neumann, P. Binz, H. Lam, J. A. Vizca{‘i}no, N. Bandeira, and H. Röst, “Expanding the use of spectral libraries in proteomics,” J proteome res, vol. 17, iss. 12, pp. 4051-4060, 2018.
    [Bibtex]
    @Article{deutsch18expanding,
    author = {Deutsch, Eric W and Perez-Riverol, Yasset and Chalkley, Robert J and Wilhelm, Mathias and Tate, Stephen and Sachsenberg, Timo and Walzer, Mathias and K\"all, Lukas and Delanghe, Bernard and B\"ocker, Sebastian and Schymanski, Emma L and Wilmes, Paul and Dorfer, Viktoria and Kuster, Bernhard and Volders, Pieter-Jan and Jehmlich, Nico and Vissers, Johannes P C and Wolan, Dennis W and Wang, Ana Y and Mendoza, Luis and Shofstahl, Jim and Dowsey, Andrew W and Griss, Johannes and Salek, Reza M and Neumann, Steffen and Binz, Pierre-Alain and Lam, Henry and Vizca{\'\i}no, Juan Antonio and Bandeira, Nuno and R\"ost, Hannes},
    title = {Expanding the use of spectral libraries in proteomics},
    journal = {J Proteome Res},
    year = {2018},
    volume = {17},
    number = {12},
    pages = {4051-4060},
    abstract = {The 2017 Dagstuhl Seminar on Computational Proteomics provided an opportunity for a broad discussion on the current state and future directions of the generation and use of peptide tandem mass spectrometry spectral libraries. Their use in proteomics is growing slowly, but there are multiple challenges in the field that must be addressed to further increase the adoption of spectral libraries and related techniques. The primary bottlenecks are the paucity of high quality and comprehensive libraries and the general difficulty of adopting spectral library searching into existing workflows. There are several existing spectral library formats, but none capture a satisfactory level of metadata; therefore a logical next improvement is to design a more advanced, Proteomics Standards Initiative-approved spectral library format that can encode all of the desired metadata. The group discussed a series of metadata requirements organized into three designations of completeness or quality, tentatively dubbed bronze, silver, and gold. The metadata can be organized at four different levels of granularity: at the collection (library) level, at the individual entry (peptide ion) level, at the peak (fragment ion) level, and at the peak annotation level. Strategies for encoding mass modifications in a consistent manner and the requirement for encoding high-quality and commonly-seen but as-yet-unidentified spectra were discussed. The group also discussed related topics, including strategies for comparing two spectra, techniques for generating representative spectra for a library, approaches for selection of optimal signature ions for targeted workflows, and issues surrounding the merging of two or more libraries into one. We present here a review of this field and the challenges that the community must address in order to accelerate the adoption of spectral libraries in routine analysis of proteomics datasets.},
    doi = {10.1021/acs.jproteome.8b00485},
    keywords = {jena; Dagstuhl;},
    pmid = {30270626},
    pubstatus = {aheadofprint},
    revised = {2018-10-01},
    timestamp = {2018.10.15},
    }

2017

  • [DOI] S. Böcker, “Searching molecular structure databases using tandem MS data: are we there yet?,” Curr opin chem biol, vol. 36, pp. 1-6, 2017.
    [Bibtex]
    @Article{boecker17searching,
    author = {Sebastian B\"ocker},
    title = {Searching molecular structure databases using tandem {MS} data: are we there yet?},
    journal = {Curr Opin Chem Biol},
    year = {2017},
    volume = {36},
    pages = {1-6},
    issn = {1367-5931},
    abstract = {Untargeted metabolomics experiments usually rely on tandem mass spectrometry (MS/MS) to identify the thousands of compounds in a complex sample. Spectral libraries used for identification are incomplete, and many metabolites remain unknown. There has been a recent development to replace spectral libraries by molecular structure databases when searching the MS/MS data of the unknown compound. Several tools have been developed for this task, including CFM-ID, MetFrag, MAGMa(+), FingerID and CSI:FingerID. These methods are already helpful for everyday metabolomics; with further advances, these methods can become indispensable tools for tomorrow's metabolomics. Here, I discuss several questions related to this task, such as: Why not wait for spectral libraries to grow sufficiently? Why evaluate methods outside their 'comfort zone'? Should we use prior information such as citation frequencies? And, ultimately: are we there yet?},
    doi = {10.1016/j.cbpa.2016.12.010},
    keywords = {jena; MS; review;},
    owner = {Sebastian},
    pmid = {28025165},
    timestamp = {2017.01.02},
    url = {https://authors.elsevier.com/a/1UF-u4sz6LvFfY},
    }
  • [DOI] B. Bartels, P. Kulkarni, N. Danz, S. Böcker, H. P. Saluz, and A. Svatoš, “Mapping metabolites from rough terrain: laser ablation electrospray ionization on non-flat samples,” Rsc adv, vol. 7, pp. 9045-9050, 2017.
    [Bibtex]
    @Article{bartels17mapping,
    author = {Benjamin Bartels and Purva Kulkarni and Norbert Danz and Sebastian B\"ocker and Hans Peter Saluz and Ale\v{s} Svato\v{s}},
    title = {Mapping metabolites from rough terrain: laser ablation electrospray ionization on non-flat samples},
    journal = {RSC Adv},
    year = {2017},
    volume = {7},
    pages = {9045-9050},
    abstract = {Established laser-based ionization experiments require the surface of a sample to be as flat as possible to guarantee optimal laser focus. A laser ablation electrospray ionization (LAESI) source was custom-built to accommodate the topography of non-flat sample surfaces. Employing a confocal distance sensor, a height profile of the surface in question was recorded prior to the actual ionization experiment. The robustness of the system was evaluated by the metabolic profiling of radish (Raphanus sativus) leaves, chosen due to their pronounced surface features and known content of specialized metabolites. After the ionization experiments, light microscopy imaging was performed to evaluate ablation crater size and position. Reproducible ablation mark diameters of 69 � 7 micrometer in average have been achieved. Mass spectrometric imaging capability has been proven on R. sativus leaf samples as well.},
    doi = {10.1039/C6RA26854D},
    keywords = {jena; MS; mass spectrometry; imaging MS;},
    owner = {Sebastian},
    timestamp = {2017.01.28},
    url = {http://pubs.rsc.org/en/content/articlehtml/2017/ra/c6ra26854d},
    }
  • C. Brouard, E. Bach, S. Böcker, and J. Rousu, “Magnitude-preserving ranking for structured outputs,” in Proc. of asian conference on machine learning, 2017, p. 407–422.
    [Bibtex]
    @InProceedings{brouard17magnitude-preserving,
    author = {C\'{e}line Brouard and Eric Bach and Sebastian B\"{o}cker and Juho Rousu},
    title = {Magnitude-Preserving Ranking for Structured Outputs},
    booktitle = {Proc. of Asian Conference on Machine Learning},
    year = {2017},
    volume = {77},
    series = {Proceedings of Machine Learning Research},
    pages = {407--422},
    publisher = {PMLR},
    abstract = {In this paper, we present a novel method for solving structured prediction problems, based on combining Input Output Kernel Regression (IOKR) with an extension of magnitude-preserving ranking to structured output spaces. In particular, we concentrate on the case where a set of candidate outputs has been given, and the associated pre-image problem calls for ranking the set of candidate outputs. Our method, called magnitude-preserving IOKR, both aims to produce a good approximation of the output feature vectors, and to preserve the magnitude differences of the output features in the candidate sets. For the case where the candidate set does not contain corresponding 'correct' inputs, we propose a method for approximating the inputs through application of IOKR in the reverse direction. We apply our method to two learning problems: cross-lingual document retrieval and metabolite identification. Experiments show that the proposed approach improves performance over IOKR, and in the latter application obtains the current state-of-the-art accuracy.},
    file = {brouard17a.pdf:http\://proceedings.mlr.press/v77/brouard17a/brouard17a.pdf:PDF},
    keywords = {jena; ML; MS; MS/MS; IOKR; CSI:FingerID;},
    url = {http://proceedings.mlr.press/v77/brouard17a.html},
    }
  • [DOI] M. S. Engler, K. Scheubert, U. S. Schubert, and S. Böcker, “Exploring the limits of the geometric copolymerization model,” Polymers, vol. 9, p. 101, 2017.
    [Bibtex]
    @Article{engler17exploring,
    author = {Engler, Martin S. and Scheubert, Kerstin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {Exploring the Limits of the Geometric Copolymerization Model},
    journal = {Polymers},
    year = {2017},
    volume = {9},
    pages = {101},
    abstract = {The Geometric copolymerization model is a recently introduced statistical Markov chain model. Here, we investigate its practicality. First, several approaches to identify the optimal model parameters from observed copolymer fingerprints are evaluated using Monte-Carlo simulated data. Directly optimizing the parameters is robust against noise but has impractically long running times. A compromise between robustness and running time is found by exploiting the relationship between monomer concentrations calculated by ordinary differential equations and the Geometric model. Second, we investigate the applicability of the model to copolymerizations beyond living polymerization and show that the model is useful for copolymerizations involving termination and depropagation reactions.},
    comment = {Eigentlich das "fitting MS1" paper, aber sehr sehr kurz},
    doi = {10.3390/polym9030101},
    keywords = {polymer ms; jena;},
    owner = {martin},
    timestamp = {2016.04.19},
    }
  • [DOI] M. Fleischauer and S. Böcker, “Bad Clade Deletion supertrees: a fast and accurate supertree algorithm,” Mol biol evol, vol. 34, pp. 2408-2421, 2017.
    [Bibtex]
    @Article{fleischauer17bad,
    author = {Markus Fleischauer and Sebastian B\"ocker},
    title = {{B}ad {C}lade {D}eletion Supertrees: A Fast and Accurate Supertree Algorithm},
    journal = {Mol Biol Evol},
    year = {2017},
    volume = {34},
    pages = {2408-2421},
    abstract = {Supertree methods merge a set of overlapping phylogenetic trees into a supertree containing all taxa of the input trees. The challenge in supertree reconstruction is the way of dealing with conflicting information in the input trees. Many different algorithms for different objective functions have been suggested to resolve these conflicts. In particular, there exist methods based on encoding the source trees in a matrix, where the supertree is constructed applying a local search heuristic to optimize the respective objective function. We present a novel heuristic supertree algorithm called Bad Clade Deletion (BCD) supertrees. It uses minimum cuts to delete a locally minimal number of columns from such a matrix representation so that it is compatible. This is the complement problem to Matrix Representation with Compatibility (Maximum Split Fit). Our algorithm has guaranteed polynomial worst-case running time and performs swiftly in practice. Different from local search heuristics, it guarantees to return the directed perfect phylogeny for the input matrix, corresponding to the parent tree of the input trees, if one exists. Comparing supertrees to model trees for simulated data, BCD shows a better accuracy (F1-score) than the state-of-the-art algorithms SuperFine (up to 3\%) and Matrix Representation with Parsimony (up to 7\%); at the same time, BCD is up to 7 times faster than SuperFine, and up to 600 times faster than Matrix Representation with Parsimony. Finally, using the BCD supertree as a starting tree for a combined Maximum Likelihood analysis using RAxML, we reach significantly improved accuracy (1\% higher F1-score) and running time (1.7-fold speedup).},
    doi = {10.1093/molbev/msx191},
    keywords = {jena; supertrees; bcd supertrees;},
    owner = {Sebastian},
    pmid = {28873954},
    timestamp = {2017.06.25},
    }
  • [DOI] F. Hufsky and S. Böcker, “Mining molecular structure databases: identification of small molecules based on fragmentation mass spectrometry data,” Mass spectrom rev, vol. 36, iss. 5, pp. 624-633, 2017.
    [Bibtex]
    @Article{hufsky17mining,
    author = {Franziska Hufsky and Sebastian B\"ocker},
    title = {Mining molecular structure databases: Identification of small molecules based on fragmentation mass spectrometry data},
    journal = {Mass Spectrom Rev},
    year = {2017},
    volume = {36},
    number = {5},
    pages = {624-633},
    abstract = {Mass spectrometry (MS) is a key technology for the analysis of small molecules. For the identification and structural elucidation of novel molecules, new approaches beyond straightforward spectral comparison are required. In this review, we will cover computational methods that help with the identification of small molecules by analyzing fragmentation MS data. We focus on the four main approaches to mine a database of metabolite structures, that is rule-based fragmentation spectrum prediction,
    combinatorial fragmentation, competitive fragmentation modeling, and molecular fingerprint prediction.},
    doi = {10.1002/mas.21489},
    keywords = {jena},
    owner = {Sebastian},
    pmid = {26763615},
    timestamp = {2015.12.16},
    }
  • [DOI] K. Scheubert, F. Hufsky, D. Petras, M. Wang, L. Nothias, K. Dührkop, N. Bandeira, P. C. Dorrestein, and S. Böcker, “Significance estimation for large scale metabolomics annotations by spectral matching,” Nat commun, vol. 8, p. 1494, 2017.
    [Bibtex]
    @Article{scheubert17significance,
    author = {Scheubert, Kerstin and Hufsky, Franziska and Petras, Daniel and Wang, Mingxun and Nothias, Louis-F\'elix and D\"uhrkop, Kai and Bandeira, Nuno and Dorrestein, Pieter C. and B\"ocker, Sebastian},
    title = {Significance estimation for large scale metabolomics annotations by spectral matching},
    journal = {Nat Commun},
    year = {2017},
    volume = {8},
    pages = {1494},
    abstract = {The annotation of small molecules in untargeted mass spectrometry relies on the matching of fragment spectra to reference library spectra. While various spectrum-spectrum match scores exist, the field lacks statistical methods for estimating the false discovery rates (FDR) of these annotations. We present empirical Bayes and target-decoy based methods to estimate the false discovery rate (FDR) for 70 public metabolomics data sets. We show that the spectral matching settings need to be adjusted for each project. By adjusting the scoring parameters and thresholds, the number of annotations rose, on average, by +139% (ranging from -92 up to +5705%) when compared with a default parameter set available at GNPS. The FDR estimation methods presented will enable a user to assess the scoring criteria for large scale analysis of mass spectrometry based metabolomics data that has been essential in the advancement of proteomics, transcriptomics, and genomics science.},
    doi = {10.1038/s41467-017-01318-5},
    keywords = {jena; FDR;},
    owner = {Sebastian},
    pmid = {29133785},
    timestamp = {2017.04.03},
    }
  • [DOI] E. L. Schymanski, C. Ruttkies, M. Krauss, C. Brouard, T. Kind, K. Dührkop, F. R. Allen, A. Vaniya, D. Verdegem, S. Böcker, J. Rousu, H. Shen, H. Tsugawa, T. Sajed, O. Fiehn, B. Ghesquière, and S. Neumann, “Critical Assessment of Small Molecule Identification 2016: automated methods,” J cheminf, vol. 9, p. 22, 2017.
    [Bibtex]
    @Article{schymanski17critical,
    author = {Emma Louise Schymanski and Christoph Ruttkies and Martin Krauss and C\'{e}line Brouard and Tobias Kind and Kai D\"{u}hrkop and Felicity Ruth Allen and Arpana Vaniya and Dries Verdegem and Sebastian B\"{o}cker and Juho Rousu and Huibin Shen and Hiroshi Tsugawa and Tanvir Sajed and Oliver Fiehn and Bart Ghesqui\`{e}re and Steffen Neumann},
    title = {{Critical} {Assessment} of {Small} {Molecule} {Identification} 2016: Automated Methods},
    journal = {J Cheminf},
    year = {2017},
    volume = {9},
    pages = {22},
    abstract = {Background: The fourth round of the Critical Assessment of Small Molecule Identification (CASMI) Contest (www.casmi-contest.org) was held in 2016, with two new categories for automated methods. This article covers the 208 challenges in Categories 2 and 3, without and with metadata, from organization, participation, results and post-contest evaluation of CASMI 2016 through to perspectives for future contests and small molecule annotation/identification.
    Results: The Input Output Kernel Regression (CSI:IOKR) machine learning approach performed best in "Category 2: Best Automatic Structural Identification--In Silico Fragmentation Only", won by Team Brouard with 41% challenge wins. The winner of "Category 3: Best Automatic Structural Identification--Full Information" was Team Kind (MS-FINDER), with 76% challenge wins. The best methods were able to achieve over 30% Top 1 ranks in Category 2, with all methods ranking the correct candidate in the Top 10 in around 50% of challenges. This success rate rose to 70% Top 1 ranks in Category 3, with candidates in the Top 10 in over 80% of the challenges. The machine learning and chemistry-based approaches are shown to perform in complementary ways.
    Conclusions: The improvement in (semi-)automated fragmentation methods for small molecule identification has been substantial. The achieved high rates of correct candidates in the Top 1 and Top 10, despite large candidate numbers, open up great possibilities for high-throughput annotation of untargeted analysis for "known unknowns". As more high quality training data becomes available, the improvements in machine learning methods will likely continue, but the alternative approaches still provide valuable complementary information. Improved integration of experimental context will also improve identification success further for "real life" annotations. The true "unknown unknowns" remain to be evaluated in future CASMI contests.},
    doi = {10.1186/s13321-017-0207-1},
    keywords = {jena; CASMI; MS; metabolites; tandem MS; MS/MS;},
    owner = {Sebastian},
    pmid = {29086042},
    timestamp = {2016.12.15},
    }
  • [DOI] E. Barbosa, R. Röttger, A. Hauschild, S. {de Castro Soares}, S. Böcker, V. Azevedo, and J. Baumbach, “Lifestyle-specific-islands (LiSSI): integrated bioinformatics platform for genomic island analysis,” J integr bioinform, vol. 14, iss. 2, pp. 2017-10, 2017.
    [Bibtex]
    @Article{barbosa17lifestyle,
    author = {Barbosa, Eudes and R\"ottger, Richard and Hauschild, Anne-Christin and {de Castro Soares}, Siomar and B\"ocker, Sebastian and Azevedo, Vasco and Baumbach, Jan},
    title = {LifeStyle-Specific-Islands ({LiSSI}): Integrated Bioinformatics Platform for Genomic Island Analysis},
    journal = {J Integr Bioinform},
    year = {2017},
    volume = {14},
    number = {2},
    pages = {2017-0010},
    abstract = {Distinct bacteria are able to cope with highly diverse lifestyles; for instance, they can be free living or host-associated. Thus, these organisms must possess a large and varied genomic arsenal to withstand different environmental conditions. To facilitate the identification of genomic features that might influence bacterial adaptation to a specific niche, we introduce LifeStyle-Specific-Islands (LiSSI). LiSSI combines evolutionary sequence analysis with statistical learning (Random Forest with feature selection, model tuning and robustness analysis). In summary, our strategy aims to identify conserved consecutive homology sequences (islands) in genomes and to identify the most discriminant islands for each lifestyle.},
    created = {2017-07-05},
    doi = {10.1515/jib-2017-0010},
    keywords = {jena; Gene Clusters; Bacteria; Homologous genes; Island; Lifestyle; Machine Learning},
    owner = {NLM},
    pmid = {28678736},
    revised = {2017-07-28},
    timestamp = {2017.11.15},
    }

2016

  • [DOI] S. Böcker, J. Rousu, and E. Schymanski, “Computational Metabolomics (Dagstuhl Seminar 15492),” Dagstuhl reports, vol. 5, iss. 11, p. 180–192, 2016.
    [Bibtex]
    @Article{boecker16computational,
    author = {Sebastian B{\"o}cker and Juho Rousu and Emma Schymanski},
    title = {{Computational Metabolomics} ({Dagstuhl Seminar} 15492)},
    journal = {Dagstuhl Reports},
    year = {2016},
    volume = {5},
    number = {11},
    pages = {180--192},
    issn = {2192-5283},
    address = {Dagstuhl, Germany},
    doi = {10.4230/DagRep.5.11.180},
    editor = {Sebastian B{\"o}cker and Juho Rousu and Emma Schymanski},
    keywords = {jena; algorithms, bioinformatics, cheminformatics, computational mass spectrometry, computational metabolomics, databases},
    owner = {Sebastian},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    timestamp = {2016.04.07},
    url = {http://drops.dagstuhl.de/opus/volltexte/2016/5801},
    }
  • [DOI] S. Böcker and K. Dührkop, “Fragmentation trees reloaded,” J cheminform, vol. 8, p. 5, 2016.
    [Bibtex]
    @Article{boecker16fragmentation,
    author = {Sebastian B\"ocker and Kai D\"uhrkop},
    title = {Fragmentation trees reloaded},
    journal = {J Cheminform},
    year = {2016},
    volume = {8},
    pages = {5},
    abstract = {Untargeted metabolomics commonly uses liquid chromatography mass spectrometry to measure abundances of metabolites; subsequent tandem mass spectrometry is used to derive information about individual compounds. One of the bottlenecks in this experimental setup is the interpretation of fragmentation spectra to accurately and efficiently identify compounds. Fragmentation trees have become a powerful tool for the interpretation of tandem mass spectrometry data of small molecules. These trees are determined from the data using combinatorial optimization, and aim at explaining the experimental data via fragmentation cascades. Fragmentation tree computation does not require spectral or structural databases. To obtain biochemically meaningful trees, one needs an elaborate optimization function (scoring).
    We present a new scoring for computing fragmentation trees, transforming the combinatorial optimization into a Maximum A Posteriori estimator. We demonstrate the superiority of the new scoring for two tasks: Both for the de novo identification of molecular formulas of unknown compounds, and for searching a database for structurally similar compounds, our method SIRIUS 3, performs significantly better than the previous version of our method, as well as other methods for this task.
    SIRIUS 3 can be a part of an untargeted metabolomics workflow, allowing researchers to investigate unknowns using automated computational methods.},
    doi = {10.1186/s13321-016-0116-8},
    keywords = {jena; FT; fragmentation trees;},
    owner = {Sebastian},
    pmid = {26839597},
    timestamp = {2013.06.10},
    url = {http://www.jcheminf.com/content/8/1/5},
    }
  • [DOI] C. Brouard, H. Shen, K. Dührkop, F. d’Alché-Buc, S. Böcker, and J. Rousu, “Fast metabolite identification with input output kernel regression,” Bioinformatics, vol. 32, iss. 12, p. i28-i36, 2016.
    [Bibtex]
    @Article{brouard16fast,
    author = {C{\'{e}}line Brouard and Huibin Shen and Kai D{\"{u}}hrkop and Florence {d'Alch{\'{e}}-Buc} and Sebastian B{\"{o}}cker and Juho Rousu},
    title = {Fast metabolite identification with Input Output Kernel Regression},
    journal = {Bioinformatics},
    year = {2016},
    volume = {32},
    number = {12},
    pages = {i28-i36},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2016)},
    abstract = {MOTIVATION: An important problematic of metabolomics is to identify metabolites using tandem mass spectrometry data. Machine learning methods have been proposed recently to solve this problem by predicting molecular fingerprint vectors and matching these fingerprints against existing molecular structure databases. In this work we propose to address the metabolite identification problem using a structured output prediction approach. This type of approach is not limited to vector output space and can handle structured output space such as the molecule space.
    RESULTS: We use the Input Output Kernel Regression method to learn the mapping between tandem mass spectra and molecular structures. The principle of this method is to encode the similarities in the input (spectra) space and the similarities in the output (molecule) space using two kernel functions. This method approximates the spectra-molecule mapping in two phases. The first phase corresponds to a regression problem from the input space to the feature space associated to the output kernel. The second phase is a preimage problem, consisting in mapping back the predicted output feature vectors to the molecule space. We show that our approach achieves state-of-the-art accuracy in metabolite identification. Moreover, our method has the advantage of decreasing the running times for the training step and the test step by several orders of magnitude over the preceding methods.},
    doi = {10.1093/bioinformatics/btw246},
    file = {pdf:2016/BrouardEtAl_FastMetaboliteIdentification_Bioinformatics_2016:PDF},
    keywords = {jena; MS; tandem MS; CSI:FingerID; kernel methods; IOKR;},
    owner = {Sebastian},
    pmid = {27307628},
    timestamp = {2016.02.20},
    }
  • [DOI] M. S. Engler, S. Crotty, M. J. Barthel, C. Pietsch, U. S. Schubert, and S. Böcker, “Abundance correction for mass discrimination effects in polymer mass spectra,” Rapid commun mass spectrom, vol. 30, p. 1233–1241, 2016.
    [Bibtex]
    @Article{engler16abundance,
    author = {Engler, Martin S. and Crotty, Sarah and Barthel, Markus J and Pietsch, Christian and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {Abundance correction for mass discrimination effects in polymer mass spectra},
    journal = {Rapid Commun Mass Spectrom},
    year = {2016},
    volume = {30},
    pages = {1233--1241},
    abstract = {RATIONALE: Matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOFMS) is frequently used to analyze homo- and copolymers, i.e. for computing copolymer fingerprints. However, the oligomer abundances are influenced by mass discrimination, i.e. mass- and composition-dependent ionization. We have developed a computational method to correct the abundance bias caused by the mass discrimination.
    METHODS: MALDI-TOFMS in combination with computational methods was used to investigate three random copolymers with different ratios of styrene and isoprene. Furthermore, equimolar high- and low-mass styrene and isoprene homopolymers (2500 and 4200 Da) were mixed and also analyzed by MALDI-TOFMS. The abundances of both copolymers and homopolymers were corrected for mass discrimination effects with our new method.
    RESULTS: The novel computational method was integrated into the existing COCONUT software. The method was demonstrated using the measured styrene and isoprene co- and homopolymers. First, the method was applied to homopolymer spectra. Subsequently, the copolymer fingerprint was computed from the copolymer MALDI mass spectra and the correcting function applied. The changes in the composition are plausible, indicating that correction of copolymer abundances was reasonable.
    CONCLUSIONS: Our computational method may help to avoid erroneous conclusions when analyzing copolymer MS spectra. The software is freely available and represents a step towards comprehensive computational support in polymer science.},
    doi = {10.1002/rcm.7553},
    file = {EnglerEtAl_AbundanceCorrectionMass_RCMS_2016.pdf:2016/EnglerEtAl_AbundanceCorrectionMass_RCMS_2016.pdf:PDF},
    keywords = {polymer ms; jena;},
    owner = {martin},
    timestamp = {2016.03.08},
    }
  • [DOI] M. S. Engler, K. Scheubert, U. S. Schubert, and S. Böcker, “New statistical models for copolymerization,” Polymers, vol. 8, iss. 6, p. 240, 2016.
    [Bibtex]
    @Article{engler16statistical,
    author = {Engler, Martin S. and Scheubert, Kerstin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {New Statistical Models for Copolymerization},
    journal = {Polymers},
    year = {2016},
    volume = {8},
    number = {6},
    pages = {240},
    abstract = {For many years, copolymerization has been studied using mathematical and statistical models. Here, we present new Markov chain models for copolymerization kinetics: the Bernoulli and Geometric models. They model copolymer synthesis as a random process and are based on a basic reaction scheme. In contrast to previous Markov chain approaches to copolymerization, both models take variable chain lengths and time-dependent monomer probabilities into account and allow for computing sequence likelihoods and copolymer fingerprints. Fingerprints can be computed from copolymer mass spectra, potentially allowing us to estimate the model parameters from measured fingerprints. We compare both models against Monte Carlo simulations. We find that computing the models is fast and memory efficient.},
    doi = {10.3390/polym8060240},
    file = {EnglerEtAl_NewStatisticalModels_Polymers_2016.pdf:2016/EnglerEtAl_NewStatisticalModels_Polymers_2016.pdf:PDF},
    keywords = {polymer ms; jena; linkPDF},
    owner = {martin},
    timestamp = {2016.04.19},
    url = {http://www.mdpi.com/2073-4360/8/6/240},
    }
  • [DOI] M. Fleischauer and S. Böcker, “Collecting reliable clades using the greedy strict consensus merger,” Peerj, vol. 4, p. e2172, 2016.
    [Bibtex]
    @Article{fleischauer16collecting,
    author = {Fleischauer, Markus and B{\"o}cker, Sebastian},
    title = {Collecting reliable clades using the Greedy Strict Consensus Merger},
    journal = {PeerJ},
    year = {2016},
    volume = {4},
    pages = {e2172},
    issn = {2167-8359},
    abstract = {Supertree methods combine a set of phylogenetic trees into a single supertree. Similar to supermatrix methods, these methods provide a way to reconstruct larger parts of the Tree of Life, potentially evading the computational complexity of phylogenetic inference methods such as maximum likelihood. The supertree problem can be formalized in different ways, to cope with contradictory information in the input. Many supertree methods have been developed. Some of them solve NP-hard optimization problems like the well-known Matrix Representation with Parsimony, while others have polynomial worst-case running time but work in a greedy fashion (FlipCut). Both can profit from a set of clades that are already known to be part of the supertree. The Superfine approach shows how the Greedy Strict Consensus Merger (GSCM) can be used as preprocessing to find these clades. We introduce different scoring functions for the GSCM, a randomization, as well as a combination thereof to improve the GSCM to find more clades. This helps, in turn, to improve the resolution of the GSCM supertree. We find this modifications to increase the number of true positive clades by 18\% compared to the currently used Overlap scoring.},
    doi = {10.7717/peerj.2172},
    file = {FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2016:2016/FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2016.pdf:PDF},
    keywords = {jena; supertree; phylogenetics; scm; Consensus; Supertree; Supermatrix; Divide and Conquer; FlipCut; Phylogeny},
    optmonth = {#jun#},
    owner = {fleisch},
    pmid = {27375971},
    timestamp = {2016.06.29},
    url = {https://doi.org/10.7717/peerj.2172},
    }
  • [DOI] M. Meusel, F. Hufsky, F. Panter, D. Krug, R. Müller, and S. Böcker, “Predicting the presence of uncommon elements in unknown biomolecules from isotope patterns,” Anal chem, vol. 88, iss. 15, pp. 7556-7566, 2016.
    [Bibtex]
    @Article{meusel16predicting,
    author = {Marvin Meusel and Franziska Hufsky and Fabian Panter and Daniel Krug and Rolf M\"uller and Sebastian B\"ocker},
    title = {Predicting the presence of uncommon elements in unknown biomolecules from isotope patterns},
    journal = {Anal Chem},
    year = {2016},
    volume = {88},
    number = {15},
    pages = {7556-7566},
    abstract = {Motivation: The determination of the molecular formula is one of the earliest and most important steps when investigating the chemical nature of an unknown compound. Common approaches use the isotopic pattern of a compound measured using mass spectrometry. Computational methods to determine the molecular formula from this isotopic pattern require a fixed set of elements. Considering all possible elements severely increases running times and more importantly the chance for false positive identifications as the number of candidate formulas for a given target mass rises significantly if the constituting elements are not pre-filtered. This negative effect grows stronger for compounds of higher molecular mass as the effect of a single atom on the overall isotopic pattern grows smaller. On the other hand, hand-selected restrictions on this set of elements may prevent the identification of the correct molecular formula. Thus, it is a crucial step to determine the set of elements most likely comprising the compound prior to the assignment of an elemental formula to an exact mass.
    Results: In this paper, we present a method to determine the presence of certain elements (sulfur, chlorine, bromine, boron and selenium) in the compound from its (high mass accuracy) isotopic pattern. We limit ourselves to biomolecules, in the sense of products from nature or synthetic products with potential bioactivity. The classifiers developed here predict the presence of an element with a very high sensitivity and high specificity. We evaluate classifiers on three real-world datasets with 663 isotope patterns in total: 184 isotope patterns containing sulfur, 187 containing chlorine, 14 containing bromine, one containing boron, one containing selenium. In no case do we make a false negative prediction; for chlorine, bromine, boron, and selenium, we make ten false positive predictions in total. We also demonstrate the impact of our method on the identification of molecular formulas, in particular the number of considered candidates and running time.},
    doi = {10.1021/acs.analchem.6b01015},
    keywords = {jena; MS; MS1; isotope pattern;},
    owner = {Sebastian},
    pmid = {27398867},
    timestamp = {2016.07.11},
    }
  • [DOI] S. Winter, K. Jahn, S. Wehner, L. Kuchenbecker, M. Marz, J. Stoye, and S. Böcker, “Finding approximate gene clusters with Gecko\,3,” Nucleic acids res, vol. 44, iss. 20, pp. 9600-9610, 2016.
    [Bibtex]
    @Article{winter16finding,
    author = {Sascha Winter and Katharina Jahn and Stefanie Wehner and Leon Kuchenbecker and Manja Marz and Jens Stoye and Sebastian B\"ocker},
    title = {Finding approximate gene clusters with {Gecko\,3}},
    journal = {Nucleic Acids Res},
    year = {2016},
    volume = {44},
    number = {20},
    pages = {9600-9610},
    abstract = {Gene-order based comparison of multiple genomes provides signals for functional analysis of genes and the evolutionary process of genome organization. Gene clusters are regions of co-localized genes on genomes of different species. The rapid increase in sequenced
    genomes necessitates bioinformatics tools for finding gene clusters in hundreds of genomes. Existing tools are often restricted to few (in many cases, only two) genomes, and often make restrictive assumptions such as short perfect conservation, conserved gene order, or
    monophyletic gene clusters.
    We present Gecko 3, an open-source software for finding gene clusters in hundreds of bacterial genomes, that comes with an easy-to-use graphical user interface. The underlying gene cluster model is intuitive, can cope with low degrees of conservation as well as misannotations, and is complemented by a sound statistical evaluation. To evaluate the biological benefit of Gecko 3 and to exemplify our method, we search for gene clusters in a dataset of 678 bacterial genomes using Synechocystis sp. PCC 6803 as a reference. We confirm detected gene clusters reviewing the literature and comparing them to a database of operons; we detect two novel clusters, which were confirmed by publicly available experimental RNA-Seq data. The computational analysis is carried out on a laptop computer in less than 40 minutes.},
    doi = {10.1093/nar/gkw843},
    keywords = {jena; gene cluster; 3AGC; approximate gene cluster; finding gene clusters; operons;},
    owner = {Sebastian},
    pmid = {27679480},
    timestamp = {2016.09.16},
    }

2015

  • [DOI] S. Böcker, “Ten times eighteen,” J inform process jpn, vol. 23, iss. 3, p. 258–264, 2015.
    [Bibtex]
    @Article{boecker15ten,
    author = {Sebastian B\"ocker},
    title = {Ten times eighteen},
    journal = {J Inform Process Jpn},
    year = {2015},
    volume = {23},
    number = {3},
    pages = {258--264},
    abstract = {We consider the following simple game: We are given a table with ten slots indexed one to ten. In each of the ten rounds of the game, three dice are rolled and the numbers are added. We then put this number into any free slot. For each slot, we multiply the slot index with the number in this slot, and add up the products. The goal of the game is to maximize this score. In more detail, we play the game many times, and try to maximize the sum of scores or, equivalently, the expected score. We present a strategy to optimally play this game with respect to the expected score. We then modify our strategy so that we need only polynomial time and space. Finally, we show that knowing all ten rolls in advance, results in a relatively small increase in score. Although the game has a random component and requires a non-trivial strategy to be solved optimally, this strategy needs only polynomial time and space.},
    doi = {10.2197/ipsjjip.23.258},
    keywords = {jena;},
    owner = {Sebastian},
    timestamp = {2015.01.08},
    }
  • [DOI] K. Dührkop and S. Böcker, “Fragmentation trees reloaded,” in Proc. of research in computational molecular biology (recomb 2015), 2015, p. 65–79.
    [Bibtex]
    @InProceedings{duehrkop15fragmentation,
    author = {Kai D\"uhrkop and Sebastian B\"ocker},
    title = {Fragmentation trees reloaded},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2015)},
    year = {2015},
    volume = {9029},
    series = lncs,
    pages = {65--79},
    organization = Springer,
    abstract = {Metabolites, small molecules that are involved in cellular reactions, provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually relies on tandem mass spectrometry to identify the thousands of compounds in a biological sample. Today, the vast majority of metabolites remain unknown. Fragmentation trees have become a powerful tool for the interpretation of tandem mass spectrometry data of small molecules. These trees are found by combinatorial optimization, and aim at explaining the experimental data via fragmentation cascades. To obtain biochemically meaningful results requires an elaborate optimization function.
    We present a new scoring for computing fragmentation trees, transforming the combinatorial optimization into a maximum a~posteriori estimator. We demonstrate the superiority of the new scoring for two tasks: Both for the de novo identification of molecular formulas of unknown compounds, and for searching a database for structurally similar compounds, our methods performs significantly better than the previous scoring, as well as other methods for this task. Our method can expedite the workflow for untargeted metabolomics, allowing researchers to investigate unknowns using automated computational methods.},
    doi = {10.1007/978-3-319-16706-0_10},
    journalref = {boecker16fragmentation},
    keywords = {jena; FT; fragmentation trees;},
    owner = {Sebastian},
    timestamp = {2013.06.10},
    }
  • [DOI] K. Dührkop, H. Shen, M. Meusel, J. Rousu, and S. Böcker, “Searching molecular structure databases with tandem mass spectra using CSI:FingerID,” Proc natl acad sci u s a, vol. 112, iss. 41, p. 12580–12585, 2015.
    [Bibtex]
    @Article{duehrkop15searching,
    author = {Kai D\"uhrkop and Huibin Shen and Marvin Meusel and Juho Rousu and Sebastian B\"ocker},
    title = {Searching molecular structure databases with tandem mass spectra using {CSI:FingerID}},
    journal = {Proc Natl Acad Sci U S A},
    year = {2015},
    volume = {112},
    number = {41},
    pages = {12580--12585},
    abstract = {Metabolites provide a direct functional signature of cellular state. Untargeted metabolomics experiments usually rely on tandem MS to identify the thousands of compounds in a biological sample. Today, the vast majority of metabolites remain unknown. We present a method for searching molecular structure databases using tandem MS data of small molecules. Our method computes a fragmentation tree that best explains the fragmentation spectrum of an unknown molecule. We use the fragmentation tree to predict the molecular structure fingerprint of the unknown compound using machine learning. This fingerprint is then used to search a molecular structure database such as PubChem. Our method is shown to improve on the competing methods for computational metabolite identification by a considerable margin.},
    doi = {10.1073/pnas.1509788112},
    keywords = {jena; MS; tandem MS; molecular fingerprints; FingerID; databases; molecular structures;},
    optmonth = {#oct#},
    owner = {Sebastian},
    pmid = {26392543},
    timestamp = {2015.08.12},
    }
  • [DOI] M. S. Engler, S. Crotty, M. J. Barthel, C. Pietsch, K. Knop, U. S. Schubert, and S. Böcker, “COCONUT – an efficient tool for estimating copolymer compositions from mass spectra,” Anal chem, vol. 87, iss. 10, p. 5223–5231, 2015.
    [Bibtex]
    @Article{engler15coconut,
    author = {Engler, Martin S. and Crotty, Sarah and Barthel, Markus J and Pietsch, Christian and Knop, Katrin and Schubert, Ulrich S. and B\"ocker, Sebastian},
    title = {{COCONUT} -- An Efficient Tool for Estimating Copolymer Compositions from Mass Spectra},
    journal = {Anal Chem},
    year = {2015},
    volume = {87},
    number = {10},
    pages = {5223--5231},
    abstract = {The accurate characterization of synthetic polymer sequences represents a major challenge in polymer science. Matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) is frequently used for the characterization of copolymer samples. We present the COCONUT software for estimating the composition distribution of the copolymer. Our method is based on Linear Programming and is capable of automatically resolving overlapping isotopes and isobaric ions. We demonstrate that COCONUT is well suited for analyzing complex copolymer MS spectra. COCONUT is freely available and provides a graphical user interface.},
    doi = {10.1021/acs.analchem.5b00146},
    file = {EnglerEtAl_COCONUTEfficientTool_AnalChem_2015_preprint.pdf:2015/EnglerEtAl_COCONUTEfficientTool_AnalChem_2015_preprint.pdf:PDF},
    keywords = {polymer ms; jena; linkPDF},
    owner = {martin},
    pmid = {25884349},
    timestamp = {2015.04.20},
    url = {http://pubs.acs.org/articlesonrequest/AOR-ek4NKbcskMyjJttVVcEJ},
    }
  • [DOI] M. Fleischauer and S. Böcker, “Collecting reliable clades using the greedy strict consensus merger,” in Proc. of german conference on bioinformatics (gcb 2015), 2015, p. e1595.
    [Bibtex]
    @InProceedings{fleischauer15collecting,
    author = {Fleischauer, Markus and B{\"o}cker, Sebastian},
    title = {Collecting reliable clades using the Greedy Strict Consensus Merger},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2015)},
    year = {2015},
    volume = {3},
    series = {PeerJ PrePrints},
    pages = {e1595},
    publisher = {PeerJ Inc. San Francisco, USA},
    abstract = {Supertree methods combine a set of phylogenetic trees into a single supertree. Similar to supermatrix methods, these methods provide a way to reconstruct larger parts of the Tree of Life, potentially evading the computational complexity of phylogenetic inference methods such as maximum likelihood. The supertree problem can be formalized in different ways, to cope with contradictory information in the input. Many supertree methods have been developed. Some of them solve NP-hard optimization problems like the well known Matrix Representation with Parsimony, others have polynomial worst-case running time but work in a greedy fashion (FlipCut). Both can profit from a set of clades that are already known to be part of the supertree. The Superfine approach shows how the Greedy Strict Consensus Merger (GSCM) can be used as preprocessing to find these clades. We introduce different scoring functions for the GSCM, a randomization, as well as a combination thereof to improve the GSCM to find more clades. This helps, in turn, to improve the resolution of the final supertree. We find this modifications to increase the number of true positive clades by 16\% while decreasing the number of false positive clades by 3\% compared to the currently used Overlap scoring.},
    doi = {10.7287/peerj.preprints.1297v1},
    file = {FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2015.pdf:2015/FleischauerAndBoecker_CollectingCladesGSCM_Peerj_2015.pdf:PDF},
    journalref = {fleischauer16collecting},
    keywords = {jena; supertree; phylogenetics; scm;},
    owner = {fleisch},
    timestamp = {2015.08.27},
    url = {https://peerj.com/preprints/1297v1/},
    }
  • [DOI] P. Kulkarni, F. Kaftan, P. Kynast, A. Svatoš, and S. Böcker, “Correcting mass shifts: a lock-mass-free recalibration procedure for mass spectrometry imaging data,” Anal bioanal chem, vol. 405, iss. 25, p. 7603–7613, 2015.
    [Bibtex]
    @Article{kulkarni15correcting,
    author = {Purva Kulkarni and Filip Kaftan and Philipp Kynast and Ale\v{s} Svato\v{s} and Sebastian B\"ocker},
    title = {Correcting mass shifts: A lock-mass-free recalibration procedure for mass spectrometry imaging data},
    journal = {Anal Bioanal Chem},
    year = {2015},
    volume = {405},
    number = {25},
    pages = {7603--7613},
    abstract = {Mass spectrometry imaging (MSI) has become widely popular
    because of its potential to map the spatial distribution of thousands
    of compounds in a single measurement directly from tissue
    surfaces. With every MSI experiment, it is important to maintain
    high mass accuracy for correct identification of the observed ions.
    Many times this can be compromised due to different experimental
    factors, leading to erroneous assignment of peaks. This makes
    recalibration a crucial preprocessing step. We describe a lock
    mass-free mass spectra recalibration method, which enables to
    significantly reduce these mass shift effects. The recalibration
    method is applied in three steps: First, we decide on an order to
    process all the spectra. Herein, we describe three different methods
    for ordering the spectra - minimum spanning tree (MST),
    topological greedy (TG), and crystal growth (CG). Second, we
    construct a reference (consensus) spectrum, from the ordered
    spectra, and third, all spectra are individually corrected against this
    consensus spectrum. The performance of the recalibration method
    is demonstrated on three imaging datasets acquired from matrix-assisted laser desorptionionization (MALDI) and laser
    desorption/ionization (LDI) mass spectrometry imaging of
    whole-body Drosophila melanogaster fly. The applied recalibration
    method is shown to strongly reduce the observed mass shifts in the
    imaging datasets. Among the three ordering methods, CG and
    MST perform comparatively better than TG and highly decrease
    the overall standard deviation of the mass error distribution. Lock
    mass correction of MSI data is practically difficult, as not all spectra
    contain the selected lock mass peak. Our method eliminates this
    need.},
    doi = {10.1007/s00216-015-8935-4},
    keywords = {Mass spectrometry imaging, Recalibration, Mass shift correction, Data processing, jena},
    optmonth = {#oct#},
    owner = {Sebastian},
    pmid = {26345438},
    timestamp = {2015.08.27},
    url = {http://link.springer.com/article/10.1007/s00216-015-8935-4},
    }
  • [DOI] T. W. J. White, S. Beyer, K. Dührkop, M. Chimani, and S. Böcker, “Speedy colorful subtrees,” in Proc. of computing and combinatorics conference (cocoon 2015), 2015, p. 310–322.
    [Bibtex]
    @InProceedings{white15speedy,
    author = {W. Timothy J. White and Stephan Beyer and Kai D\"uhrkop and Markus Chimani and Sebastian B\"ocker},
    title = {Speedy Colorful Subtrees},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2015)},
    year = {2015},
    volume = {9198},
    series = lncs,
    pages = {310--322},
    publisher = Springer,
    abstract = {Fragmentation trees are a technique for identifying molecular formulas and deriving some chemical properties of metabolites---small organic molecules---solely from mass spectral data. Computing these trees involves finding exact solutions to the NP-hard Maximum Colorful Subtree problem. Existing solvers struggle to solve the large instances involved fast enough to keep up with instrument throughput, and their performance remains a hindrance to adoption in practice. We attack this problem on two fronts: by combining fast and effective reduction algorithms with a strong integer linear program (ILP) formulation of the problem, we achieve overall speedups of 9.4 fold and 8.8 fold on two sets of real-world problems---without sacrificing optimality. Both approaches are, to our knowledge, the first of their kind for this problem. We also evaluate the strategy of solving global problem instances, instead of first subdividing them into many candidate instances as has been done in the past. Software (C++ source for our reduction program and our CPLEX/Gurobi driver program) available under LGPL at https://github.com/wtwhite/speedy_colorful_subtrees/.},
    doi = {10.1007/978-3-319-21398-9_25},
    file = {WhiteEtAl_SpeedyColorfulSubtrees_preprint_2015.pdf:2015/WhiteEtAl_SpeedyColorfulSubtrees_preprint_2015.pdf:PDF},
    keywords = {jena; linkpdf; PABI; IDUN; tandem ms;},
    owner = {wtwhite},
    timestamp = {2015.04.20},
    }

2014

  • [DOI] S. Böcker and S. Wagner, “Counting glycans revisited,” J math biol, vol. 69, iss. 4, p. 799–816, 2014.
    [Bibtex]
    @Article{boecker14counting,
    author = {Sebastian B\"ocker and Stephan Wagner},
    title = {Counting glycans revisited},
    journal = {J Math Biol},
    year = {2014},
    volume = {69},
    number = {4},
    pages = {799--816},
    abstract = {We present an algorithm for counting glycan topologies of order n that improves on previously described algorithms by a factor n in both time and space. More generally, we provide such an algorithm for counting rooted or unrooted d-ary trees with labels or masses assigned to the vertices, and we give a "recipe" to estimate the asymptotic growth of the resulting sequences. We provide constants for the asymptotic growth of d-ary trees and labeled quaternary trees (glycan topologies). Finally, we show how a classical result from enumeration theory can be used to count glycan structures where edges are labeled by bond types. Our method also improves time bounds for counting alkanes.},
    doi = {10.1007/s00285-013-0721-3},
    file = {:2014/BoeckerWagner_CountingGlycansRevisited_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; glycans; counting; counting glycans; generating functions; Polya;},
    owner = {Sebastian},
    pmid = {23974240},
    timestamp = {2013.07.24},
    }
  • C. Beckstein, S. Böcker, M. Bogdan, H. Bruelheide, M. H. Bücker, J. Denzler, P. Dittrich, I. G., A. Hinneburg, B. König-Ries, F. Löffler, M. Marz, M. Müller-Hannemann, M. Winter, and W. Zimmermann, “Explorative analysis of heterogeneous, unstructured, and uncertain data: a computer science perspective on biodiversity research,” in Proc. of data management technologies and applications (data 2014), 2014, p. 251–257.
    [Bibtex]
    @InProceedings{beckstein14explorative,
    author = {Clemens Beckstein and Sebastian B\"ocker and Martin Bogdan and Helge Bruelheide and H. Martin B\"ucker and Joachim Denzler and Peter Dittrich and Ivo Gro\ss{}e and Alexander Hinneburg and Birgitta K\"onig-Ries and Felicitas L\"offler and Manja Marz and Matthias M\"uller-Hannemann and Martin Winter and Wolf Zimmermann},
    title = {Explorative Analysis of Heterogeneous, Unstructured, and Uncertain Data: A Computer Science Perspective on Biodiversity Research},
    booktitle = {Proc. of Data Management Technologies and Applications (DATA 2014)},
    year = {2014},
    pages = {251--257},
    publisher = {SciTePress},
    abstract = {We outline a blueprint for the development of new computer science approaches for the management and analysis of big data problems for biodiversity science. Such problems are characterized by a combination of different data sources each of which owns at least one of the typical characteristics of big data (volume, variety, velocity, or veracity). For these problems, we envision a solution that covers different aspects of integrating data sources and algorithms for their analysis on one of the following three layers: At the data layer, there are various data archives of heterogeneous, unstructured, and uncertain data. At the functional layer, the data are analyzed for each archive individually. At the meta-layer, multiple functional archives are combined for complex analysis.},
    keywords = {jena; iDiv; biodiversity;},
    opteditor = {M. Helfert and A. Holzinger and O. Belo and C. Francalanci},
    owner = {Sebastian},
    timestamp = {2014.06.25},
    }
  • [DOI] K. Dührkop, F. Hufsky, and S. Böcker, “Molecular formula identification using isotope pattern analysis and calculation of fragmentation trees,” Mass spectrom, vol. 3, iss. special issue 2, p. S0037, 2014.
    [Bibtex]
    @Article{duehrkop14molecular,
    author = {Kai D\"uhrkop and Franziska Hufsky and Sebastian B\"ocker},
    title = {Molecular Formula Identification Using Isotope Pattern Analysis and Calculation of Fragmentation Trees},
    journal = {Mass Spectrom},
    year = {2014},
    volume = {3},
    number = {special issue 2},
    pages = {S0037},
    abstract = {We present the results of a fully automated de novo approach for identification of molecular formulas in the CASMI 2013 contest. Only results for Category 1 (molecular formula identification) were submitted. Our approach combines isotope pattern analysis and fragmentation pattern analysis and is completely independent from any (spectral and structural) database. We correctly identified the molecular formula for ten out of twelve challenges, being the best automated method competing in this category.},
    doi = {10.5702/massspectrometry.S0037},
    keywords = {jena; MS; CASMI; SIRIUS;},
    owner = {Sebastian},
    timestamp = {2014.05.19},
    }
  • [DOI] D. Doerr, J. Stoye, S. Böcker, and K. Jahn, “Identifying gene clusters by discovering common intervals in indeterminate strings,” Bmc genomics, vol. 15, iss. Suppl 6, p. S2, 2014.
    [Bibtex]
    @Article{doerr14identifying,
    author = {Daniel Doerr and Jens Stoye and Sebastian B\"ocker and Katharina Jahn},
    title = {Identifying gene clusters by discovering common intervals in indeterminate strings},
    journal = {BMC Genomics},
    year = {2014},
    volume = {15},
    number = {Suppl 6},
    pages = {S2},
    note = {Proc. of \emph{RECOMB Satelite Workshop on Comparative Genomics} (RECOMB-CG 2014)},
    abstract = {Comparative analyses of chromosomal gene orders are successfully used to predict gene clusters in bacterial and fungal genomes. Present models for detecting sets of co-localized genes in chromosomal sequences require prior knowledge of gene family assignments between genes in the dataset of interest. These families are often computationally predicted on the basis of sequence similarity or higher order features of gene products. Erroneous gene family assignments emerging in this process are amplified in subsequent gene order analyses and thus may deteriorate gene cluster prediction. In this work we present a new dynamic model and efficient computational approaches for gene cluster prediction suitable in scenarios ranging from traditional gene family-based gene cluster prediction, via multiple conflicting gene family annotations, to gene family-free analysis, in which gene clusters are predicted solely on the basis of a pairwise similarity measure between the genes of different genomes. We evaluate our gene family-free model against a gene family-based model on a dataset of 93 bacterial genomes. Our model is able to detect gene clusters that would be also detected with well-established gene-family based approaches. Moreover, we show that it is able to detect conserved regions which are missed by gene family-based methods due to wrong or deficient gene family assignments.},
    booktitle = {Proc. of},
    doi = {10.1186/1471-2164-15-S6-S2},
    keywords = {jena; gene clusters; common intervals;},
    owner = {Sebastian},
    pmid = {25571793},
    timestamp = {2014.07.30},
    url = {http://www.biomedcentral.com/1471-2164/15/S6/S2},
    }
  • [DOI] F. Hufsky, K. Scheubert, and S. Böcker, “Computational mass spectrometry for small molecule fragmentation,” Trends anal chem, vol. 53, p. 41–48, 2014.
    [Bibtex]
    @Article{hufsky14computational,
    author = {Franziska Hufsky and Kerstin Scheubert and Sebastian B\"ocker},
    title = {Computational mass spectrometry for small molecule fragmentation},
    journal = {Trends Anal Chem},
    year = {2014},
    volume = {53},
    pages = {41--48},
    abstract = {The identification of small molecules from mass spectrometry (MS) data remains a major challenge in the interpretation of MS data. Computational aspects of identifying small molecules range from searching a reference spectral library to the structural elucidation of an unknown. In this review, we concentrate on five important aspects of the computational analysis. We find that novel computational methods may overcome the boundaries of spectral libraries, by searching in the more comprehensive molecular structure databases, or not requiring any databases at all.},
    comment = {url is good only until March 26, 2014},
    doi = {10.1016/j.trac.2013.09.008},
    file = {:2014/HufskyEtAl_ComputationalMassSpectrometry_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; review;},
    owner = {fhufsky},
    timestamp = {2013.09.19},
    url = {http://elsarticle.com/1ej1Fsn},
    }
  • [DOI] F. Hufsky, K. Scheubert, and S. Böcker, “New kids on the block: Novel informatics methods for natural product discovery,” Nat prod rep, vol. 31, iss. 6, p. 807–817, 2014.
    [Bibtex]
    @Article{hufsky14new,
    author = {Franziska Hufsky and Kerstin Scheubert and Sebastian B\"ocker},
    title = {New kids on the block: {Novel} informatics methods for natural product discovery},
    journal = {Nat Prod Rep},
    year = {2014},
    volume = {31},
    number = {6},
    pages = {807--817},
    abstract = {Mass spectrometry is a key technology for the identification and structural elucidation of natural products. Manual interpretation of the resulting data is tedious and time-consuming, so methods for automated analysis are highly sought after. In this review, we focus on four recently developed methods for the detection and investigation of small molecules, namely MetFrag/MetFusion, ISIS, FingerID, and FT-BLAST. These methods have the potential to significantly advance the field of computational mass spectrometry for the research of natural products. For example, they may help with the dereplication of compounds at an early stage of the drug discovery process; that is, the detection of molecules that are identical or highly similar to known drugs or drug leads. Furthermore, when a potential drug lead has been determined, these tools may help to identify it and elucidate its structure.},
    doi = {10.1039/C3NP70101H},
    file = {:2014/HufskyEtAl_NewKidsBlock_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; review; computational MS;},
    owner = {fhufsky},
    pmid = {24752343},
    timestamp = {2013.10.24},
    }
  • [DOI] F. Kaftan, V., P. Kynast, P. Kulkarni, S. Böcker, J. Cvačka, M. Knaden, and A. Svatoš, “Mass spectrometry imaging of surface lipids on intact \textitDrosophila melanogaster flies,” J mass spectrom, vol. 49, iss. 3, p. 223–232, 2014.
    [Bibtex]
    @Article{kaftan14mass,
    author = {Filip Kaftan and Vladim\'\i{}r Vrkoslav and Philipp Kynast and Purva Kulkarni and Sebastian B\"ocker and Josef Cva\v{c}ka and Markus Knaden and Ale\v{s} Svato\v{s}},
    title = {Mass Spectrometry Imaging of Surface Lipids on Intact \textit{{Drosophila melanogaster}} Flies},
    journal = {J Mass Spectrom},
    year = {2014},
    volume = {49},
    number = {3},
    pages = {223--232},
    abstract = {The spatial distribution of neutral lipids and semiochemicals on the surface of six-day-old separately reared naive Drosophila melanogaster flies has been visualized and studied using matrix-assisted laser desorption/ionization-time of flight (MALDI-TOF) mass spectrometry and laser-assisted desorption/ionization (LDI)-TOF imaging (MSI). Metal targets were designed for two-dimensional MSI of the surface of 3-D biological objects. Targets with either simple grooves or profiled holes designed to accurately accommodate the male and female bodies were fabricated. These grooves and especially holes ensured correct height fixation and spatial orientation of the flies on the targets after matrix application and sample drying. For LDI-TOF to be used, the flies were arranged into holes and fixed to a plane of the target using fast-setting glue. In MALDI-TOF mode, the flies were fixed as above and sprayed with a lithium 2,5-dihydroxybenzoate matrix using up to 100 airbrush spray cycles. The scanning electron microscopy images revealed that the deposits of matrix were homogenous and the matrix formed mostly into the clusters of crystals (40-80?$\mu$m) that were separated from each other by an uncovered cuticle surface (30-40?$\mu$m). The MSI using target with profiled holes provided superior results to the targets with simple grooves, eliminating the ion suppression/mass deviation due to the 3-D shape of the flies. Attention was paid to neutral lipids and other compounds including the male anti-attractant 11-cis-vaccenyl acetate for which the expected distribution with high concentration on the tip of the male abdomen was confirmed. The red and blue mass shift (PlusMinus1 colour scale) was observed associated with mass deviation predominantly between $\pm$0.2 and 0.3?Da. We use in-house developed software for mass recalibration, to eliminate the mass deviation effects and help with the detection of low-intensity mass signals. Copyright {\copyright} 2014 John Wiley & Sons, Ltd.},
    doi = {10.1002/jms.3331},
    file = {KaftanEtAl_ImagingSurfaceLipidsDrosophila_JMS_2014.pdf:2014/KaftanEtAl_ImagingSurfaceLipidsDrosophila_JMS_2014.pdf:PDF},
    keywords = {jena; imaging MS; MS; lipids; linkPDF;, Imaging MS, Mass Spectrometry Imaging, MALDI, IMS, SIMS},
    owner = {Sebastian},
    pmid = {24619548},
    timestamp = {2013.12.30},
    }
  • [DOI] M. Napagoda, J. Gerstmeier, A. Koeberle, S. Wesely, S. Popella, S. Lorenz, K. Scheubert, S. Böcker, A. Svatoš, and O. Werz, “\textitMunronia pinnata (Wall.) Theob.: unveiling phytochemistry and dual inhibition of 5-lipoxygenase and microsomal prostaglandin E$_2$ synthase (mPGES)-1,” J ethnopharmacol, vol. 151, iss. 2, p. 882–890, 2014.
    [Bibtex]
    @Article{napagoda14munronia,
    author = {Mayuri Napagoda and Jana Gerstmeier and Andreas Koeberle and Sandra Wesely and Sven Popella and Sybille Lorenz and Kerstin Scheubert and Sebastian B\"ocker and Ale\v{s} Svato\v{s} and Oliver Werz},
    title = {\textit{Munronia pinnata} ({Wall.}) {Theob.}: Unveiling phytochemistry and dual inhibition of 5-lipoxygenase and microsomal prostaglandin {E$_2$} synthase {(mPGES)-1}},
    journal = {J Ethnopharmacol},
    year = {2014},
    volume = {151},
    number = {2},
    pages = {882--890},
    abstract = {Preparations from Munronia pinnata (Wall.) Theob. are extensively used in traditional medicine in Sri Lanka for the treatment of inflammatory conditions. However, neither the pharmacological features nor the phytochemistry of this plant are explored in order to understand and rationalize the reported ethnobotanical significance. As 5-lipoxygenase (5-LO) and microsomal prosta-glandin E2 synthase (mPGES)-1 are crucial enzymes in inflammatory disorders, we evaluated their inhibition by M. pinnata extracts and studied the chemical profile of the plant for the identification of relevant constituents. Cell-free and cell-based assays were employed in order to investigate the suppression of 5-LO and mPGES-1 activity. Cell viability, radical scavenger activities, and inhibition of reactive oxygen species formation (ROS) in neutrophils were studied to assess cytotoxic and antioxidant effects. Gas and liquid chromatography coupled to mass spectrometric analysis enabled the characterization of secondary metabolites. The n-hexane extract of M. pinnata efficiently suppressed 5-LO activity in stimulated human neutrophils (IC50 = 8.7 mg/ml) and potently inhibited isolated human recombinant 5-LO (IC50 = 0.48 mg/ml) and mPGES-1 (IC50 = 1.0 mg/ml). In contrast, no significant radical scavenging activity or suppression of ROS formation was observed, and neutrophil viability was unaffected. The phytochemistry of the plant was unveiled for the first time and phytosterols, fatty acids, sesquiterpenes and several other types of secondary metabolites were identified. Together, potent inhibition of 5-LO and mPGES-1 activity, without concomitant antioxidant activity and cytotoxic effects, rationalizes the ethnopharmacological use of M. pinnata as anti-inflammatory remedy. Detailed chromatographic/mass spectrometric analysis reveals discrete chemical structures of relevant constituents.},
    doi = {10.1016/j.jep.2013.11.052},
    keywords = {jena; IDUN; FT; fragmentation tree;},
    owner = {Sebastian},
    pmid = {24315851},
    timestamp = {2013.12.22},
    }
  • [DOI] K. Scheubert, F. Hufsky, and S. Böcker, “Multiple mass spectrometry fragmentation trees revisited: boosting performance and quality,” in Proc. of workshop on algorithms in bioinformatics (wabi 2014), 2014, p. 217–231.
    [Bibtex]
    @InProceedings{scheubert14multiple,
    author = {Kerstin Scheubert and Franziska Hufsky and Sebastian B\"ocker},
    title = {Multiple Mass Spectrometry Fragmentation Trees Revisited: Boosting Performance and Quality},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2014)},
    year = {2014},
    volume = {8701},
    series = lncs,
    pages = {217--231},
    publisher = Springer,
    abstract = {Mass spectrometry (MS) in combination with a fragmentation technique is the method of choice for analyzing small molecules in high throughput experiments. The automated interpretation of such data is highly non-trivial. Recently, fragmentation trees have been introduced for de novo analysis of tandem fragmentation spectra (MS$^2$), describing the fragmentation process of the molecule. Multiple-stage MS (MS$^n$) reveals additional information about the dependencies between fragments. Unfortunately, the computational analysis of MS$^n$ data using fragmentation trees turns out to be more challenging than for tandem mass spectra. We present an Integer Linear Program for solving the Combined Colorful Subtree problem, which is orders of magnitude faster than the currently best algorithm which is based on dynamic programming. Using the new algorithm, we show that correlation between structural similarity and fragmentation tree similarity increases when using the additional information gained from MS$^n$. Thus, we show for the first time that using MS$^n$ data improves the quality of fragmentation trees.},
    doi = {10.1007/978-3-662-44753-6_17},
    file = {:2014/ScheubertEtAl_MultipleMassSpectrometry_preprint_2014.pdf:PDF},
    keywords = {jena; linkpdf; multiple MS; MSn; metabolites; small molecules;},
    owner = {Sebastian},
    timestamp = {2014.06.22},
    }
  • [DOI] V. U. Schwartze, S. Winter, E. Shelest, M. Marcet-Houben, F. Horn, S. Wehner, J. Linde, V. Valiante, M. Sammeth, K. Riege, M. Nowrousian, K. Kaerger, I. D. Jacobsen, M. Marz, A. A. Brakhage, T. Gabaldón, S. Böcker, and K. Voigt, “Gene expansion shapes genome architecture in the human pathogen \textitLichtheimia corymbifera: an evolutionary genomics analysis in the ancient terrestrial Mucorales (Mucoromycotina),” Plos genet, vol. 10, iss. 8, p. e1004496, 2014.
    [Bibtex]
    @Article{schwartze14gene,
    author = {Volker U. Schwartze and Sascha Winter and Ekaterina Shelest and Marina Marcet-Houben and Fabian Horn and Stefanie Wehner and J\"org Linde and Vito Valiante and Michael Sammeth and Konstantin Riege and Minou Nowrousian and Kerstin Kaerger and Ilse D. Jacobsen and Manja Marz and Axel A. Brakhage and Toni Gabald\'on and Sebastian B\"ocker and Kerstin Voigt},
    title = {Gene expansion shapes genome architecture in the human pathogen \textit{{Lichtheimia corymbifera}}: an evolutionary genomics analysis in the ancient terrestrial {Mucorales} ({Mucoromycotina})},
    journal = {PLos Genet},
    year = {2014},
    volume = {10},
    number = {8},
    pages = {e1004496},
    abstract = {Lichtheimia species are the second most important cause of mucormycosis in Europe. To provide broader insights into the molecular basis of the pathogenicity-associated traits of the basal Mucorales, we report the full genome sequence of L. corymbifera and compared it to the genome of Rhizopus oryzae, the most common cause of mucormycosis worldwide. The genome assembly encompasses 33.6 MB and 12,379 protein-coding genes. This study reveals four major differences of the L. corymbifera genome to R. oryzae: (i) the presence of an highly elevated number of gene duplications which are unlike R. oryzae not due to whole genome duplication (WGD), (ii) despite the relatively high incidence of introns, alternative splicing (AS) is not frequently observed for the generation of paralogs and in response to stress, (iii) the content of repetitive elements is strikingly low (< 5\%), (iv) L. corymbifera is typically haploid. Novel virulence factors were identified which may be involved in the regulation of the adaptation to iron-limitation, e.g. LCor01340.1 encoding a putative siderophore transporter and LCor00410.1 involved in the siderophore metabolism. Genes encoding the transcription factors LCor08192.1 and LCor01236.1, which are similar to GATA type regulators and to calcineurin regulated CRZ1, respectively, indicating an involvement of the calcineurin pathway in the adaption to iron limitation. Genes encoding MADS-box transcription factors are elevated up to 11 copies compared to the 1-4 copies usually found in other fungi. More findings are: (i) lower content of tRNAs, but unique codons in L. corymbifera, (ii) Over 25\% of the proteins are apparently specific for L. corymbifera. (iii) L. corymbifera contains only 2/3 of the proteases (known to be essential virulence factors) in comparision to R. oryzae. On the other hand, the number of secreted proteases, however, is roughly twice as high a in R. oryzae.},
    doi = {10.1371/journal.pgen.1004496},
    keywords = {jena; 3AGC; gene clusters},
    owner = {Sebastian},
    pmid = {25121733},
    timestamp = {2014.05.25},
    }
  • [DOI] H. Shen, K. Dührkop, S. Böcker, and J. Rousu, "Metabolite identification through multiple kernel learning on fragmentation trees," Bioinformatics, vol. 30, iss. 12, p. i157-i164, 2014.
    [Bibtex]
    @Article{shen14metabolite,
    author = {Huibin Shen and Kai D\"uhrkop and Sebastian B\"ocker and Juho Rousu},
    title = {Metabolite Identification through Multiple Kernel Learning on Fragmentation Trees},
    journal = {Bioinformatics},
    year = {2014},
    volume = {30},
    number = {12},
    pages = {i157-i164},
    note = {Proc.\ of \emph{Intelligent Systems for Molecular Biology} (ISMB 2014)},
    abstract = {Metabolite identification from tandem mass spectrometric data is a key task in metabolomics. Various computational methods has been proposed for the identification of metabolites from tandem mass spectra. Fragmentation tree methods explore the space of possible ways the metabolite can fragment, and base the metabolite identification on scoring of these fragmentation trees. Machine learning methods has been used to map mass spectra to molecular fingerprints; predicted fingerprints, in turn, can be used to score candidate molecular structures. Here, we combine fragmentation tree computations with kernel-based machine learning to predict molecular fingerprints and identify molecular structures. We introduce a family of kernels capturing the similarity of fragmentation trees, and combine these kernels using recently proposed multiple kernel learning approaches. Experiments on two large reference datasets show that the new methods significantly improve molecular fingerprint prediction accuracy. These improvements result in better metabolite identification, doubling the number of metabolites ranked at the top position of the candidates list.},
    doi = {10.1093/bioinformatics/btu275},
    file = {ShenEtAl_MetaboliteIdentificationMultipleKernel_ISMB_2014.pdf:2014/ShenEtAl_MetaboliteIdentificationMultipleKernel_ISMB_2014.pdf:PDF},
    keywords = {jena; IDUN; MS; tandem MS;},
    owner = {fhufsky},
    pmid = {24931979},
    timestamp = {2014.02.11},
    }

2013

  • [DOI] S. Böcker and J. Baumbach, "Cluster editing," in Proc. of computability in europe (cie 2013), 2013, p. 33–44.
    [Bibtex]
    @InProceedings{boecker13cluster,
    author = {Sebastian B\"ocker and Jan Baumbach},
    title = {Cluster Editing},
    booktitle = {Proc. of Computability in Europe (CIE 2013)},
    year = {2013},
    volume = {7921},
    series = lncs,
    pages = {33--44},
    publisher = Springer,
    abstract = {The Cluster Editing problem asks to transform a graph into a disjoint union of cliques using a minimum number of edge modifications. Although the problem has been proven NP-complete several times, it has nevertheless attracted much research both from the theoretical and the applied side. The problem has been the inspiration for numerous algorithms in bioinformatics, aiming at clustering entities such as genes, proteins, phenotypes, or patients. In this paper, we review exact and heuristic methods that have been proposed for the Cluster Editing problem, and also applications of these algorithms for biological problems.},
    doi = {10.1007/978-3-642-39053-1_5},
    keywords = {jena; FPT; cluster editing;},
    opteditor = {Paola Bonizzoni and Vasco Brattka and Benedikt L\"owe},
    owner = {Sebastian},
    timestamp = {2013.03.20},
    }
  • [DOI] S. Böcker, S. Canzar, and G. W. Klau, "The generalized Robinson-Foulds metric," in Proc. of workshop on algorithms in bioinformatics (wabi 2013), 2013, p. 156–169.
    [Bibtex]
    @InProceedings{boecker13generalized,
    author = {Sebastian B\"ocker and Stefan Canzar and Gunnar W. Klau},
    title = {The generalized {Robinson-Foulds} metric},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2013)},
    year = {2013},
    volume = {8126},
    series = lncs,
    pages = {156--169},
    publisher = Springer,
    abstract = {The Robinson-Foulds (RF) metric is arguably the most widely used measure of phylogenetic tree similarity, despite its well-known shortcomings: For example, moving a single taxon in a tree can result in a tree that has maximum distance to the original one; but the two trees are identical if we remove the single taxon. To this end, we propose a natural extension of the RF metric that does not simply count identical clades but instead, also takes similar clades into consideration. In contrast to previous approaches, our model requires the matching between clades to respect the structure of the two trees, a property that the classical RF metric exhibits, too. We show that computing this generalized RF metric is, unfortunately, NP-hard. We then present a simple Integer Linear Program for its computation, and evaluate it by an all-against-all comparison of 100 trees from a benchmark data set. We find that matchings that respect the tree structure differ significantly from those that do not, underlining the importance of this natural condition.},
    doi = {10.1007/978-3-642-40453-5_13},
    keywords = {jena; fpt; phylogenetics; tree distance; Robinson-Foulds;},
    owner = {Sebastian},
    timestamp = {2013.06.21},
    url = {http://arxiv.org/abs/1307.7824},
    }
  • [DOI] M. Brinkmeyer, T. Griebel, and S. Böcker, "FlipCut supertrees: towards matrix representation accuracy in polynomial time," Algorithmica, vol. 67, iss. 2, p. 142–160, 2013.
    [Bibtex]
    @Article{brinkmeyer13flipcut,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {{FlipCut} Supertrees: Towards Matrix Representation Accuracy in Polynomial Time},
    journal = {Algorithmica},
    year = {2013},
    volume = {67},
    number = {2},
    pages = {142--160},
    abstract = {In computational phylogenetics, supertree methods provide a way to reconstruct larger clades of the Tree of Life. The problem of constructing a supertree of a given set of rooted input trees can be formalized in different ways, to cope with contradictory information in the input. In particular, there exist methods based on encoding the input trees in a matrix, and methods based on finding minimum cuts in some graph representing the incompatibility in the input. Evaluations have shown that the former matrix representation methods will compute supertrees of better quality but, unfortunately, the underlying problems are computationally hard, which can heavily constrain the size of reconstructed trees. In contrast, graph-based methods compute a supertree in polynomial time, but these supertrees are inferior in quality. In this paper, we present a novel approach for the computation of supertrees, called FlipCut supertree. Our method combines the computation of minimum cuts from graph-based methods with a matrix representation method, namely Minimum Flip Supertrees. Here, the input trees are encoded in a 0/1/?-matrix, and we present a heuristic to search for a minimum set of 0/1-flips such that the resulting matrix admits a directed perfect phylogeny. We then extend our approach, by using edge weights to weight the columns of the 0/1/?-matrix. In our evaluation, we show that our method is extremely swift in practice, and orders of magnitude faster than the runner up. Concerning supertree accuracy, our method outperforms most polynomial time supertree methods, and is sometimes on par with matrix representation with parsimony.},
    doi = {10.1007/s00453-012-9698-3},
    file = {:2012/BrinkmeyerEtAl_FlipCutSupertreesTowards_Algorithmica_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; FlipCut; supertrees; supertree methods},
    owner = {Sebastian},
    timestamp = {2010.11.10},
    }
  • [DOI] K. Dührkop, M. Ludwig, M. Meusel, and S. Böcker, "Faster mass decomposition," in Proc. of workshop on algorithms in bioinformatics (wabi 2013), 2013, p. 45–58.
    [Bibtex]
    @InProceedings{duehrkop13faster,
    author = {Kai D\"uhrkop and Marcus Ludwig and Marvin Meusel and Sebastian B\"ocker},
    title = {Faster mass decomposition},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2013)},
    year = {2013},
    volume = {8126},
    series = lncs,
    pages = {45--58},
    publisher = Springer,
    abstract = {Metabolomics complements investigation of the genome, transcriptome, and proteome of an organism. Today, the vast majority of metabolites remain unknown, in particular for non-model organisms. Mass spectrometry is one of the predominant techniques for analyzing small molecules such as metabolites. A fundamental step for identifying a small molecule is to determine its molecular formula. Here, we present and evaluate three algorithm engineering techniques that speed up the molecular formula determination. For that, we modify an existing algorithm for decomposing the monoisotopic mass of a molecule. These techniques lead to a four-fold reduction of running times, and reduce memory consumption by up to 94\%. In comparison to the classical search tree algorithm, our algorithm reaches a 1000-fold speedup.},
    doi = {10.1007/978-3-642-40453-5_5},
    keywords = {jena; idun; mass decomposition; MS; mass spectrometry;},
    owner = {Sebastian},
    timestamp = {2013.06.21},
    url = {http://arxiv.org/abs/1307.7805},
    }
  • [DOI] K. Dührkop, K. Scheubert, and S. Böcker, "Molecular formula identification with SIRIUS," Metabolites, vol. 3, p. 506–516, 2013.
    [Bibtex]
    @Article{duehrkop13molecular,
    author = {Kai D\"uhrkop and Kerstin Scheubert and Sebastian B\"ocker},
    title = {Molecular Formula Identification with {SIRIUS}},
    journal = {Metabolites},
    year = {2013},
    volume = {3},
    pages = {506--516},
    abstract = {We present results of the SIRIUS 2 submission to the 2012 CASMI contest. Only results for Category 1 (molecular formula identification) were submitted. The SIRIUS method and the parameters used are briefly described, followed by detailed analysis of the results and a discussion of cases where SIRIUS 2 was unable to come up with the correct molecular formula. SIRIUS 2 returns consistently high quality results, with the exception of fragmentation pattern analysis of time-of-flight data. We then discuss possibilities for further improving SIRIUS 2 in the future.},
    doi = {10.3390/metabo3020506},
    file = {DuehrkopEtAl_MolecularFormulaIdentification_Metabolites_2013.pdf:2013/DuehrkopEtAl_MolecularFormulaIdentification_Metabolites_2013.pdf:PDF},
    keywords = {jena; IDUN; MS; small molecules; isotope patterns; SIRIUS; fragmentation trees; FT;},
    owner = {Sebastian},
    pmid = {24958003},
    timestamp = {2013.04.19},
    }
  • [DOI] K. Jahn, S. Winter, J. Stoye, and S. Böcker, "Statistics for approximate gene clusters," Bmc bioinf, vol. 14, iss. Suppl 15, p. S14, 2013.
    [Bibtex]
    @Article{jahn13statistics,
    author = {Katharina Jahn and Sascha Winter and Jens Stoye and Sebastian B\"ocker},
    title = {Statistics for approximate gene clusters},
    journal = {BMC Bioinf},
    year = {2013},
    volume = {14},
    number = {Suppl 15},
    pages = {S14},
    note = {Proc. of \emph{RECOMB Satelite Workshop on Comparative Genomics} (RECOMB-CG 2013)},
    abstract = {Genes occurring co-localized in multiple genomes can be strong indicators for either functional constraints on the genome organization or remnant ancestral gene order. The computational detection of these patterns, which are usually referred to as gene clusters, has become increasingly sensitive over the past decade. The most powerful approaches allow for both imperfect cluster conservation, i.e. occurrences which can be missing some of the clustered genes, be disrupted by uninvolved genes, or internally rearranged, as well as the complete absence of the cluster in some or even most of the studied genomes. The detection of such low quality clusters increases the risk to mistake faint patterns that occur merely by chance for genuine findings. Therefore, it is crucial to estimate the significance of computational gene cluster predictions and discriminate between true conservation and coincidental clustering. In this paper, we present an efficient and accurate approach to estimate the significance of gene cluster predictions. Given a single gene cluster prediction, we calculate the probability to observe it with the same or a higher degree of conservation under the null hypothesis of random gene order, and add a correction factor to account for multiple testing. Our approach considers all parameters that define the quality of gene cluster conservation: the number of genomes in which the cluster occurs, the number of involved genes, the degree of conservation in the different genomes, as well as the frequency of the clustered genes within each genome. We apply our approach to evaluate gene cluster predictions in a large set of well annotated genomes.},
    doi = {10.1186/1471-2105-14-S15-S14},
    keywords = {jena; 3AGC; gene clusters;},
    owner = {Sebastian},
    pmid = {24564620},
    timestamp = {2013.07.26},
    }
  • S. Neumann, F. Rasche, S. Wolf, and S. Böcker, "Metabolite identification and computational mass spectrometry," in The handbook of plant metabolomics, metabolite profiling and networking, W. Weckwerth and G. Kahl, Eds., Wiley-VCH Verlag, 2013, p. 271–285.
    [Bibtex]
    @InCollection{neumann13metabolite,
    author = {Steffen Neumann and Florian Rasche and Sebastian Wolf and Sebastian B\"ocker},
    title = {Metabolite Identification and Computational Mass Spectrometry},
    booktitle = {The Handbook of Plant Metabolomics, Metabolite Profiling and Networking},
    publisher = {Wiley-VCH Verlag},
    year = {2013},
    editor = {Wolfram Weckwerth and G\"unter Kahl},
    series = {Molecular Plant Biology Handbook Series},
    chapter = {16},
    pages = {271--285},
    abstract = {Previous chapters have introduced protocols and examples for high-throughput metabolomics experiments. Metabolite identification is an important step in these experiments, bridging the metabolomics experiment, metabolite profiling and the biological interpretation of the results. The elemental composition of the individual metabolites is the most basic information that can be calculated already from the MS profiling data. For a more thorough identification, the {\textquotedblleft}interesting{\textquotedblright} peaks are subjected to MS2, or even higher order MSn measurements. Such spectra carry rich structural hints, revealing building blocks of the unknown compound, or allowing comparison with databases of reference spectra. In this chapter we describe a general strategy to identify metabolites, and walk through the steps of the identification for two example compounds, first calculating elemental compositions, performing in silico identification without reference spectra, and finally spectral library lookup.},
    keywords = {Jena},
    owner = {fhufsky},
    timestamp = {2013.05.28},
    }
  • [DOI] I. Rauf, F. Rasche, F. Nicolas, and S. Böcker, "Finding maximum colorful subtrees in practice," J comput biol, vol. 20, iss. 4, p. 1–11, 2013.
    [Bibtex]
    @Article{rauf13finding,
    author = {Imran Rauf and Florian Rasche and Fran\c{c}ois Nicolas and Sebastian B\"ocker},
    title = {Finding Maximum Colorful Subtrees in practice},
    journal = {J Comput Biol},
    year = {2013},
    volume = {20},
    number = {4},
    pages = {1--11},
    abstract = {In metabolomics and other fields dealing with small compounds, mass spectrometry is applied as sensitive high-throughput technique. Recently, fragmentation trees have been proposed to automatically analyze the fragmentation mass spectra recorded by such instruments. Computationally, this leads to the problem of finding a maximum weight subtree in an edge weighted and vertex colored graph, such that every color appears at most once in the solution. We introduce new heuristics and an exact algorithm for this Maximum Colorful Subtree problem, and evaluate them against existing algorithms on real-world datasets. Our tree completion heuristic consistently scores better than other heuristics, while the integer programming-based algorithm produces optimal trees with modest running times. Our fast and accurate heuristic can help to determine molecular formulas based on fragmentation trees. On the other hand, optimal trees from the integer linear program are useful if structure is relevant, e.g., for tree alignments.},
    doi = {10.1089/cmb.2012.0083},
    keywords = {jena; metabolite ms; tandem ms; ms; ft; idun; TrACReview},
    owner = {Sebastian},
    pmid = {23509858},
    timestamp = {2011.02.01},
    }
  • [DOI] K. Scheubert, F. Hufsky, and S. Böcker, "Computational mass spectrometry for small molecules," J cheminform, vol. 5, p. 12, 2013.
    [Bibtex]
    @Article{scheubert13computational,
    author = {Kerstin Scheubert and Franziska Hufsky and Sebastian B\"ocker},
    title = {Computational Mass Spectrometry for Small Molecules},
    journal = {J Cheminform},
    year = {2013},
    volume = {5},
    pages = {12},
    abstract = {The identification of small molecules from mass spectrometry (MS) data remains a major challenge in the interpretation of MS data. This review covers the computational aspects of identifying small molecules, from the identification of a compound searching a reference spectral library, to the structural elucidation of unknowns. In detail, we describe the basic principles and pitfalls of searching mass spectral reference libraries. Determining the molecular formula of the compound can serve as a basis for subsequent structural elucidation; consequently, we cover different methods for molecular formula identification, focussing on isotope pattern analysis. We then discuss automated methods to deal with mass spectra of compounds that are not present in spectral libraries, and provide an insight into de novo analysis of fragmentation spectra using fragmentation trees. In addition, this review shortly covers the reconstruction of metabolic networks using MS data. Finally, we list available software for different steps of the analysis pipeline.},
    doi = {10.1186/1758-2946-5-12},
    file = {ScheubertEtAl_ComputationalMassSpectrometry_JCheminf_2013.pdf:2013/ScheubertEtAl_ComputationalMassSpectrometry_JCheminf_2013.pdf:PDF},
    keywords = {jena; MS; Mass Spectrometry; metabolites; metabolite identification;},
    owner = {fhufsky},
    pmid = {23453222},
    timestamp = {2013.01.02},
    url = {http://www.jcheminf.com/content/5/1/12/abstract},
    }
  • N. Wieseke, M. Lechner, M. Ludwig, and M. Marz, "POMAGO: multiple genome-wide alignment tool for bacteria," in Proc. of international symposium on bioinformatics research and applications (isbra 2013), 2013, p. 249–260.
    [Bibtex]
    @InProceedings{wieseke13pomago,
    author = {Nicolas Wieseke and Marcus Lechner and Marcus Ludwig and Manja Marz},
    title = {{POMAGO}: Multiple Genome-Wide Alignment Tool for Bacteria},
    booktitle = {Proc. of International Symposium on Bioinformatics Research and Applications (ISBRA 2013)},
    year = {2013},
    volume = {7875},
    series = lncs,
    pages = {249--260},
    publisher = Springer,
    abstract = {Multiple Genome-wide Alignments are a first crucial step to compare genomes. Gain and loss of genes, duplications and genomic rearrangements are challenging problems that aggravate with increasing phylogenetic distances. We describe a multiple genome-wide alignment tool for bacteria, called POMAGO, which is based on orthologous genes and their syntenic information determined by Proteinortho.This strategy enables POMAGO to efficiently define anchor points even across wide phylogenetic distances and outperform existing approaches in this field of application. The given set of orthologous genes is enhanced by several cleaning and completion steps, including the addition of previously undetected orthologous genes. Protein-coding genes are aligned on nucleotide and protein level, whereas intergenic regions are aligned on nucleotide level only. We tested and compared our program at three very different sets of bacteria that exhibit different degrees of phylogenetic distances: 1) 15 closely related, well examined and described E. coli species, 2) six more divergent Aquificales, as putative basal bacteria, and 3) a set of eight extreme divergent species, distributed among the whole phylogenetic tree of bacteria. POMAGO is written in a modular way which allows extending or even exchanging algorithms in different stages of the alignment process. Intergenic regions might for instance be aligned using an RNA secondary structure aware algorithm rather than to rely on sequence data alone. The software is freely available from http://www.rna.uni-jena.de/supplements/pomago},
    owner = {fhufsky},
    timestamp = {2014.01.13},
    }

2012

  • [DOI] S. Böcker, "Comment on ''An efficient method to calculate the aggregated isotopic distribution and exact center-masses'' by Claesen et al.," J am soc mass spectrom, vol. 23, iss. 10, p. 1826–1827, 2012.
    [Bibtex]
    @Article{boecker12comment,
    author = {Sebastian B\"ocker},
    title = {Comment on ''{An} Efficient Method to Calculate the Aggregated Isotopic Distribution and Exact Center-Masses'' by {Claesen} et al.},
    journal = {J Am Soc Mass Spectrom},
    year = {2012},
    volume = {23},
    number = {10},
    pages = {1826--1827},
    abstract = {Claesen et al. recently presented an efficient method for computing the isotope pattern of a molecule, that is, both the isotope distribution and the center masses (also called "probability-weighted masses" and "aggregated isotopic variants) of the isotope peaks. They favorably compare their approach against five other methods for simulating isotope patterns: Their tool BRAIN is more accurate than any other tool, and computation times are well below one second even for huge molecules of mass above 533403 Da and when computing 1325 peak masses. Unfortunately, the authors fail to mention SIRIUS that is capable of performing such calculations, too.},
    doi = {10.1007/s13361-012-0402-2},
    file = {Boecker_CommentEfficientMethod_JASMS_2012_preprint.pdf:2012/Boecker_CommentEfficientMethod_JASMS_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; IDUN; Mass Spectrometry; MS; isotope pattern; isotope pattern analysis; SIRIUS},
    owner = {Sebastian},
    pmid = {22673835},
    timestamp = {2012.04.25},
    }
  • [DOI] S. Böcker, "A golden ratio parameterized algorithm for cluster editing," J discrete algorithms, vol. 16, p. 79–89, 2012.
    [Bibtex]
    @Article{boecker12golden,
    author = {Sebastian B\"ocker},
    title = {A golden ratio parameterized algorithm for Cluster Editing},
    journal = {J Discrete Algorithms},
    year = {2012},
    volume = {16},
    pages = {79--89},
    abstract = {The Cluster Editing problem asks to transform a graph by at most k edge modifications into a disjoint union of cliques. The problem is NP-complete, but several parameterized algorithms are known. We present a novel search tree algorithm for the problem, which improves running time from O*(1.76^k) to O*(1.62^k). In detail, we can show that we can always branch with branching vector (2,1) or better, resulting in the golden ratio as the base of the search tree size. Our algorithm uses a well-known transformation to the integer-weighted counterpart of the problem. To achieve our result, we combine three techniques: First, we show that zero-edges in the graph enforce structural features that allow us to branch more efficiently. Second, by repeatedly branching we can isolate vertices, releasing costs. Finally, we use a known characterization of graphs with few conflicts.},
    doi = {10.1016/j.jda.2012.04.005},
    file = {Boecker_GoldenRatioParameterizedAlgorithm_JDiscreteAlgorithms_2012_preprint.pdf:2012/Boecker_GoldenRatioParameterizedAlgorithm_JDiscreteAlgorithms_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; PABI; fpt; cluster editing},
    owner = {Sebastian},
    timestamp = {2011.01.29},
    }
  • [DOI] S. Böcker, Q. B. A. Bui, and A. Truss, "Improved fixed-parameter algorithms for minimum-flip consensus trees," Acm trans algorithms, vol. 8, iss. 1, p. 17 pages, 2012.
    [Bibtex]
    @Article{boecker12improved,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {Improved Fixed-Parameter Algorithms for Minimum-Flip Consensus Trees},
    journal = {ACM Trans Algorithms},
    year = {2012},
    volume = {8},
    number = {1},
    pages = {17 pages},
    note = {Article~7},
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree for a given set of rooted input trees has frequently been addressed. In this paper we study the Minimum-Flip Problem: the input trees are transformed into a binary matrix, and we want to find a perfect phylogeny for this matrix using a minimum number of flips, that is, corrections of single entries in the matrix. The graph-theoretical formulation of the problem is as follows: Given a bipartite graph G = (V_t \cup V_c , E), the task is to find a minimum set of edge modifications such that the resulting graph has no induced path with four edges that starts and ends in V_t , where V_t corresponds to the taxa set and Vc corresponds to the character set. We present two fixed-parameter algorithms for the Minimum-Flip Problem, one with running time O(4.83^k + poly(m, n)) and another one with running time O(4.42^k + poly(m, n)) for n taxa, m characters, k flips, and poly(m, n) denotes a polynomial function in m and n. Additionally, we discuss several heuristic improvements. We also report computational results on phylogenetic data.},
    doi = {10.1145/2071379.2071386},
    file = {BoeckerEtAl_ImprovedFixedParameterAlgorithms_TALG_2012_preprint.pdf:2012/BoeckerEtAl_ImprovedFixedParameterAlgorithms_TALG_2012_preprint.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    optmonth = {#jan#},
    owner = {baoanh},
    timestamp = {2009.06.02},
    }
  • [DOI] S. Böcker and P. Damaschke, "A note on the parameterized complexity of unordered maximum tree orientation," Discrete appl math, vol. 160, iss. 10-11, p. 1634–1638, 2012.
    [Bibtex]
    @Article{boecker12note,
    author = {Sebastian B\"ocker and Peter Damaschke},
    title = {A Note on the Parameterized Complexity of Unordered Maximum Tree Orientation},
    journal = {Discrete Appl Math},
    year = {2012},
    volume = {160},
    number = {10-11},
    pages = {1634--1638},
    abstract = {In the Unordered Maximum Tree Orientation problem, a set P of paths in a tree and a parameter k is given, and we want to orient the edges in the tree such that all but at most k paths in P become directed paths. This is a more difficult variant of a well-studied problem in computational biology where the directions of paths in P are already given. We show that the parameterized complexity of the unordered version is between Edge Bipartization and Vertex Bipartization, and we give a characterization of orientable path sets in trees by forbidden substructures, which are cycles of a certain kind.},
    doi = {10.1016/j.dam.2012.02.017},
    file = {BoeckerDamaschke_NoteParameterizedComplexity_DAM_2012_preprint.pdf:2012/BoeckerDamaschke_NoteParameterizedComplexity_DAM_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; tree orientation; fpt; PABI},
    owner = {Sebastian},
    timestamp = {2011.11.18},
    }
  • [DOI] F. Hufsky and S. Böcker, "Comparing fragmentation trees from electron impact mass spectra with annotated fragmentation pathways," in Proc. of german conference on bioinformatics (gcb 2012), 2012, p. 12–22.
    [Bibtex]
    @InProceedings{hufsky12comparing,
    author = {Franziska Hufsky and Sebastian B\"ocker},
    title = {Comparing Fragmentation Trees from Electron Impact Mass Spectra with Annotated Fragmentation Pathways},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2012)},
    year = {2012},
    volume = {26},
    series = {OpenAccess Series in Informatics (OASIcs)},
    pages = {12--22},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    abstract = {Electron impact ionization (EI) is the most common form of ionization for GC-MS analysis of small molecules. This ionization method results in a mass spectrum not necessarily containing the molecular ion peak. The fragmentation of small compounds during EI is well understood, but manual interpretation of mass spectra is tedious and time-consuming. Methods for automated analysis are highly sought, but currently limited to database searching and rule-based approaches. With the computation of hypothetical fragmentation trees from high mass GC-MS data the high-throughput interpretation of such spectra may become feasible. We compare these trees with annotated fragmentation pathways. We find that fragmentation trees explain the origin of the ions found in the mass spectra in accordance to the literature. No peak is annotated with an incorrect fragment formula and 78.7\% of the fragmentation processes are correctly reconstructed.},
    doi = {10.4230/OASIcs.GCB.2012.12},
    keywords = {jena; IDUN; fragmentation trees; GC-MS; EI; Electron Impact Ionization;},
    owner = {Sebastian},
    timestamp = {2012.07.13},
    url = {http://drops.dagstuhl.de/opus/volltexte/2012/3714},
    }
  • [DOI] F. Hufsky, K. Dührkop, F. Rasche, M. Chimani, and S. Böcker, "Fast alignment of fragmentation trees," Bioinformatics, vol. 28, iss. 12, p. i265–i273, 2012.
    [Bibtex]
    @Article{hufsky12fast,
    author = {Franziska Hufsky and Kai D\"uhrkop and Florian Rasche and Markus Chimani and Sebastian B\"ocker},
    title = {Fast alignment of fragmentation trees},
    journal = {Bioinformatics},
    year = {2012},
    volume = {28},
    number = {12},
    pages = {i265--i273},
    note = {Proc. of \emph{Intelligent Systems for Molecular Biology} (ISMB 2012)},
    abstract = {Mass spectrometry allows sensitive, automated and high-throughput analysis of small molecules such as metabolites. One major bottleneck in metabolomics is the identification of "unknown" small molecules not in any database. Recently, fragmentation tree alignments have been introduced for the automated comparison of the fragmentation patterns of small molecules. Fragmentation pattern similarities are strongly correlated with the chemical similarity of molecules, and allow us to cluster compounds based solely on their fragmentation patterns. Aligning fragmentation trees is computationally hard. Nevertheless, we present three exact algorithms for the problem: A dynamic programming (DP) algorithm, a sparse variant of the DP, and an Integer Linear Program (ILP). Evaluation of our methods on three different datasets showed that thousands of alignments can be computed in a matter of minutes using DP, even for "challenging" instances. Running times of the sparse DP were an order of magnitude better than for the classical DP. The ILP was clearly outperformed by both DP approaches. We also found that for both DP algorithms, computing the 1\% slowest alignments required as much time as computing the 99\% fastest.},
    doi = {10.1093/bioinformatics/bts207},
    file = {HufskyEtAl_FastAlignmentFragmentationTrees_ISMB_2012.pdf:2012/HufskyEtAl_FastAlignmentFragmentationTrees_ISMB_2012.pdf:PDF},
    keywords = {jena; metabolite ms; ms; idun; ft; tree alignment},
    owner = {Sebastian},
    pmid = {22689771},
    timestamp = {2011.10.19},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/bts207?ijkey=Xeve7Sksc5aV0qP&keytype=ref},
    }
  • [DOI] F. Hufsky, M. Rempt, F. Rasche, G. Pohnert, and S. Böcker, "De novo analysis of electron impact mass spectra using fragmentation trees," Anal chim acta, vol. 739, p. 67–76, 2012.
    [Bibtex]
    @Article{hufsky12de-novo,
    author = {Franziska Hufsky and Martin Rempt and Florian Rasche and Georg Pohnert and Sebastian B\"ocker},
    title = {De Novo Analysis of Electron Impact Mass Spectra Using Fragmentation Trees},
    journal = {Anal Chim Acta},
    year = {2012},
    volume = {739},
    pages = {67--76},
    abstract = {The automated fragmentation analysis of high resolution EI mass spectra based on a fragmentation tree algorithm is introduced. Fragmentation trees are constructed from EI spectra by automated signal extraction and evaluation. These trees explain relevant fragmentation reactions and assign molecular formulas to fragments. The method enables the identification of the molecular ion and the molecular formula of a metabolite if the molecular ion is present in the spectrum. These identifications are independent of existing library knowledge and, thus, support assignment and structural elucidation of unknown compounds. The method works even if the molecular ion is of very low abundance or hidden under contaminants with higher masses. We apply the algorithm to a selection of 50 derivatized and underivatized metabolites and demonstrate that in 78\% of cases the molecular ion can be correctly assigned. The automatically constructed fragmentation trees correspond very well to published mechanisms and allow the assignment of specific relevant fragments and fragmentation pathways even in the most complex EI-spectra in our dataset. This method will be very helpful in the automated analysis of metabolites that are not included in common libraries and it thus has the potential to support the explorative character of metabolomics studies.},
    data = {gcms_data},
    doi = {10.1016/j.aca.2012.06.021},
    file = {HufskyEtAl_DeNovoAnalysisElectron_AnalChimActa_2012.pdf:2012/HufskyEtAl_DeNovoAnalysisElectron_AnalChimActa_2012.pdf:PDF},
    keywords = {jena; IDUN; metabolite ms; gcms; ms; ft; fragmentation trees},
    owner = {Sebastian},
    pmid = {22819051},
    timestamp = {2011.09.17},
    }
  • [DOI] M. Ludwig, F. Hufsky, S. Elshamy, and S. Böcker, "Finding characteristic substructures for metabolite classes," in Proc. of german conference on bioinformatics (gcb 2012), 2012, p. 23–38.
    [Bibtex]
    @InProceedings{ludwig12finding,
    author = {Marcus Ludwig and Franziska Hufsky and Samy Elshamy and Sebastian B\"ocker},
    title = {Finding Characteristic Substructures for Metabolite Classes},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2012)},
    year = {2012},
    volume = {26},
    series = {OpenAccess Series in Informatics (OASIcs)},
    pages = {23--38},
    publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
    abstract = {We introduce a method to find a characteristic substructure for a set of molecular structures. Different from common approaches, such as computing the maximum common subgraph, the resulting substructure does not have to be contained in its exact form in all input molecules. Our approach is part of the identification pipeline for unknown metabolites using fragmentation trees. Searching databases using fragmentation tree alignment results in hit lists containing compounds with large structural similarity to the unknown metabolite. The characteristic substructure of the molecules in the hit list may be a key structural element of the unknown compound and might be used as starting point for structural elucidation. We evaluate our method on different data sets and find that it retrieves essential substructures if the input lists are not too heterogeneous. We apply our method to predict structural elements for five unknown samples from Icelandic poppy.},
    doi = {10.4230/OASIcs.GCB.2012.23},
    keywords = {jena; IDUN; fragmentation trees; FT-BLAST; molecular substructures},
    owner = {Sebastian},
    timestamp = {2012.07.13},
    url = {http://drops.dagstuhl.de/opus/volltexte/2012/3715},
    }
  • [DOI] F. Rasche, K. Scheubert, F. Hufsky, T. Zichner, M. Kai, A. Svatoš, and S. Böcker, "Identifying the unknowns by aligning fragmentation trees," Anal chem, vol. 84, iss. 7, p. 3417–3426, 2012.
    [Bibtex]
    @Article{rasche12identifying,
    author = {Florian Rasche and Kerstin Scheubert and Franziska Hufsky and Thomas Zichner and Marco Kai and Ale\v{s} Svato\v{s} and Sebastian B\"ocker},
    title = {Identifying the unknowns by aligning fragmentation trees},
    journal = {Anal Chem},
    year = {2012},
    volume = {84},
    number = {7},
    pages = {3417--3426},
    abstract = {Mass spectrometry allows sensitive, automated and high-throughput analysis of small molecules. In principle, tandem mass spectrometry allows us to identify ``unknown'' small molecules not in any database, but the automated interpretation of such data is in its infancy. Fragmentation trees have recently been introduced for the automated analysis of the fragmentation patterns of small molecules. We present a method for the automated comparison of such fragmentation patterns, based on aligning the compounds' fragmentation trees. We cluster compounds based solely on their fragmentation patterns, and show a good agreement with known compound classes. Fragmentation pattern similarities are strongly correlated with the chemical similarity of molecules. We present a tool for searching a database for compounds with fragmentation pattern similar to an unknown sample compound. We apply this tool to metabolites from Icelandic poppy. Our method allows fully automated computational identification of small molecules that cannot be found in any database.},
    data = {tandemms_data},
    doi = {10.1021/ac300304u},
    file = {RascheEtAl_IdentifyingUnknownsAligning_AnalChem_2012_preprint.pdf:2012/RascheEtAl_IdentifyingUnknownsAligning_AnalChem_2012_preprint.pdf:PDF},
    keywords = {jena; linkpdf; fragmentation tree; FT; alignment; tree alignment; mass spectrometry; MS; LC-MS; de novo; fragmentation; tandem MS; idun},
    owner = {Sebastian},
    pmid = {22390817},
    timestamp = {2011.08.09},
    }
  • [DOI] I. Rauf, F. Rasche, F. Nicolas, and S. Böcker, "Finding maximum colorful subtrees in practice," in Proc. of research in computational molecular biology (recomb 2012), 2012, p. 213–223.
    [Bibtex]
    @InProceedings{rauf12finding,
    author = {Imran Rauf and Florian Rasche and Fran\c{c}ois Nicolas and Sebastian B\"ocker},
    title = {Finding Maximum Colorful Subtrees in practice},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2012)},
    year = {2012},
    volume = {7262},
    series = lncs,
    pages = {213--223},
    publisher = Springer,
    abstract = {In metabolomics and other fields dealing with small compounds, mass spectrometry is applied as sensitive high-throughput technique. Recently, fragmentation trees have been proposed to automatically analyze the fragmentation mass spectra recorded by such instruments. Computationally, this leads to the problem of finding a maximum weight subtree in an edge weighted and vertex colored graph, such that every color appears at most once in the solution. We introduce new heuristics and an exact algorithm for this Maximum Colorful Subtree problem, and evaluate them against existing algorithms on real-world datasets. Our tree completion heuristic consistently scores better than other heuristics, while the integer programming-based algorithm produces optimal trees with modest running times. Our fast and accurate heuristic can help to determine molecular formulas based on fragmentation trees. On the other hand, optimal trees from the integer linear program are useful if structure is relevant, e.g., for tree alignments.},
    doi = {10.1007/978-3-642-29627-7_22},
    file = {RaufEtAl_FindingMaximumColorful_RECOMB_2012.pdf:2012/RaufEtAl_FindingMaximumColorful_RECOMB_2012.pdf:PDF},
    journalref = {rauf13finding},
    keywords = {jena; metabolite ms; tandem ms; ms; ft; idun; TrACReview},
    owner = {Sebastian},
    timestamp = {2011.02.01},
    url = {http://www.springerlink.com/content/j147r771880655l3/},
    }
  • [DOI] L. Ullmann-Zeunert, A. Muck, N. Wielsch, F. Hufsky, M. A. Stanton, S. Bartram, S. Böcker, I. T. Baldwin, K. Groten, and A. Svatoš, "Determination of 15N-incorporation into plant proteins and their absolute quantitation: a new tool to study nitrogen flux dynamics and protein pool sizes elicited by plant-herbivore interactions," J proteome res, vol. 11, iss. 10, p. 4947–4960, 2012.
    [Bibtex]
    @Article{ullmann-zeunert12determination,
    author = {Lynn Ullmann-Zeunert and Alexander Muck and Natalie Wielsch and Franziska Hufsky and Mariana A. Stanton and Stefan Bartram and Sebastian B\"ocker and Ian T. Baldwin and Karin Groten and Ale\v{s} Svato\v{s}},
    title = {Determination of {15{N}}-incorporation into plant proteins and their absolute quantitation: a new tool to study nitrogen flux dynamics and protein pool sizes elicited by plant-herbivore interactions},
    journal = {J Proteome Res},
    year = {2012},
    volume = {11},
    number = {10},
    pages = {4947--4960},
    abstract = {Nitrogen (N) is an essential and limiting nutrient for plant growth, reproduction, and defense against herbivores. Trade-offs between growth and defense after herbivory are well-studied, but we lack an understanding of the mechanisms responsible for these fitness tradeoffs, mechanisms that involve herbivory-elicited changes in protein abundance and N-flux among tissues, largely due to the lack of suitable methods. Here we demonstrate that a mass spectrometric data-independent acquisition approach known as LC-MSE, combined with a novel algorithm to quantify heavy atom enrichment in peptides is able to quantify elicited changes in protein amounts and 15N flux in a high throughput manner. The quantification of the labeled proteins compares the total ion count (TIC) of peptides derived from proteins of interest to a standard protein (BSA) and the calculation is based on the ratios of the measured spectra to the calculated ideal spectra. The method was able to reliably quantify rabbit phosphorylase b in complex leaf protein extract at concentrations to 300 fmol. The linear dynamic range, reproducibility of technical and biological replicates and differences between measured and expected 15N-incorporation into the small (SSU) and large (LSU) subunits of ribulose-1,5-bisphosphate-carboxylase/oxygenase (RuBisCO) and RuBisCO activase (RCA2) of Nicotiana attenuata plants grown in hydroponic culture at different known concentrations of 15N-labeled nitrate, were used to further evaluate the procedure. The utility of the method for whole-plant studies in ecological realistic contexts was demonstrated by using15N-pulse protocols to plants growing in soil under unknown 15N-incorporation levels. A time-course analysis revealed an exponential increase in 15N-incorporation with maximal values between 6.94 At\% and 8.91 At\%, and distinct patterns of protein amounts of the three proteins over time, while the amounts of total soluble protein declined with leaf age. Additionally we quantified the amounts of lipoxygenase 2 (LOX2) protein, an important enzyme in anti-herbivore defense, in control and elicited plants. The data indicate that N is more rapidly incorporated into LOX2 after attack potentially priming plants for a further attack as suggested by a transient increase in LOX2 levels. This method allows for in-depth quantitative proteomics and15N flux analyses of the metabolic dynamics elicited during plant-herbivore interactions.},
    doi = {10.1021/pr300465n},
    keywords = {jena; proteomics; isotope labeling; heavy isotopes; 15N; plants; herbivores},
    owner = {Sebastian},
    pmid = {22905865},
    timestamp = {2012.06.22},
    }

2011

  • [DOI] S. Böcker, "A golden ratio parameterized algorithm for cluster editing," in Proc. of international workshop on combinatorial algorithms (iwoca 2011), 2011, p. 85–95.
    [Bibtex]
    @InProceedings{boecker11golden,
    author = {Sebastian B\"ocker},
    title = {A golden ratio parameterized algorithm for Cluster Editing},
    booktitle = {Proc. of International Workshop on Combinatorial Algorithms (IWOCA 2011)},
    year = {2011},
    volume = {7056},
    series = lncs,
    pages = {85--95},
    publisher = Springer,
    abstract = {The Cluster Editing problem asks to transform a graph by at most k edge modifications into a disjoint union of cliques. The problem is NP-complete, but several parameterized algorithms are known. We present a novel search tree algorithm for the problem, which improves running time from O*(1.76^k) to O*(1.62^k). In detail, we can show that we can always branch with branching vector (2,1) or better, resulting in the golden ratio as the base of the search tree size. Our algorithm uses a well-known transformation to the integer-weighted counterpart of the problem. To achieve our result, we combine three techniques: First, we show that zero-edges in the graph enforce structural features that allow us to branch more efficiently. Second, by repeatedly branching we can isolate vertices, releasing costs. Finally, we use a known characterization of graphs with few conflicts.},
    doi = {10.1007/978-3-642-25011-8_7},
    file = {Boecker_GoldenRatioParameterized_IWOCA_2011_preprint.pdf:2011/Boecker_GoldenRatioParameterized_IWOCA_2011_preprint.pdf:PDF},
    journalref = {boecker12golden},
    keywords = {jena; linkpdf; PABI; FPT; cluster editing},
    opteditor = {C.S. Iliopoulos and W.F. Smyth},
    owner = {Sebastian},
    timestamp = {2011.01.29},
    }
  • [DOI] S. Böcker, S. Briesemeister, and G. W. Klau, "Exact algorithms for cluster editing: evaluation and experiments," Algorithmica, vol. 60, iss. 2, p. 316–334, 2011.
    [Bibtex]
    @Article{boecker11exact,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {Exact Algorithms for Cluster Editing: Evaluation and Experiments},
    journal = {Algorithmica},
    year = {2011},
    volume = {60},
    number = {2},
    pages = {316--334},
    abstract = {The Cluster Editing problem is defined as follows: Given an undirected, loopless graph, we want to find a set of edge modifications (insertions and deletions) of minimum cardinality, such that the modified graph consists of disjoint cliques. We present empirical results for this problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of $\lvert V\rvert$ , where V is the vertex set of the input graph. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying $k\leq25\lvert V\rvert$ . In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques.},
    data = {cluster_editing_data},
    doi = {10.1007/s00453-009-9339-7},
    file = {BoeckerEtAl_ExactAlgorithmsClusterEditing_Algorithmica_2011_preprint.pdf:2011/BoeckerEtAl_ExactAlgorithmsClusterEditing_Algorithmica_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; FPT; PABI},
    owner = {Sebastian},
    timestamp = {2009.03.08},
    }
  • [DOI] S. Böcker, Q. B. A. Bui, and A. Truss, "Computing bond orders in molecule graphs," Theor comput sci, vol. 412, iss. 12–14, p. 1184–1195, 2011.
    [Bibtex]
    @Article{boecker11computing,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {Computing Bond Orders in Molecule Graphs},
    journal = TCS,
    year = {2011},
    volume = {412},
    number = {12--14},
    pages = {1184--1195},
    abstract = {In this paper, we deal with restoring missing information in molecule databases: Many data formats only store the atoms configuration but omit bond multiplicities. As this information is essential for various applications in chemistry, we consider the problem of recovering bond type information using a scoring function for the possible valences of each atom---the Bond Order Assignment problem. We show that Bond Order Assignment is NP-hard, and its maximization version is MAX SNP-hard. We then give two exact fixed-parameter algorithms for the problem, where bond orders are computed via dynamic programming on a tree decomposition of the molecule graph. We evaluate our algorithm on a set of real molecule graphs and find that it works fast in practice.},
    doi = {10.1016/j.tcs.2010.12.063},
    file = {BoeckerEtAl_ComputingBondOrders_TheorComputSci_2011_preprint.pdf:2011/BoeckerEtAl_ComputingBondOrders_TheorComputSci_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; PABI; FPT; bond orders; bond order assignment problem},
    owner = {baoanh},
    timestamp = {2010.08.29},
    }
  • [DOI] S. Böcker and P. Damaschke, "Even faster parameterized cluster deletion and cluster editing," Inform process lett, vol. 111, iss. 14, p. 717–721, 2011.
    [Bibtex]
    @Article{boecker11even,
    author = {Sebastian B\"ocker and Peter Damaschke},
    title = {Even faster parameterized cluster deletion and cluster editing},
    journal = IPL,
    year = {2011},
    volume = {111},
    number = {14},
    pages = {717--721},
    issn = {0020-0190},
    abstract = {Cluster Deletion and Cluster Editing ask to transform a graph by at most $k$ edge deletions or edge edits, respectively, into a cluster graph, i.e., disjoint union of cliques. Equivalently, a cluster graph has no conflict triples, i.e., two incident edges without a transitive edge. We solve the two problems in time $O^*(1.415^k)$ and $O^*(1.76^k)$, respectively. These results round off our earlier work by considerably improved time bounds. For Cluster Deletion we use a technique that cuts away small connected components that do no longer contribute to the exponential part of the time complexity. As this idea is simple and versatile, it may lead to improvements for several other parameterized graph problems. The improvement for Cluster Editing is achieved by using the full power of an earlier structure theorem for graphs where no edge is in three conflict triples.},
    doi = {10.1016/j.ipl.2011.05.003},
    file = {:2011/BoeckerDamaschke_EvenFasterParameterized_IPL_2011_preprint.pdf:PDF},
    keywords = {jena; linkpdf; fpt; PABI; cluster editing},
    owner = {Sebastian},
    timestamp = {2011.05.25},
    url = {http://www.sciencedirect.com/science/article/pii/S0020019011001232},
    }
  • [DOI] S. Böcker, B. Kehr, and F. Rasche, "Determination of glycan structure from tandem mass spectra," Ieee/acm trans comput biology bioinform, vol. 8, iss. 4, p. 976–986, 2011.
    [Bibtex]
    @Article{boecker11determination,
    author = {Sebastian B\"ocker and Birte Kehr and Florian Rasche},
    title = {Determination of glycan structure from tandem mass spectra},
    journal = TCBB,
    year = {2011},
    volume = {8},
    number = {4},
    pages = {976--986},
    abstract = {Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications, and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. In this paper, we consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry data. We study how to generate glycan tree candidates that suffciently match the sample mass spectrum, avoiding the combinatorial explosion of glycan structures. Unfortunately, the resulting problem is known to be computationally hard. We present an effcient exact algorithm for this problem based on fixed-parameter algorithmics, that can process a spectrum in a matter of seconds. We also report some preliminary results of our method on experimental data, combining it with a preliminary candidate evaluation scheme. We show that our approach is fast in applications, and that we can reach very good de novo identification results. Finally, we show how to count the number of glycan topologies for a fixed size or a fixed mass. We generalize this result to count the number of (labeled) trees with bounded out-degree, improving on results obtained using Polya's enumeration theorem.},
    doi = {10.1109/TCBB.2010.129},
    file = {:2011/BoeckerEtAl_DeterminationGlycanStructure_TCBB_2011.pdf:PDF},
    keywords = {fpt; jena; linkpdf; glycan ms; ms; PABI},
    owner = {Sebastian},
    pmid = {21173459},
    publisher = Springer,
    series = lncs,
    timestamp = {2009.04.06},
    }
  • [DOI] A. Baumgaertel, K. Scheubert, B. Pietsch, K. Kempe, A. C. Crecelius, S. Böcker, and U. S. Schubert, "Analysis of different synthetic homopolymers by the use of a new calculation software for tandem mass spectra," Rapid commun mass spectrom, vol. 25, iss. 12, p. 1765–1778, 2011.
    [Bibtex]
    @Article{baumgaertel11analysis,
    author = {Anja Baumgaertel and Kerstin Scheubert and Bernhard Pietsch and Kristian Kempe and Anna C. Crecelius and Sebastian B\"ocker and Ulrich S. Schubert},
    title = {Analysis of different synthetic homopolymers by the use of a new calculation software for tandem mass spectra},
    journal = {Rapid Commun Mass Spectrom},
    year = {2011},
    volume = {25},
    number = {12},
    pages = {1765--1778},
    abstract = {The manual interpretation of tandem mass spectra of synthetic polymers is very time-consuming. Therefore, a new software tool was developed to accelerate the interpretation of spectra obtained without requiring any further knowledge about the polymer class or the fragmentation behavior under high-energy collision-induced dissociation (CID) conditions. The software only requires an alphabetical list of elements and a peak list of the measured substance as an xml file for the evaluation of the chosen mass spectrum. Tandem mass spectra of different homopolymers, like poly(2-oxazoline)s, poly(ethylene glycol) and poly(styrene), were interpreted by the new software tool. This contribution describes a fast and automated software tool for the rapid analysis of homopolymers.},
    doi = {10.1002/rcm.5019},
    keywords = {jena; polymer MS; schubert},
    owner = {kerstin},
    pmid = {21598337},
    timestamp = {2011.05.25},
    }
  • [DOI] M. Brinkmeyer, T. Griebel, and S. Böcker, "FlipCut supertrees: towards matrix representation accuracy in polynomial time," in Proc. of computing and combinatorics conference (cocoon 2011), 2011, p. 37–48.
    [Bibtex]
    @InProceedings{brinkmeyer11flipcut,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {{FlipCut} Supertrees: Towards Matrix Representation Accuracy in Polynomial Time},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2011)},
    year = {2011},
    volume = {6842},
    series = lncs,
    pages = {37--48},
    publisher = Springer,
    abstract = {In computational phylogenetics, supertree methods provide a way to reconstruct larger clades of the Tree of Life. The problem of constructing a supertree of a given set of rooted input trees can be formalized in different ways, to cope with contradictory information in the input. In particular, there exist methods based on encoding the input trees in a matrix, and methods based on finding minimum cuts in some graph representing the incompatibility in the input. Evaluations have shown that the former matrix representation methods will compute supertrees of better quality but, unfortunately, the underlying problems are computationally hard, which can heavily constrain the size of reconstructed trees. In contrast, graph-based methods compute a supertree in polynomial time, but these supertrees are inferior in quality. In this paper, we present a novel approach for the computation of supertrees, called FlipCut supertree. Our method combines the computation of minimum cuts from graph-based methods with a matrix representation method, namely Minimum Flip Supertrees. Here, the input trees are encoded in a 0/1/?-matrix, and we present a heuristic to search for a minimum set of 0/1-flips such that the resulting matrix admits a directed perfect phylogeny. We then extend our approach, by using edge weights to weight the columns of the 0/1/?-matrix. In our evaluation, we show that our method is extremely swift in practice, and orders of magnitude faster than the runner up. Concerning supertree accuracy, our method outperforms most polynomial time supertree methods, and is sometimes on par with matrix representation with parsimony.},
    doi = {10.1007/978-3-642-22685-4_4},
    file = {:2011/BrinkmeyerEtAl_FlipCutSupertreesTowards_COCOON_2011_preprint.pdf:PDF},
    journalref = {brinkmeyer13flipcut},
    keywords = {jena; linkpdf; FlipCut; supertrees; supertree methods},
    opteditor = {Fu, Bin and Du, Ding-Zhu},
    owner = {Sebastian},
    timestamp = {2010.11.10},
    }
  • [DOI] M. Brinkmeyer, T. Griebel, and S. Böcker, "Polynomial supertree methods revisited," Adv bioinformatics, vol. 2011, iss. Article ID 524182, p. 21 pages, 2011.
    [Bibtex]
    @Article{brinkmeyer11polynomial,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {Polynomial Supertree Methods Revisited},
    journal = {Adv Bioinformatics},
    year = {2011},
    volume = {2011},
    number = {Article ID 524182},
    pages = {21 pages},
    abstract = {Supertree methods allow to reconstruct large phylogenetic trees by combining smaller trees with overlapping leaf sets, into one, more comprehensive supertree. The most commonly used supertree method, matrix representation with parsimony (MRP), produces accurate supertrees but is rather slow due to the underlying hard optimization problem. In this paper, we present an extensive simulation study comparing the performance of MRP and the polynomial supertree methods MinCut Supertree, Modified MinCut Supertree, Build-with-distances, PhySIC, PhySIC IST and super distance matrix. We consider both quality and resolution of the reconstructed supertrees. Our findings illustrate the trade-off between accuracy and running time in supertree construction, as well as the pros and cons of voting- and veto-based supertree approaches. Based on our results, we make some general suggestions for supertree methods yet to come.},
    doi = {10.1155/2011/524182},
    journalref = {brinkmeyer13polynomial},
    keywords = {jena; supertrees; simulation study},
    owner = {Sebastian},
    pmid = {22229028},
    timestamp = {2011.10.04},
    url = {http://www.hindawi.com/journals/abi/2011/524182/},
    }
  • M. Chimani, M. Woste, and S. Böcker, "A closer look at the closest string and closest substring problem," in Proc. of algorithm engineering & experiments (alenex 2011), 2011, p. 13–24.
    [Bibtex]
    @InProceedings{chimani11closer,
    author = {Markus Chimani and Matthias Woste and Sebastian B\"ocker},
    title = {A Closer Look at the Closest String and Closest Substring Problem},
    booktitle = {Proc. of Algorithm Engineering \& Experiments (ALENEX 2011)},
    year = {2011},
    pages = {13--24},
    publisher = {SIAM},
    abstract = {Let $S$ be a set of $k$ strings over an alphabet $\Sigma$; each string has a length between $\ell$ and $n$. The \emph{Closest Substring Problem (CSSP)} is to find a minimal integer $d$ (and a corresponding string $t$ of length $\ell$) such that each string $s\in S$ has a substring of length $\ell$ with Hamming distance at most $d$ to $t$. We say $t$ is the \emph{closest substring} to $S$. For $\ell = n$, this problem is known as the \emph{Closest String Problem (CSP)}. Particularly in computational biology, the CSP and CSSP have found numerous practical applications such as identifying regulatory motifs and approximate gene clusters, and in degenerate primer design. We study ILP formulations for both problems. Our experiments show that a position-based formulation for the CSP performs very well on real-world instances emerging from biology. Even on randomly generated instances that are hard to solve to optimality, solving the root relaxation leads to solutions very close to the optimum. For the CSSP we give a new formulation that is polytope-wise stronger than a straightforward extension of the CSP formulation. Furthermore we propose a strengthening constraint class that speeds up the running time.},
    keywords = {jena; FPT; center string; closest string; ILP},
    owner = {Sebastian},
    timestamp = {2010.11.11},
    url = {http://www.siam.org/proceedings/alenex/2011/alx11_02_chimanim.pdf},
    }
  • [DOI] A. K. Dehof, A. Rurainski, Q. B. A. Bui, S. Böcker, H. Lenhof, and A. Hildebrandt, "Automated bond order assignment as an optimization problem," Bioinformatics, vol. 27, iss. 5, p. 619–625, 2011.
    [Bibtex]
    @Article{dehof11automated,
    author = {Anna Katharina Dehof and Alexander Rurainski and Quang Bao Anh Bui and Sebastian B\"ocker and Hans-Peter Lenhof and Andreas Hildebrandt},
    title = {Automated Bond Order Assignment as an Optimization Problem},
    journal = {Bioinformatics},
    year = {2011},
    volume = {27},
    number = {5},
    pages = {619--625},
    abstract = {Numerous applications in Computational Biology process molecular structures and hence strongly rely not only on correct atomic coordinates, but also on correct bond order information. For proteins and nucleic acids, bond orders can be easily deduced but this does not hold for other types of molecules like ligands. For ligands bond order information is not always provided in molecular databases and thus a variety of approaches tackling this problem have been developed. In this work we extend an ansatz proposed by (Wang, 2006) that assigns connectivity based penalty scores and tries to heuristically approximate its optimum. In this work we present three efficient and exact solvers for the problem replacing the heuristic approximation scheme of the original approach: an A* scheme, an ILP formulation, and an FPT approach. We implemented and evaluated the original implementation, our A*, ILP, and FPT formulation on the MMFF94 validation suite and the KEGG Drug database. We show the benefit of computing exact solutions of the penalty minimization problem and the additional gain when computing all optimal (or even suboptimal) solutions. We close with a detailed comparison of our methods. The A* and ILP solution are integrated into the open-source C++ LGPL library BALL (Kohlbacher, 2000; Hildebrandt, 2010) and the molecular visualization and modelling tool BALLView and can be downloaded from our homepage www.ball-project.org. The FPT implementation can be downloaded from http://bio.informatik.uni-jena.de/software/.},
    doi = {10.1093/bioinformatics/btq718},
    keywords = {jena; linkpdf; FPT; bond orders; bond order assignment problem},
    owner = {baoanh},
    pmid = {21245051},
    timestamp = {2010.08.30},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btq718?ijkey=dFsXGECTjNzzMNQ&keytype=ref},
    }
  • [DOI] K. Grützmann, S. Böcker, and S. Schuster, "Combinatorics of aliphatic amino acids," Naturwissenschaften, vol. 98, iss. 1, p. 79–86, 2011.
    [Bibtex]
    @Article{gruetzmann11combinatorics,
    author = {Konrad Gr\"utzmann and Sebastian B\"ocker and Stefan Schuster},
    title = {Combinatorics of aliphatic amino acids},
    journal = {Naturwissenschaften},
    year = {2011},
    volume = {98},
    number = {1},
    pages = {79--86},
    abstract = {This study combines biology and mathematics, showing that a relatively simple question from molecular biology can lead to complicated mathematics. The question is how to calculate the number of theoretically possible aliphatic amino acids as a function of the number of carbon atoms in the side chain. The presented calculation is based on earlier results from theoretical chemistry concerning alkyl compounds. Mathematical properties of this number series are highlighted. We discuss which of the theoretically possible structures really occur in living organisms, such as leucine and isoleucine with a chain length of four. This is done both for a strict definition of aliphatic amino acids only involving carbon and hydrogen atoms in their side chain and for a less strict definition allowing sulphur, nitrogen and oxygen atoms. While the main focus is on proteinogenic amino acids, we also give several examples of non-proteinogenic aliphatic amino acids, playing a role, for instance, in signalling. The results are in agreement with a general phenomenon found in biology: Usually, only a small number of molecules are chosen as building blocks to assemble an inconceivable number of different macromolecules as proteins. Thus, natural biological complexity arises from the multifarious combination of building blocks.},
    doi = {10.1007/s00114-010-0743-2},
    keywords = {jena; counting trees},
    owner = {Sebastian},
    pmid = {21120449},
    timestamp = {2010.12.08},
    }
  • [DOI] F. Hufsky, L. Kuchenbecker, K. Jahn, J. Stoye, and S. Böcker, "Swiftly computing center strings," Bmc bioinf, vol. 12, p. 106, 2011.
    [Bibtex]
    @Article{hufsky11swiftly,
    author = {Franziska Hufsky and L\'eon Kuchenbecker and Katharina Jahn and Jens Stoye and Sebastian B\"ocker},
    title = {Swiftly computing center strings},
    journal = {BMC Bioinf},
    year = {2011},
    volume = {12},
    pages = {106},
    abstract = {Background. The center string (or closest string) problem is a classic computer science problem with important applications in computational biology. Given k input strings and a distance threshold d, we search for a string within Hamming distance at most d to each input string. This problem is NP complete. Results. In this paper, we focus on exact methods for the problem that are also swift in application. We first introduce data reduction techniques that allow us to infer that certain instances have no solution, or that a center string must satisfy certain conditions. We describe how to use this information to speed up two previously published search tree algorithms. Then, we describe a novel iterative search strategy that is efficient in practice, where some of our reduction techniques can also be applied. Finally, we present results of an evaluation study for two different data sets from a biological application. Conclusions. We find that the running time for computing the optimal center string is dominated by the subroutine calls for d=dopt-1 and d=dopt. Our data reduction is very effective for both, either rejecting unsolvable instances or solving trivial positions. We find that this speeds up computations considerably.},
    data = {center_strings},
    doi = {10.1186/1471-2105-12-106},
    keywords = {jena; 3AGC; center string, closest string, gene clusters},
    owner = {Sebastian},
    pmid = {21504573},
    timestamp = {2011.04.03},
    url = {http://www.biomedcentral.com/1471-2105/12/106},
    }
  • [DOI] F. Rasche, A. Svatoš, R. K. Maddula, C. Böttcher, and S. Böcker, "Computing fragmentation trees from tandem mass spectrometry data," Anal chem, vol. 83, iss. 4, p. 1243–1251, 2011.
    [Bibtex]
    @Article{rasche11computing,
    author = {Florian Rasche and Ale\v{s} Svato\v{s} and Ravi Kumar Maddula and Christoph B\"ottcher and Sebastian B\"ocker},
    title = {Computing fragmentation trees from tandem mass spectrometry data},
    journal = {Anal Chem},
    year = {2011},
    volume = {83},
    number = {4},
    pages = {1243--1251},
    abstract = {The structural elucidation of organic compounds in complex biofluids and tissues remains a significant analytical challenge. For mass spectrometry, the manual interpretation of collision-induced dissociation (CID) mass spectra is cumbersome and requires expert-knowledge, as the fragmentation mechanisms of ions formed from small molecules are not completely understood. The automated identification of compounds is generally limited to searching in spectral libraries. Here, we present a method for interpreting the CID spectra of the organic compounds protonated ions by computing fragmentation trees that establish not only the molecular formula of the compound and all fragment ions, but also the dependencies between fragment ions. This is an important step towards the automated identification of unknowns from the CID spectra of compounds that are not in any database.},
    data = {tandemms_data},
    doi = {10.1021/ac101825k},
    erratum = {http://pubs.acs.org/doi/abs/10.1021/ac201785d},
    file = {RascheEtAl_ComputingFragmentationTrees_AnalChem_2011.pdf:2011/RascheEtAl_ComputingFragmentationTrees_AnalChem_2011.pdf:PDF},
    keywords = {jena; Mass Spectrometry; ms; metabolite ms; tandem ms; metabolite identification; ft; idun; TrACReview},
    owner = {Sebastian},
    pmid = {21182243},
    timestamp = {2009.09.30},
    }
  • [DOI] K. Scheubert, F. Hufsky, F. Rasche, and S. Böcker, "Computing fragmentation trees from metabolite multiple mass spectrometry data," in Proc. of research in computational molecular biology (recomb 2011), 2011, p. 377–391.
    [Bibtex]
    @InProceedings{scheubert11computing,
    author = {Kerstin Scheubert and Franziska Hufsky and Florian Rasche and Sebastian B\"ocker},
    title = {Computing fragmentation trees from metabolite multiple mass spectrometry data},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2011)},
    year = {2011},
    volume = {6577},
    series = lncs,
    pages = {377--391},
    publisher = Springer,
    abstract = {Since metabolites cannot be predicted from the genome sequence, high-throughput de-novo identification of small molecules is highly sought. Mass spectrometry (MS) in combination with a fragmentationtechnique is commonly used for this task. Unfortunately, automated analysis of such data is in its infancy. Recently, fragmentation trees have been proposed as an analysis tool for such data. Additional fragmentation steps (MS^n) reveal more information about the molecule. We propose to use MS^n data for the computation of fragmentation trees, and present the Colorful Subtree Closure problem to formalize this task: There, we search for a colorful subtree inside a vertex-colored graph, such that the weight of the transitive closure of the subtree is maximal. We give several negative results regarding the tractability and approximability of this and related problems. We then present an exact dynamic programming algorithm, which is parameterized by the number of colors in the graph and is swift in practice. Evaluation of our method on a dataset of 45 reference compounds showed that the quality of constructed fragmentation trees is improved by using MS^n instead of MS^2 measurements.},
    doi = {10.1007/978-3-642-20036-6_36},
    file = {:2011/ScheubertEtAl_ComputingFragmentationTrees_RECOMB_2011_preprint.pdf:PDF},
    journalref = {scheubert11computing-jcb},
    keywords = {jena; linkpdf; MS; Mass Spectrometry; metabolites; metabolite identification; multiple MS; MSn; metabolite ms; ft; idun;},
    owner = {Sebastian},
    timestamp = {2010.12.07},
    }
  • [DOI] K. Scheubert, F. Hufsky, F. Rasche, and S. Böcker, "Computing fragmentation trees from metabolite multiple mass spectrometry data," J comput biol, vol. 18, iss. 11, p. 1383–1397, 2011.
    [Bibtex]
    @Article{scheubert11computing-jcb,
    author = {Kerstin Scheubert and Franziska Hufsky and Florian Rasche and Sebastian B\"ocker},
    title = {Computing Fragmentation Trees from Metabolite Multiple Mass Spectrometry Data},
    journal = {J Comput Biol},
    year = {2011},
    volume = {18},
    number = {11},
    pages = {1383--1397},
    abstract = {Since metabolites cannot be predicted from the genome sequence, high-throughput de novo identification of small molecules is highly sought. Mass spectrometry (MS) in combination with a fragmentation technique is commonly used for this task. Unfortunately, automated analysis of such data is in its infancy. Recently, fragmentation trees have been proposed as an analysis tool for such data. Additional fragmentation steps (MSn) reveal more information about the molecule. We propose to use MSn data for the computation of fragmentation trees, and present the Colorful Subtree Closure problem to formalize this task: There, we search for a colorful subtree inside a vertex-colored graph, such that the weight of the transitive closure of the subtree is maximal. We give several negative results regarding the tractability and approximability of this and related problems. We then present an exact dynamic programming algorithm, which is parameterized by the number of colors in the graph and is swift in practice. Evaluation of our method on a dataset of 45 reference compounds showed that the quality of constructed fragmentation trees is improved by using MSn instead of MS2 measurements.},
    doi = {10.1089/cmb.2011.0168},
    keywords = {jena; mass spectrometry; fragmentation tree; multiple MS; small molecules; metabolite ms; ft; idun; ms; MSn; TrACReview},
    pmid = {22035289},
    }
  • [DOI] T. Wittkop, D. Emig, A. Truss, M. Albrecht, S. Böcker, and J. Baumbach, "Comprehensive cluster analysis with transitivity clustering," Nat protocols, vol. 6, p. 285–295, 2011.
    [Bibtex]
    @Article{wittkop11comprehensive,
    author = {Tobias Wittkop and Dorothea Emig and Anke Truss and Mario Albrecht and Sebastian B\"ocker and Jan Baumbach},
    title = {Comprehensive cluster analysis with Transitivity Clustering},
    journal = {Nat Protocols},
    year = {2011},
    volume = {6},
    pages = {285--295},
    abstract = {Transitivity Clustering is a method for the partitioning of biological data, a long-standing challenge in computational biology. It is based on weighted transitive graph projection and provides integrated access to various functions, in order to addressing each step of point in a typical cluster analysis. To facilitate this, Transitivity Clustering offers three user-friendly interfaces: a powerful stand-alone version, a web interface, and a collection of Cytoscape plug-ins. Here, we describe three major workflows: (1) integrated protein sequence cluster analysis with Cytoscape; (2) semi-supervised protein family detection; and (3) clustering of gene expression data. This protocol guides the user through the most important features of Transitivity Clustering and takes one hour to complete.},
    doi = {10.1038/nprot.2010.197},
    keywords = {jena; cluster editing; transitivity clustering; TransClust},
    owner = {Sebastian},
    pmid = {21372810},
    timestamp = {2010.11.10},
    }
  • [DOI] T. Wittkop, S. Rahmann, S. Böcker, and J. Baumbach, "Extension and robustness of transitivity clustering for protein-protein interaction network analysis," Internet math, vol. 7, iss. 4, p. 255–273, 2011.
    [Bibtex]
    @Article{wittkop11extension,
    author = {Tobias Wittkop and Sven Rahmann and Sebastian B\"ocker and Jan Baumbach},
    title = {Extension and robustness of Transitivity Clustering for protein-protein interaction network analysis},
    journal = {Internet Math},
    year = {2011},
    volume = {7},
    number = {4},
    pages = {255--273},
    abstract = {Partitioning biological data objects into groups, such that the objects within the groups share common traits, is a long-standing challenge in computational biology. Recently, we developed and established Transitivity Clustering, a partitioning approach based on Weighted Transitive Graph Projection that utilizes a single similarity threshold as density parameter. In previous publications, we concentrated on the graphical user interface and on concrete biomedical application protocols. Here, we contribute the following theoretical considerations: (1) We provide proofs that the average similarity between objects from the same cluster is above the user-given threshold and the average similarity between objects from different clusters is below the threshold. (2) We extend Transitivity Clustering to an overlappingclustering tool by integrating two new approaches. (3) We demonstrate the power of Transitivity Clustering for protein complex detection. We evaluate our approaches against others by utilizing gold standard data that was previously used by Broh\'ee et al. for reviewing existing bioinformatics clustering tools. The extended version of Transitivity Clustering is available online at http://transclust.cebitec.uni-bielefeld.de.},
    doi = {10.1080/15427951.2011.604559},
    keywords = {jena; cluster editing},
    optmonth = {#may#},
    owner = {Sebastian},
    timestamp = {2010.11.16},
    url = {http://transclust.cebitec.uni-bielefeld.de},
    }

2010

  • [DOI] M. Brinkmeyer, T. Griebel, and S. Böcker, "Polynomial supertree methods revisited," in Proc. of pattern recognition in bioinformatics (prib 2010), 2010, p. 183–194.
    [Bibtex]
    @InProceedings{brinkmeyer10polynomial,
    author = {Malte Brinkmeyer and Thasso Griebel and Sebastian B\"ocker},
    title = {Polynomial Supertree Methods Revisited},
    booktitle = {Proc. of Pattern Recognition in Bioinformatics (PRIB 2010)},
    year = {2010},
    volume = {6282},
    series = lncs,
    pages = {183--194},
    publisher = Springer,
    abstract = {Supertree methods allow to reconstruct large phylogenetic trees by combining smaller trees with overlapping leaf sets, into one, more comprehensive supertree. The most commonly used supertree method, matrix representation with parsimony (MRP), produces accurate supertrees but is rather slow due to the underlying hard optimization problem. In this paper, we present an extensive simulation study comparing the performance of MRP and the polynomial supertree methods MinCut Supertree, Modified MinCut Supertree, Build-with-distances, PhySIC, and PhySIC IST. We consider both quality and resolution of the reconstructed supertrees. Our findings illustrate the trade-off between accuracy and running time in supertree construction, as well as the pros and cons of voting- and veto-based supertree approaches.},
    doi = {10.1007/978-3-642-16001-1_16},
    file = {:2010/BrinkmeyerEtAl_PolynomialSupertreeMethodsRevisited_PRIB_2010.pdf:PDF},
    journalref = {brinkmeyer11polynomial},
    keywords = {jena; linkpdf; phylogenetics, supertrees},
    owner = {Sebastian},
    timestamp = {2010.06.27},
    }
  • [DOI] M. Chimani, S. Rahmann, and S. Böcker, "Exact ILP solutions for phylogenetic minimum flip problems," in Proc. of acm conf. on bioinformatics and computational biology (acm-bcb 2010), 2010, p. 147–153.
    [Bibtex]
    @InProceedings{chimani10exact,
    author = {Markus Chimani and Sven Rahmann and Sebastian B\"ocker},
    title = {Exact {ILP} Solutions for Phylogenetic Minimum Flip Problems},
    booktitle = {Proc. of ACM Conf. on Bioinformatics and Computational Biology (ACM-BCB 2010)},
    year = {2010},
    pages = {147--153},
    publisher = {ACM press, New York},
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree or supertree of a given set of rooted input trees can be formalized in different ways. We consider the Minimum Flip Consensus Tree and Minimum Flip Supertree problem, where input trees are transformed into a 0/1/?-matrix, such that each row represents a taxon, and each column represents a subtree membership. For the consensus tree problem, all input trees contain the same set of taxa, and no ?-entries occur. For the supertree problem, the input trees may contain different subsets of the taxa, and unrepresented taxa are coded with ?-entries. In both cases, the goal is to find a perfect phylogeny for the input matrix requiring a minimum number of 0/1-flips, i.e., matrix entry corrections. Both optimization problems are NP-hard. We present the first efficient Integer Linear Programming (ILP) formulations for both problems, using three distinct characterizations of a perfect phylogeny. Although these three formulations seem to differ considerably at first glance, we show that they are in fact polytope-wise equivalent. Introducing a novel column generation scheme, it turns out that the simplest, purely combinatorial formulation is the most efficient one in practice. Using our framework, it is possible to find exact solutions for instances with ~100 taxa.},
    doi = {10.1145/1854776.1854800},
    file = {:2010/ChimaniEtAl_ExactILPSolutionsMinFlip_ACMBCB_2010.pdf:PDF},
    keywords = {jena; linkpdf; FlipCut; exact methods, ilp, flip supertrees},
    owner = {Sebastian},
    timestamp = {2010.02.21},
    }
  • [DOI] G. Govind, O. Mittapalli, T. Griebel, S. Allmann, S. Böcker, and I. T. Baldwin, "Unbiased transcriptional comparisons of generalist and specialist herbivores feeding on progressively defenseless \emphNicotiana attenuata plants," Plos one, vol. 5, iss. 1, p. e8735, 2010.
    [Bibtex]
    @Article{govind10unbiased,
    author = {Govind, Geetha and Mittapalli, Omprakash and Griebel, Thasso and Allmann, Silke and B\"ocker, Sebastian and Baldwin, Ian Thomas},
    title = {Unbiased Transcriptional Comparisons of Generalist and Specialist Herbivores Feeding on Progressively Defenseless \emph{{Nicotiana} attenuata} Plants},
    journal = {PLoS One},
    year = {2010},
    volume = {5},
    number = {1},
    pages = {e8735},
    abstract = {Background Herbivore feeding elicits dramatic increases in defenses, most of which require jasmonate (JA) signaling, and against which specialist herbivores are thought to be better adapted than generalist herbivores. Unbiased transcriptional analyses of how neonate larvae cope with these induced plant defenses are lacking. Methodology/Principal Findings We created cDNA microarrays for Manduca sexta and Heliothis virescens separately, by spotting normalized midgut-specific cDNA libraries created from larvae that fed for 24 hours on MeJA-elicited wild-type (WT) Nicotiana attenuata plants. These microarrays were hybridized with labeled probes from neonates that fed for 24 hours on WT and isogenic plants progressively silenced in JA-mediated defenses (N: nicotine; N/PI: N and trypsin protease inhibitors; JA: all JA-mediated defenses). H. virescens neonates regulated 16 times more genes than did M. sexta neonates when they fed on plants silenced in JA-mediated defenses, and for both species, the greater the number of defenses silenced in the host plant (JA > N/PI > N), the greater were the number of transcripts regulated in the larvae. M. sexta larvae tended to down-regulate while H. virescens larvae up- and down-regulated transcripts from the same functional categories of genes. M. sexta larvae regulated transcripts in a diet-specific manner, while H. virescens larvae regulated a similar suite of transcripts across all diet types. Conclusions/Significance The observations are consistent with the expectation that specialists are better adapted than generalist herbivores to the defense responses elicited in their host plants by their feeding. While M. sexta larvae appear to be better adapted to N. attenuata's defenses, some of the elicited responses remain effective defenses against both herbivore species. The regulated genes provide novel insights into larval adaptations to N. attenuata's induced defenses, and represent potential targets for plant-mediated RNAi to falsify hypotheses about the process of adaptation.},
    doi = {10.1371/journal.pone.0008735},
    file = {:2010/GodvindEtAl_UnbiasedTranscriptionalComparisons_PLoSone_2010.pdf:PDF},
    keywords = {jena; microarray, cDNA, EST},
    pmid = {20090945},
    publisher = {Public Library of Science},
    }
  • [DOI] F. Hufsky, L. Kuchenbecker, K. Jahn, J. Stoye, and S. Böcker, "Swiftly computing center strings," in Proc. of workshop on algorithms in bioinformatics (wabi 2010), 2010, p. 325–336.
    [Bibtex]
    @InProceedings{hufsky10swiftly,
    author = {Franziska Hufsky and L\'eon Kuchenbecker and Katharina Jahn and Jens Stoye and Sebastian B\"ocker},
    title = {Swiftly computing center strings},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2010)},
    year = {2010},
    volume = {6293},
    series = lncs,
    pages = {325--336},
    publisher = Springer,
    abstract = {The center string (or closest string) problem is a classical computer science problem with important applications in computational biology. Given $k$ input strings and a distance threshold $d$, we search for a string within Hamming distance $d$ to each input string. This problem is NP-complete. In this paper, we focus on exact methods for the problem that are also fast in application. First, we introduce data reduction techniques that allow us to infer that certain instances have no solution, or that a center string must satisfy certain conditions. Then, we describe a novel search tree strategy that is very efficient in practice. Finally, we present results of an evaluation study for instances from a biological application. We find that data reduction is mandatory for the notoriously difficult case $d=\dopt-1$.},
    data = {center_strings},
    doi = {10.1007/978-3-642-15294-8_27},
    file = {:2010/HufskyEtAl_SwiftlyComputingCenterStrings_WABI_2010.pdf:PDF},
    journalref = {hufsky11swiftly},
    keywords = {jena; 3AGC; center string, closest string, gene clusters},
    owner = {Sebastian},
    timestamp = {2010.05.29},
    }
  • [DOI] S. Neumann and S. Böcker, "Computational mass spectrometry for metabolomics – a review," Anal bioanal chem, vol. 398, iss. 7, p. 2779–2788, 2010.
    [Bibtex]
    @Article{neumann10computational,
    author = {Steffen Neumann and Sebastian B\"ocker},
    title = {Computational Mass Spectrometry for Metabolomics -- A Review},
    journal = {Anal Bioanal Chem},
    year = {2010},
    volume = {398},
    number = {7},
    pages = {2779--2788},
    abstract = {The identification of compounds from mass spectrometry (MS) data is still seen as a major bottleneck in the interpretation of MS data. This is particularly the case for the identification of small compounds such as metabolites, where until recently little progress has been made. Here we review the available approaches to annotation and identification of chemical compounds based on electrospray ionization (ESI-MS) data. The methods are not limited to metabolomics applications, but are applicable to any small compounds amenable to MS analysis. Starting with the definition of identification, we focus on the analysis of tandem mass and $MS^n$ spectra, which can provide a wealth of structural information. Searching in libraries of reference spectra provides the most reliable source of identification, especially if measured on comparable instruments. We review several choices for the distance functions. The identification without reference spectra is even more challenging, because it requires approaches to interpret tandem mass spectra with regard to the molecular structure. Both commercial and free tools are capable of mining general-purpose compound libraries, and identifying candidate compounds. The holy grail of computational mass spectrometry is the de novo deduction of structure hypotheses for compounds, where method development has only started thus far. In a case study, we apply several of the available methods to the three compounds, kaempferol, reserpine, and verapamil, and investigate whether this results in reliable identifications.},
    doi = {10.1007/s00216-010-4142-5},
    file = {NeumannBoecker_ComputationalMassSpectrometry_AnalBioanalChem_2010_reprint.pdf:2010/NeumannBoecker_ComputationalMassSpectrometry_AnalBioanalChem_2010_reprint.pdf:PDF},
    keywords = {jena; linkpdf; MS; Mass Spectrometry; metabolites; metabolite MS; isotope patterns; TrACReview},
    owner = {Sebastian},
    pmid = {20936272},
    timestamp = {2010.08.23},
    }
  • [DOI] T. Wittkop, D. Emig, S. Lange, S. Rahmann, M. Albrecht, J. H. Morris, S. Böcker, J. Stoye, and J. Baumbach, "Partitioning biological data with transitivity clustering," Nat methods, vol. 7, iss. 6, p. 419–420, 2010.
    [Bibtex]
    @Article{wittkop10partitioning,
    author = {Tobias Wittkop and Dorothea Emig and Sita Lange and Sven Rahmann and Mario Albrecht and John H. Morris and Sebastian B\"ocker and Jens Stoye and Jan Baumbach},
    title = {Partitioning biological data with transitivity clustering},
    journal = {Nat Methods},
    year = {2010},
    volume = {7},
    number = {6},
    pages = {419--420},
    abstract = {Clustering is a common computational technique for data analysis in the life sciences. Essentially one defines clustering as a partitioning of arbitrary data objects into groups, such that the objects in each group, or cluster, have common traits, with respect to a similarity function. Ideally, objects from the same cluster are more similar to each other than to objects from different clusters. A density parameter controls the size and the number of resulting clusters. Here we present `transitivity clustering', an alternative way to partition biological data, with easy-to-use interfaces: (i) a web interface for a quick analysis of medium-sized datasets, (ii) a powerful stand-alone Java implementation for large-scale data clustering, and (iii) a collection of Cytoscape plug-ins that also provide methods to answer typical follow-up questions.},
    doi = {10.1038/nmeth0610-419},
    file = {:2010/WittkopEtAl_PartitioningBiologicalData_NatMethods_2010.pdf:PDF},
    keywords = {jena; linkpdf; cluster editing, clustering, fpt},
    owner = {Sebastian},
    pmid = {20508635},
    timestamp = {2010.04.27},
    }

2009

  • [DOI] S. Böcker, S. Briesemeister, Q. B. A. Bui, and A. Truss, "Going weighted: parameterized algorithms for cluster editing," Theor comput sci, vol. 410, iss. 52, p. 5467–5480, 2009.
    [Bibtex]
    @Article{boecker09going,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {Going Weighted: Parameterized Algorithms for Cluster Editing},
    journal = TCS,
    year = {2009},
    volume = {410},
    number = {52},
    pages = {5467--5480},
    abstract = {The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper, we present a number of surprisingly simple search tree algorithms for Weighted Cluster Editing assuming that edge insertion and deletion costs are positive integers. We show that the smallest search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm, both for this problem and its unweighted counterpart. We have implemented and compared our algorithms, and achieved promising results.},
    data = {cluster_editing_data},
    doi = {10.1016/j.tcs.2009.05.006},
    file = {:2009/BoeckerEtAl_GoingWeightedParameterized_TheorComputSci_2009.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    }
  • [DOI] S. Böcker, S. Briesemeister, and G. W. Klau, "On optimal comparability editing with applications to molecular diagnostics," Bmc bioinf, vol. 10, iss. Suppl 1, p. S61, 2009.
    [Bibtex]
    @Article{boecker09optimal,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {On optimal comparability editing with applications to molecular diagnostics},
    journal = {BMC Bioinf},
    year = {2009},
    volume = {10},
    number = {Suppl 1},
    pages = {S61},
    note = {Proc. of \emph{Asia-Pacific Bioinformatics Conference} (APBC 2009)},
    abstract = {The COMPARABILITY EDITING problem appears in the context of hierarchical disease classification based on noisy data. We are given a directed graph G representing hierarchical relationships between patient subgroups. The task is to identify the minimum number of edge insertions or deletions to transform G into a transitive graph, that is, if edges (u, v) and (v, w) are present then edge (u, w) must be present, too. We present two new approaches for the problem based on fixed-parameter algorithmics and integer linear programming. In contrast to previously used heuristics, our approaches compute provably optimal solutions. Our computational results demonstrate that our exact algorithms are by far more efficient in practice than a previously used heuristic approach. In addition to the superior running time performance, our algorithms are capable of enumerating all optimal solutions, and naturally solve the weighted version of the problem.},
    doi = {10.1186/1471-2105-10-S1-S61},
    keywords = {jena; FPT; linkpdf; PABI; transitivity editing;},
    owner = {Sebastian},
    pmid = {19208165},
    timestamp = {2007.10.17},
    url = {http://www.biomedcentral.com/1471-2105/10/S1/S61},
    }
  • [DOI] S. Böcker, Q. B. A. Bui, P. Seeber, and A. Truss, "Computing bond types in molecule graphs," in Proc. of computing and combinatorics conference (cocoon 2009), 2009, p. 297–306.
    [Bibtex]
    @InProceedings{boecker09computing,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Patrick Seeber and Anke Truss},
    title = {Computing Bond Types in Molecule Graphs},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {297--306},
    publisher = Springer,
    abstract = {In this paper we deal with restoring missing information in molecule databases: Some data formats only store the atoms' configuration but omit bond multiplicities. As this information is essential for various applications in chemistry, we consider the problem of recovering bond type information using a scoring function for the possible valences of each atom---the Bond Type Assignment problem. We prove the NP-hardness of Bond Type Assignment and give an exact fixed-parameter algorithm for the problem where bond types are computed via dynamic programming on a tree decomposition of the molecule graph. We evaluate our algorithm on a set of real molecule graphs and find that it works fast and accurately.},
    doi = {10.1007/978-3-642-02882-3_28},
    file = {BoeckerEtAl_ComputingBondTypesMoleculeGraphs_Cocoon_2009.pdf:2009/BoeckerEtAl_ComputingBondTypesMoleculeGraphs_Cocoon_2009.pdf:PDF},
    journalref = {boecker11computing},
    keywords = {jena; FPT; linkpdf; tree decomposition; PABI},
    owner = {baoanh},
    timestamp = {2009.04.06},
    }
  • [DOI] S. Böcker, F. Hüffner, A. Truss, and M. Wahlström, "A faster fixed-parameter approach to drawing binary tanglegrams," in Proc. of international workshop on parameterized and exact computation (iwpec 2009), 2009, p. 38–49.
    [Bibtex]
    @InProceedings{boecker09faster,
    author = {Sebastian B\"ocker and Falk H\"uffner and Anke Truss and Magnus Wahlstr\"om},
    title = {A faster fixed-parameter approach to drawing binary tanglegrams},
    booktitle = {Proc. of International Workshop on Parameterized and Exact Computation (IWPEC 2009)},
    year = {2009},
    volume = {5917},
    series = lncs,
    pages = {38--49},
    publisher = Springer,
    abstract = {Given two binary phylogenetic trees covering the same n species, it is useful to compare them by drawing them with leaves arranged side-by-side. To facilitate comparison, we would like to arrange the trees to minimize the number of crossings k induced by connecting pairs of identical species. This is the NP-hard Tanglegram Layout problem. By providing a fast transformation to the Balanced Subgraph problem, we show that the problem admits an O(2^k n^4) algorithm, improving upon a previous fixed-parameter approach with running time O(c^k n^O(1)) where c \approx 1000. We enhance a Balanced Subgraph implementation based on data reduction and iterative compression with improvements tailored towards these instances, and run experiments with real-world data to show the practical applicability of this approach. All practically relevant (k <= 1000) Tanglegram Layout instances can be solved exactly within seconds. Additionally, we provide a kernel-like bound by showing how to reduce the Balanced Subgraph instances for Tanglegram Layout on complete binary trees to a size of O(k log k).},
    doi = {10.1007/978-3-642-11269-0_3},
    file = {:2009/BoeckerEtAl_FasterFixedParameterApproach_IWPEC_2009.pdf:PDF},
    keywords = {jena; linkpdf; fpt; PABI},
    owner = {anke},
    timestamp = {2009.07.08},
    }
  • [DOI] S. Böcker, K. Jahn, J. Mixtacki, and J. Stoye, "Computation of median gene clusters," J comput biol, vol. 16, iss. 8, p. 1085–1099, 2009.
    [Bibtex]
    @Article{boecker09computation,
    author = {Sebastian B\"ocker and Katharina Jahn and Julia Mixtacki and Jens Stoye},
    title = {Computation of median gene clusters},
    journal = {J Comput Biol},
    year = {2009},
    volume = {16},
    number = {8},
    pages = {1085--1099},
    abstract = {Whole genome comparison based on gene order has become a popular approach in comparative genomics. An important task in this field is the detection of gene clusters, i.e., sets of genes that occur co-localized in several genomes. For most applications, it is preferable to extend this definition to allow for small deviations in the gene content of the cluster occurrences. However, relaxing the equality constraint increases the computational complexity of gene cluster detection drastically. Existing approaches deal with this problem by using simplifying constraints on the cluster definition and/or allowing only pairwise genome comparison. In this article, we introduce a cluster concept named median gene clusters that improves over existing models, present efficient algorithms for their computation and show experimental results on the detection of approximate gene clusters in multiple genomes.},
    comment = {JV von RECOMB 2008},
    doi = {10.1089/cmb.2009.0098},
    file = {:2009/BoeckerEtAl_ComputationMedianGeneClusters_JCB_2009.pdf:PDF},
    keywords = {jena; linkpdf; 3AGC; gene clusters, genome rearrangements},
    owner = {Sebastian},
    pmid = {19689215},
    timestamp = {2009.10.10},
    }
  • [DOI] S. Böcker, B. Kehr, and F. Rasche, "Determination of glycan structure from tandem mass spectra," in Proc. of computing and combinatorics conference (cocoon 2009), 2009, p. 258–267.
    [Bibtex]
    @InProceedings{boecker09determination,
    author = {Sebastian B\"ocker and Birte Kehr and Florian Rasche},
    title = {Determination of glycan structure from tandem mass spectra},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {258--267},
    publisher = Springer,
    abstract = {Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications, and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. We consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry data. We want to generate glycan tree candidates that sufficiently match the sample mass spectrum. Unfortunately, the resulting problem is known to be computationally hard. We present an efficient exact algorithm for this problem based on fixedparameter algorithmics, that can process a spectrum in a matter of seconds. We also report some preliminary results of our method on experimental data. We show that our approach is fast enough in applications, and that we can reach very good de novo identification results. Finally, we show how to compute the number of glycan topologies of a given size.},
    doi = {10.1007/978-3-642-02882-3_26},
    file = {BoeckerEtAl_DeterminationGlycanStructure_COCOON_2009.pdf:2009/BoeckerEtAl_DeterminationGlycanStructure_COCOON_2009.pdf:PDF},
    journalref = {boecker11determination},
    keywords = {fpt; jena; linkpdf; glycan ms; ms; PABI},
    owner = {Sebastian},
    timestamp = {2010.02.16},
    }
  • [DOI] S. Böcker, M. Letzel, relax Zs, and A. Pervukhin, "SIRIUS: decomposing isotope patterns for metabolite identification," Bioinformatics, vol. 25, iss. 2, p. 218–224, 2009.
    [Bibtex]
    @Article{boecker09sirius,
    author = {Sebastian B\"ocker and Matthias Letzel and {\relax Zs}uzsanna Lipt{\'a}k and Anton Pervukhin},
    title = {{SIRIUS}: Decomposing isotope patterns for metabolite identification},
    journal = {Bioinformatics},
    year = {2009},
    volume = {25},
    number = {2},
    pages = {218--224},
    abstract = {Motivation: High-resolution mass spectrometry (MS) is among the most widely used technologies in metabolomics. Metabolites participate in almost all cellular processes, but most metabolites still remain uncharacterized. Determination of the sum formula is a crucial step in the identification of an unknown metabolite, as it reduces its possible structures to a hopefully manageable set. Results: We present a method for determining the sum formula of a metabolite solely from its mass and the natural distribution of its isotopes. Our input is a measured isotope pattern from a high resolution mass spectrometer, and we want to find those molecules that best match this pattern. Our method is computationally efficient, and results on experimental data are very promising: For orthogonal time-of-flight mass spectrometry, we correctly identify sum formulas for more than 90\% of the molecules, ranging in mass up to 1000 Da. Availability: SIRIUS is available under the LGPL license at http://bio.informatik.uni-jena.de/sirius/. Contact:  ed.an1556059223ej-in1556059223u.ten1556059223im@ni1556059223hkuvr1556059223ep.no1556059223tna1556059223},
    doi = {10.1093/bioinformatics/btn603},
    file = {:2009/BoeckerEtAl_SiriusDecomposingIsotopePatterns_Bioinf_2009.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; ms},
    owner = {apervukh},
    pmid = {19015140},
    timestamp = {2008.11.06},
    url = {http://bioinformatics.oxfordjournals.org/cgi/content/full/25/2/218},
    }
  • [DOI] S. Böcker and A. Pervukhin, "Inferring peptide composition from molecular formulas," in Proc. of computing and combinatorics conference (cocoon 2009), 2009, p. 277–286.
    [Bibtex]
    @InProceedings{boecker09inferring,
    author = {Sebastian B\"ocker and Anton Pervukhin},
    title = {Inferring peptide composition from molecular formulas},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2009)},
    year = {2009},
    volume = {5609},
    series = lncs,
    pages = {277--286},
    publisher = Springer,
    abstract = {With the advent of novel mass spectrometry techniques such as Orbitrap MS, it is possible to determine the exact molecular formula of an unknown molecule solely from its isotope pattern. But for protein mass spectrometry, one is facing the problem that many peptides have exactly the same molecular formula even when ignoring the order of amino acids. In this work, we present an efficient method to determine the amino acid composition of an unknown peptide solely from its molecular formula. Our solution is based on efficiently enumerating all solutions of the multi-dimensional equality constrained integer knapsack problem.},
    doi = {10.1007/978-3-642-02882-3},
    file = {:2009/BoeckerPervukhin_InferringPeptideCompositionMolecularFormulas_Cocoon_2009.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {apervukh},
    timestamp = {2009.02.19},
    }
  • [DOI] S. Böcker, F. Rasche, and T. Steijger, "Annotating fragmentation patterns," in Proc. of workshop on algorithms in bioinformatics (wabi~2009), 2009, p. 13–24.
    [Bibtex]
    @InProceedings{boecker09annotating,
    author = {Sebastian B\"ocker and Florian Rasche and Tamara Steijger},
    title = {Annotating fragmentation patterns},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI~2009)},
    year = {2009},
    volume = {5724},
    series = lncs,
    pages = {13--24},
    publisher = Springer,
    abstract = {Mass spectrometry is one of the key technologies in metabolomics for the identification and quantification of molecules in small concentrations. For identification, these molecules are fragmented by, e.g., tandem mass spectrometry, and masses and abundances of the resulting fragments are measured. Recently, methods for de novo interpretation of tandem mass spectra and the automated inference of fragmentation patterns have been developed. If the correct structural formula is known, then peaks in the fragmentation pattern can be annotated by substructures of the underlying compound. To determine the structure of these fragments manually is tedious and time-consuming. Hence, there is a need for automated identification of the generated fragments. In this work, we consider the problem of annotating fragmentation patterns. Our input are fragmentation trees, representing tandem mass spectra where each peak has been assigned a molecular formula, and fragmentation dependencies are known. Given a fixed structural formula and any fragment molecular formula, we search for all structural fragments that satisfy elemental multiplicities. Ultimately, we search for a fragmentation pattern annotation with minimum total cleavage costs. We discuss several algorithmic approaches for this problem, including a randomized and a tree decomposition-based algorithm. We find that even though the problem of identifying structural fragments is NP-hard, instances based on molecular structures can be efficiently solved with a classical branch-and-bound algorithm.},
    doi = {10.1007/978-3-642-04241-6},
    file = {BoeckerEtAl_AnnotatingFragmentationPatterns_WABI_2009.pdf:2009/BoeckerEtAl_AnnotatingFragmentationPatterns_WABI_2009.pdf:PDF},
    keywords = {jena; linkpdf; tandem ms; metabolite ms; ft; ms; idun; PABI; fpt; TrACReview},
    owner = {Sebastian},
    timestamp = {2009.05.31},
    }
  • S. Böcker, T. Zichner, and F. Rasche, "Automated classification of unknown biocompounds using tandem MS," in Poster proc. of conference of the american society for mass spectrometry (asms 2009), 2009, p. W690.
    [Bibtex]
    @InProceedings{boecker09automated,
    author = {Sebastian B\"ocker AND Thomas Zichner AND Florian Rasche},
    title = {Automated classification of unknown biocompounds using tandem {MS}},
    booktitle = {Poster Proc. of Conference of the American Society for Mass Spectrometry (ASMS 2009)},
    year = {2009},
    pages = {W690},
    keywords = {metabolite ms; tandem ms; ft; idun; TrACReview},
    owner = {rasche},
    timestamp = {2009.05.04},
    }
  • [DOI] A. Bertsch, A. Leinenbach, A. Pervukhin, M. Lubeck, R. Hartmer, C. Baessmann, Y. A. Elnakady, R. Müller, S. Böcker, C. G. Huber, and O. Kohlbacher, "De novo peptide sequencing by tandem MS using complementary CID and electron transfer dissociation," Electrophoresis, vol. 30, iss. 21, p. 3736–3747, 2009.
    [Bibtex]
    @Article{bertsch09de-novo,
    author = {Andreas Bertsch and Andreas Leinenbach and Anton Pervukhin and Markus Lubeck and Ralf Hartmer and Carsten Baessmann and Yasser Abbas Elnakady and Rolf M\"uller and Sebastian B\"ocker and Christian G Huber and Oliver Kohlbacher},
    title = {De novo peptide sequencing by tandem {MS} using complementary {CID} and electron transfer dissociation},
    journal = {Electrophoresis},
    year = {2009},
    volume = {30},
    number = {21},
    pages = {3736--3747},
    abstract = {De novo sequencing of peptides using tandem MS is difficult due to missing fragment ions in the spectra commonly obtained after CID of peptide precursor ions. Complementing CID spectra with spectra obtained in an ion-trap mass spectrometer upon electron transfer dissociation (ETD) significantly increases the sequence coverage with diagnostic ions. In the de novo sequencing algorithm CompNovo presented here, a divide-and-conquer approach was combined with an efficient mass decomposition algorithm to exploit the complementary information contained in CID and ETD spectra. After optimizing the parameters for the algorithm on a well-defined training data set obtained for peptides from nine known proteins, the CompNovo algorithm was applied to the de novo sequencing of peptides derived from a whole protein extract of Sorangium cellulosum bacteria. To 2406 pairs of CID and ETD spectra contained in this data set, 675 fully correct sequences were assigned, which represent a success rate of 28.1\%. It is shown that the CompNovo algorithm yields significantly improved sequencing accuracy as compared with published approaches using only CID spectra or combined CID and ETD spectra.},
    doi = {10.1002/elps.200900332},
    file = {:2009/BertschEtAl_DeNovoPeptideSequencing_Electrophoresis_2009.pdf:PDF},
    keywords = {ms, peptide sequencing, jena},
    owner = {Sebastian},
    pmid = {19862751},
    timestamp = {2009.10.31},
    }
  • [DOI] A. Scherbart, W. Timm, S. Böcker, and T. W. Nattkemper, "Improved mass spectrometry peak intensity prediction by adaptive feature weighting," in Proc. of international conference on neural information processing of the asia-pacific neural network assembly (iconip 2008), 2009, p. 513–520.
    [Bibtex]
    @InProceedings{scherbart09improved,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {Improved Mass Spectrometry Peak Intensity Prediction by Adaptive Feature Weighting},
    booktitle = {Proc. of International Conference on Neural Information Processing of the Asia-Pacific Neural Network Assembly (ICONIP 2008)},
    year = {2009},
    volume = {5506},
    series = lncs,
    pages = {513--520},
    publisher = Springer,
    abstract = {Mass spectrometry (MS) is a key technique for the analysis and identification of proteins. A prediction of spectrum peak intensities from pre-computed molecular features would pave the way to a better understanding of spectrometry data and improved spectrum evaluation. The goal is to model the relationship between peptides and peptide peak heights in MALDI-TOF mass spectra, only using the peptide's sequence information and the chemical properties. To cope with this high dimensional data, we propose a regression based combination of feature weightings and a linear predictor to focus on relevant features. This offers simpler models, scalability, and better generalization. We show that the overall performance utilizing the estimation of feature relevance and re-training compared to using the entire feature space can be improved.},
    doi = {10.1007/978-3-642-02490-0_63},
    file = {:2008/ScherbartEtAl_ImprovedMassSpectrometry_ICONIP_2008.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2008.10.07},
    }

2008

  • S. Böcker, S. Briesemeister, Q. B. A. Bui, and A. Truss, "A fixed-parameter approach for weighted cluster editing," in Proc. of asia-pacific bioinformatics conference (apbc 2008), 2008, p. 211–220.
    [Bibtex]
    @InProceedings{boecker08fixed-parameter,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {A fixed-parameter approach for Weighted Cluster Editing},
    booktitle = {Proc. of Asia-Pacific Bioinformatics Conference (APBC 2008)},
    year = {2008},
    volume = {5},
    series = {Series on Advances in Bioinformatics and Computational Biology},
    pages = {211--220},
    publisher = {Imperial College Press},
    abstract = {Clustering objects with respect to a given similarity or distance measure is a problem often encountered in computational biology. Several well-known clustering algorithms are based on transforming the input matrix into a weighted graph although the resulting Weighted Cluster Editing problem is computationally hard: here, we transform the input graph into a disjoint union of cliques such that the sum of weights of all modified edges is minimized. We present fixed-parameter algorithms for this problem which guarantee to find an optimal solution in provable worst-case running time. We introduce a new data reduction operation (merging vertices) that has no counterpart in the unweighted case and strongly cuts down running times in practice. We have applied our algorithms to both artificial and biological data. Despite the complexity of the problem, our method often allows exact computation of optimal solutions in reasonable running time.},
    data = {cluster_editing_data},
    file = {BoeckerEtAl_FixedParameterApproachWeightedClusterEditing_APBC_2008.pdf:2008/BoeckerEtAl_FixedParameterApproachWeightedClusterEditing_APBC_2008.pdf:PDF},
    keywords = {jena; FPT; linkpdf; PABI},
    opteditor = {Alvia Brazma and Satoru Miyano and Tatsuya Akutsu},
    owner = {Sebastian},
    timestamp = {2008.10.06},
    url = {http://ebooks.worldscinet.com/ISBN/9781848161092/9781848161092_0023.html},
    }
  • [DOI] S. Böcker, S. Briesemeister, Q. B. A. Bui, and A. Truss, "Going weighted: parameterized algorithms for cluster editing," in Proc. of conference on combinatorial optimization and applications (cocoa 2008), 2008, p. 1–12.
    [Bibtex]
    @InProceedings{boecker08going,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Quang Bao Anh Bui and Anke Truss},
    title = {Going Weighted: Parameterized Algorithms for Cluster Editing},
    booktitle = {Proc. of Conference on Combinatorial Optimization and Applications (COCOA 2008)},
    year = {2008},
    volume = {5165},
    series = lncs,
    pages = {1--12},
    publisher = Springer,
    abstract = {The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper we present a surprisingly simple branching strategy for Cluster Editing. We generalize the problem assuming that edge insertion and deletion costs are positive integers. We show that the resulting search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm for this problem. We have implemented and evaluated our approach, and find that it outperforms other parametrized algorithms for the problem.},
    data = {cluster_editing_data},
    doi = {10.1007/978-3-540-85097-7_1},
    file = {BoeckerEtAl_GoingWeightedParameterized_COCOA_2008.pdf:2008/BoeckerEtAl_GoingWeightedParameterized_COCOA_2008.pdf:PDF},
    journalref = {boecker09going},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    timestamp = {2009.03.08},
    }
  • [DOI] S. Böcker, S. Briesemeister, and G. W. Klau, "Exact algorithms for cluster editing: evaluation and experiments," in Proc. of workshop on experimental algorithms (wea 2008), 2008, p. 289–302.
    [Bibtex]
    @InProceedings{boecker08exact,
    author = {Sebastian B\"ocker and Sebastian Briesemeister and Gunnar W. Klau},
    title = {Exact Algorithms for Cluster Editing: Evaluation and Experiments},
    booktitle = {Proc. of Workshop on Experimental Algorithms (WEA 2008)},
    year = {2008},
    volume = {5038},
    series = lncs,
    pages = {289--302},
    publisher = Springer,
    abstract = {We present empirical results for the Cluster Editing problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of |V|. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying k <= 25 |V|. In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques.},
    data = {cluster_editing_data},
    doi = {10.1007/978-3-540-68552-4_22},
    file = {BoeckerEtAl_ExactAlgorithmsClusterEditing_WEA_2008.pdf:2008/BoeckerEtAl_ExactAlgorithmsClusterEditing_WEA_2008.pdf:PDF},
    journalref = {boecker11exact},
    keywords = {jena; FPT; linkpdf; PABI},
    owner = {Sebastian},
    timestamp = {2008.03.03},
    }
  • [DOI] S. Böcker, Q. B. A. Bui, and A. Truss, "An improved fixed-parameter algorithm for minimum-flip consensus trees," in Proc. of international workshop on parameterized and exact computation (iwpec 2008), 2008, p. 43–54.
    [Bibtex]
    @InProceedings{boecker08improved,
    author = {Sebastian B\"ocker and Quang Bao Anh Bui and Anke Truss},
    title = {An Improved Fixed-Parameter Algorithm for Minimum-Flip Consensus Trees},
    booktitle = {Proc. of International Workshop on Parameterized and Exact Computation (IWPEC 2008)},
    year = {2008},
    editor = {Martin Grohe and Rolf Niedermeier},
    volume = {5018},
    series = lncs,
    pages = {43--54},
    publisher = SV,
    abstract = {In computational phylogenetics, the problem of constructing a consensus tree for a given set of input trees has frequently been addressed. In this paper we study the \textsc{Minimum-Flip Problem}: the input trees are transformed into a binary matrix, and we want to find a perfect phylogeny for this matrix using a minimum number of flips, that is, corrections of single entries in the matrix. In its graph-theoretical formulation, the problem is as follows: Given a bipartite graph G=(V_t \cup V_c, E), the problem is to find a minimum set of edge modifications such that the resulting graph has no induced path with four edges which starts and ends in V_t. We present a fixed-parameter algorithm for the Minimum-Flip Problem with running time O(4.83^k (m+n) + mn) for n taxa, m characters, and k flips. Additionally, we discuss several heuristic improvements. We also report computational results on phylogenetic data.},
    doi = {10.1007/978-3-540-79723-4},
    file = {BoeckerEtAl_ImprovedFixedParameterAlgorithmMinimumFlipConsensusTrees_IWPEC_2008.pdf:2008/BoeckerEtAl_ImprovedFixedParameterAlgorithmMinimumFlipConsensusTrees_IWPEC_2008.pdf:PDF},
    journalref = {boecker12improved},
    keywords = {jena; FPT; linkpdf; phylogenetics; PABI},
    owner = {baoanh},
    timestamp = {2008.02.15},
    }
  • [DOI] S. Böcker, K. Jahn, J. Mixtacki, and J. Stoye, "Computation of median gene clusters," in Proc. of research in computational molecular biology (recomb 2008), 2008, p. 331–345.
    [Bibtex]
    @InProceedings{boecker08computation,
    author = {Sebastian B\"ocker and Katharina Jahn and Julia Mixtacki and Jens Stoye},
    title = {Computation of Median Gene Clusters},
    booktitle = {Proc. of Research in Computational Molecular Biology (RECOMB 2008)},
    year = {2008},
    volume = {4955},
    series = lncs,
    pages = {331--345},
    publisher = Springer,
    abstract = {Whole genome comparison based on gene order has become a popular approach in comparative genomics. An important task in this field is the detection of gene clusters, i.e. sets of genes that occur colocalized in several genomes. For most applications it is preferable to extend this definition to allow for small deviations in the gene content of the cluster occurrences. However, relaxing the equality constraint increases the computational complexity of gene cluster detection drastically. Existing approaches deal with this problem by using simplifying constraints on the cluster definition and/or allowing only pairwise genome comparison. In this paper we introduce a cluster concept named median gene clusters that improves over existing models and present efficient algorithms for their computation that allow for the detection of approximate gene clusters in multiple genomes.},
    doi = {10.1007/978-3-540-78839-3_28},
    file = {BoeckerEtAl_ComputationMedianGeneClusters_RECOMB_2008.pdf:2008/BoeckerEtAl_ComputationMedianGeneClusters_RECOMB_2008.pdf:PDF},
    journalref = {boecker09computation},
    keywords = {jena; linkpdf; 3AGC; gene clusters, genome rearrangements},
    owner = {Sebastian},
    timestamp = {2007.12.11},
    }
  • [DOI] S. Böcker, relax Zs, M. Martin, A. Pervukhin, and H. Sudek, "DECOMP–from interpreting mass spectrometry peaks to solving the Money Changing Problem," Bioinformatics, vol. 24, iss. 4, p. 591–593, 2008.
    [Bibtex]
    @Article{boecker08decomp,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k and Marcel Martin and Anton Pervukhin and Henner Sudek},
    title = {{DECOMP}--from interpreting Mass Spectrometry peaks to solving the {Money Changing Problem}},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    number = {4},
    pages = {591--593},
    abstract = {We introduce DECOMP, a tool that computes the sum formula of all molecules whose mass equals the input mass. This problem arises frequently in biochemistry and mass spectrometry (MS), when we know the molecular mass of a protein, DNA, or metabolite fragment but have no other information. A closely related problem is known as the Money Changing Problem (MCP), where all masses are positive integers. Recently, efficient algorithms have been developed for the MCP, which DECOMP applies to real valued MS data. The excellent performance of this method on proteomic and metabolomic MS data has recently been demonstrated. DECOMP has an easy-to-use graphical interface, which caters for both types of users: those interested in solving MCP instances and those submitting MS data.},
    doi = {10.1093/bioinformatics/btm631},
    file = {BoeckerEtAl_DecompInterpretingMassSpectrometry_Bioinf_2008.pdf:2008/BoeckerEtAl_DecompInterpretingMassSpectrometry_Bioinf_2008.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; metabolite ms; TrACReview},
    owner = {Sebastian},
    pmid = {18174179},
    timestamp = {2007.11.05},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/24/4/591?ijkey=1lM50Bkzz4SCLsa&keytype=ref},
    }
  • [DOI] S. Böcker and V. Mäkinen, "Combinatorial approaches for mass spectra recalibration," Ieee/acm trans comput biology bioinform, vol. 5, iss. 1, p. 91–100, 2008.
    [Bibtex]
    @Article{boecker08combinatorial,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Combinatorial Approaches for Mass Spectra Recalibration},
    journal = TCBB,
    year = {2008},
    volume = {5},
    number = {1},
    pages = {91--100},
    issn = {1545-5963},
    abstract = {Mass spectrometry has become one of the most popular analysis techniques in Proteomics and Systems Biology. With the creation of larger datasets, the automated recalibration of mass spectra becomes important to ensure that every peak in the sample spectrum is correctly assigned to some peptide and protein. Algorithms for recalibrating mass spectra have to be robust with respect to wrongly assigned peaks, as well as efficient due to the amount of mass spectrometry data. The recalibration of mass spectra leads us to the problem of finding an optimal matching between mass spectra under measurement errors.We have developed two deterministic methods that allow robust computation of such a matching: The first approach uses a computational geometry interpretation of the problem, and tries to find two parallel lines with constant distance that stab a maximal number of points in the plane. The second approach is based on finding a maximal common approximate subsequence, and improves existing algorithms by one order of magnitude exploiting the sequential nature of the matching problem. We compare our results to a computational geometry algorithm using a topological line-sweep.},
    doi = {10.1109/tcbb.2007.1077},
    file = {BoeckerMaekinen_CombinatorialApproachesMassSpectra_TCBB_2008.pdf:2008/BoeckerMaekinen_CombinatorialApproachesMassSpectra_TCBB_2008.pdf:PDF},
    keywords = {jena; linkpdf; ms, recalibration, computational geometry},
    owner = {Sebastian},
    pmid = {18245878},
    timestamp = {2007.09.22},
    }
  • [DOI] S. Böcker and F. Rasche, "Towards de novo identification of metabolites by analyzing tandem mass spectra," Bioinformatics, vol. 24, p. I49-I55, 2008.
    [Bibtex]
    @Article{boecker08towards,
    author = {Sebastian B\"ocker and Florian Rasche},
    title = {Towards de novo identification of metabolites by analyzing tandem mass spectra},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    pages = {I49-I55},
    note = {Proc. of \emph{European Conference on Computational Biology} (ECCB 2008)},
    doi = {10.1093/bioinformatics/btn270},
    file = {BoeckerRasche_TowardsDeNovoIdentification_Bioinf_2008.pdf:2008/BoeckerRasche_TowardsDeNovoIdentification_Bioinf_2008.pdf:PDF},
    keywords = {jena; linkpdf; metabolite ms; ms; idun; ft; TrACReview},
    owner = {rasche},
    pmid = {18689839},
    timestamp = {08.04.2008},
    }
  • [DOI] T. Griebel, M. Brinkmeyer, and S. Böcker, "EPoS: a modular software framework for phylogenetic analysis," Bioinformatics, vol. 24, iss. 20, p. 2399–2400, 2008.
    [Bibtex]
    @Article{griebel08epos,
    author = {Thasso Griebel and Malte Brinkmeyer and Sebastian B\"ocker},
    title = {{EPoS}: A modular software framework for phylogenetic analysis},
    journal = {Bioinformatics},
    year = {2008},
    volume = {24},
    number = {20},
    pages = {2399--2400},
    doi = {10.1093/bioinformatics/btn364},
    keywords = {jena; phylogenetics},
    owner = {thasso},
    pmid = {18632748},
    timestamp = {2008.07.16},
    url = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btn364?ijkey=08ZjFqzfs4WCjFn&keytype=ref},
    }
  • [DOI] W. Timm, A. Scherbart, S. Böcker, O. Kohlbacher, and T. W. Nattkemper, "Peak intensity prediction in MALDI-TOF mass spectrometry: a machine learning study to support quantitative proteomics," Bmc bioinf, vol. 9, p. 443, 2008.
    [Bibtex]
    @Article{timm08peak,
    author = {Wiebke Timm and Alexandra Scherbart and Sebastian B\"ocker and Oliver Kohlbacher and Tim W. Nattkemper},
    title = {Peak Intensity Prediction in {MALDI-TOF} Mass Spectrometry: A Machine Learning Study to Support Quantitative Proteomics},
    journal = {BMC Bioinf},
    year = {2008},
    volume = {9},
    pages = {443},
    abstract = {Background. Mass spectrometry is a key technique in proteomics and can be used to analyze complex samples quickly. One key problem with the mass spectrometric analysis of peptides and proteins, however, is the fact that absolute quantification is severely hampered by the unclear relationship between the observed peak intensity and the peptide concentration in the sample. While there are numerous approaches to circumvent this problem experimentally (e.g. labeling techniques), reliable prediction of the peak intensities from peptide sequences could provide a peptide-specific correction factor. Thus, it would be a valuable tool towards label-free absolute quantification. Results. In this work we present machine learning techniques for peak intensity prediction for MALDI mass spectra. Features encoding the peptides' physico-chemical properties as well as string-based features were extracted. A feature subset was obtained from multiple forward feature selections on the extracted features. Based on these features, two advanced machine learning methods (support vector regression and local linear maps) are shown to yield good results for this problem (Pearson correlation of 0.68 in a ten-fold cross validation). Conclusions. The techniques presented here are a useful first step going beyond the binary prediction of proteotypic peptides towards a more quantitative prediction of peak intensities. These predictions in turn will turn out to be beneficial for mass spectrometry-based quantitative proteomics.},
    doi = {10.1186/1471-2105-9-443},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    pmid = {18937839},
    timestamp = {2008.10.06},
    url = {http://www.biomedcentral.com/1471-2105/9/443},
    }

2007

  • [DOI] S. Böcker, "Simulating multiplexed SNP discovery rates using base-specific cleavage and mass spectrometry," Bioinformatics, vol. 23, iss. 2, p. e5-e12, 2007.
    [Bibtex]
    @Article{boecker07simulating,
    author = {Sebastian B\"ocker},
    title = {Simulating multiplexed {SNP} discovery rates using base-specific cleavage and mass spectrometry},
    journal = {Bioinformatics},
    year = {2007},
    volume = {23},
    number = {2},
    pages = {e5-e12},
    note = {Proc. of \emph{European Conference on Computational Biology} (ECCB 2006)},
    abstract = {Motivation: Single Nucleotide Polymorphisms (SNPs) are believed to contribute strongly to the genetic variability in living beings, and SNP and mutation discovery are of great interest in today's Life Sciences. A comparatively new method to discover such polymorphisms is based on base-specific cleavage, where resulting cleavage products are analyzed by mass spectrometry (MS). One particular advantage of this method is the possibility of multiplexing the biochemical reactions, i.e. examining multiple genomic regions in parallel. Simulations can help estimating the performance of a method for polymorphism discovery, and allow us to evaluate the influence of method parameters on the discovery rate, and also to investigate whether the method is well suited for a certain genomic region. Results: We show how to efficiently conduct such simulations for polymorphism discovery using base-specific cleavage and MS. Simulating multiplexed polymorphism discovery leads us to the problem of uniformly drawing a multiplex. Given a multiset of natural numbers we want to uniformly draw a subset of fixed cardinality so that the elements sum up to some fixed total length. We show how to enumerate multiplex layouts using dynamic programming, which allows us to uniformly draw a multiplex.},
    doi = {10.1093/bioinformatics/btl291},
    file = {Boecker_SimulatingMultiplexedSNPDiscoveryRates_ECCB_2006.pdf:2006/Boecker_SimulatingMultiplexedSNPDiscoveryRates_ECCB_2006.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    owner = {Sebastian},
    pmid = {17237104},
    timestamp = {2006.10.17},
    }
  • [DOI] S. Böcker and H. Kaltenbach, "Mass spectra alignments and their significance," J discrete algorithms, vol. 5, iss. 4, p. 714–728, 2007.
    [Bibtex]
    @Article{boecker07mass,
    author = {Sebastian B\"ocker and Hans-Michael Kaltenbach},
    title = {Mass Spectra Alignments and Their Significance},
    journal = {J Discrete Algorithms},
    year = {2007},
    volume = {5},
    number = {4},
    pages = {714--728},
    abstract = {Mass Spectrometry has become one of the most popular analysis techniques in Genomics and Systems Biology. We investigate a general framework that allows the alignment (or matching) of any two mass spectra. In particular, we examine the alignment of a reference mass spectrum generated in silico from a database, with a measured sample mass spectrum. In this context, we assess the significance of alignment scores for character-specific cleavage experiments, such as tryptic digestion of amino acids. We present an efficient approach to estimate this significance, with runtime linear in the number of detected peaks. In this context, we investigate the probability that a random string over a weighted alphabet contains a substring of some given weight.},
    comment = {Definition of Mass Spectra Alignments, Combinatorial derivation of fragment, mass-occurrence probabilities, estimation of moments of score distribution},
    doi = {10.1016/j.jda.2006.11.003},
    file = {BoeckerKaltenbach_MassSpectraAlignments_JDiscAlgor_2007.pdf:2007/BoeckerKaltenbach_MassSpectraAlignments_JDiscAlgor_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Standard},
    timestamp = {2006.12.04},
    }
  • [DOI] S. Böcker and relax Zs, "A fast and simple algorithm for the Money Changing Problem," Algorithmica, vol. 48, iss. 4, p. 413–432, 2007.
    [Bibtex]
    @Article{boecker07fast,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {A fast and simple algorithm for the {Money Changing Problem}},
    journal = {Algorithmica},
    year = {2007},
    volume = {48},
    number = {4},
    pages = {413--432},
    abstract = {The Money Changing Problem (MCP) can be stated as follows: Given $k$ positive integers $a_1< \ldots < a_k$ and a query integer $M$, is there a linear combination $\sum_i c_ia_i = M$ with non-negative integers $c_i$, a \emph{decomposition} of $M$? If so, produce one or all such decompositions. The largest integer without such a decomposition is called the \emph{Frobenius number} $g(a_1,\ldots,a_k)$. A data structure called {\em residue table} of $a_1$ words can be used to compute the Frobenius number in time $O(a_1)$. We present an intriguingly simple algorithm for computing the residue table which runs in time $O(ka_1)$, with no additional memory requirements, outperforming the best previously known algorithm. Simulations show that it performs well even on 'hard' instances from the literature. In addition, we can employ the residue table to answer MCP decision instances in constant time, and a slight modification of size $O(a_1)$ to compute one decomposition for a query $M$. Note that since both, computing the Frobenius number and MCP (decision) are NP-hard, one cannot expect to find an algorithm that is polynomial in the size of the input, i.e., in $k,\log a_k$, and $\log M$. We then give an algorithm which, using a modification of the residue table, also constructible in $O(ka_1)$ time, computes all decompositions of a query integer $M$. Its worst-case running time is $O(ka_1)$ for each decomposition, thus the total runtime depends only on the output size and is independent of the size of query $M$ itself. We apply our latter algorithm to interpreting mass spectrometry (MS) peaks: Due to its high speed and accuracy, MS is now the method of choice in protein identification. Interpreting individual peaks is one of the recurring subproblems in analyzing MS data; the task is to identify sample molecules whose mass the peak possibly represents. This can be stated as an MCP instance, with the masses of the individual amino acids as the $k$ integers $a_1,\ldots, a_k$. Our simulations show that our algorithm is fast on real data and is well suited for generating candidates for peak interpretation.},
    doi = {10.1007/s00453-007-0162-8},
    file = {BoeckerLiptak_FastSimpleAlgorithmMoneyChanging_Algorithmica_2007.pdf:2007/BoeckerLiptak_FastSimpleAlgorithmMoneyChanging_Algorithmica_2007.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; frobenius},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }
  • [DOI] H. Kaltenbach, A. Wilke, and S. Böcker, "SAMPI: protein identification with mass spectra alignments," Bmc bioinf, vol. 8, p. 102, 2007.
    [Bibtex]
    @Article{kaltenbach07sampi,
    author = {Hans-Michael Kaltenbach and Andreas Wilke and Sebastian B\"ocker},
    title = {{SAMPI}: Protein identification with Mass Spectra Alignments},
    journal = {BMC Bioinf},
    year = {2007},
    volume = {8},
    pages = {102},
    abstract = {Background: Mass spectrometry based peptide mass fingerprints (PMFs) offer a fast, efficient, and robust method for protein identification. A protein is digested (usually by trypsin) and its mass spectrum is compared to simulated spectra for protein sequences in a database. However, existing tools for analyzing PMFs often suffer from missing or heuristic analysis of the significance of search results and insufficient handling of missing and additional peaks. Results: We present an unified framework for analyzing Peptide Mass Fingerprints that offers a number of advantages over existing methods: First, comparison of mass spectra is based on a scoring function that can be custom-designed for certain applications and explicitly takes missing and additional peaks into account. The method is able to simulate almost every additive scoring scheme. Second, we present an efficient deterministic method for assessing the significance of a protein hit, independent of the underlying scoring function and sequence database. We prove the applicability of our approach using biological mass spectrometry data and compare our results to the standard software Mascot. Conclusion: The proposed framework for analyzing Peptide Mass Fingerprints shows performance comparable to Mascot on small peak lists. Introducing more noise peaks, we are able to keep identification rates at a similar level by using the flexibility introduced by scoring schemes.},
    doi = {10.1186/1471-2105-8-102},
    file = {KaltenbachEtAl_SampiProteinIdentification_BMCBioinf_2007.pdf:2007/KaltenbachEtAl_SampiProteinIdentification_BMCBioinf_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    pmid = {17386090},
    timestamp = {2007.09.22},
    }
  • [DOI] M. Kaltenbach, S. Böcker, and S. Rahmann, "Markov additive chains and applications to fragment statistics for peptide mass fingerprinting," in Proc. of recomb satellite conference on systems biology and computational proteomics 2006, 2007, p. 29–41.
    [Bibtex]
    @InProceedings{kaltenbach07markov,
    author = {Michael Kaltenbach and Sebastian B\"ocker and Sven Rahmann},
    title = {{Markov} Additive Chains and Applications to Fragment Statistics for Peptide Mass Fingerprinting},
    booktitle = {Proc. of RECOMB Satellite Conference on Systems Biology and Computational Proteomics 2006},
    year = {2007},
    volume = {4532},
    series = lncs,
    pages = {29--41},
    publisher = Springer,
    doi = {10.1007/978-3-540-73060-6_3},
    file = {KaltenbachEtAl_MarkovAdditiveChains_RECOMBSatellite_2006.pdf:2006/KaltenbachEtAl_MarkovAdditiveChains_RECOMBSatellite_2006.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.01.02},
    }
  • S. Rahmann, T. Wittkop, J. Baumbach, M. Martin, A. Truss, and S. Böcker, "Exact and heuristic algorithms for weighted cluster editing," in Proc. of computational systems bioinformatics (csb 2007), 2007, p. 391–401.
    [Bibtex]
    @InProceedings{rahmann07exact,
    author = {Sven Rahmann and Tobias Wittkop and Jan Baumbach and Marcel Martin and Anke Truss and Sebastian B\"ocker},
    title = {Exact and Heuristic Algorithms for Weighted Cluster Editing},
    booktitle = {Proc. of Computational Systems Bioinformatics (CSB 2007)},
    year = {2007},
    volume = {6},
    pages = {391--401},
    abstract = {Clustering objects according to given similarity or distance values is a ubiquitous problem in computational biology with diverse applications, e.g., in defining families of orthologous genes, or in the analysis of microarray experiments. While there exists a plenitude of methods, many of them produce clusterings that can be further improved. "Cleaning up" initial clusterings can be formalized as projecting a graph on the space of transitive graphs; it is also known as the cluster editing or cluster partitioning problem in the literature. In contrast to previous work on cluster editing, we allow arbitrary weights on the similarity graph. To solve the so-defined weighted transitive graph projection problem, we present (1) the first exact fixed-parameter algorithm, (2) a polynomial-time greedy algorithm that returns the optimal result on a well-defined subset of "close-to-transitive" graphs and works heuristically on other graphs, and (3) a fast heuristic that uses ideas similar to those from the Fruchterman-Reingold graph layout algorithm. We compare quality and running times of these algorithms on both artificial graphs and protein similarity graphs derived from the 66 organisms of the COG dataset.},
    data = {cluster_editing_data},
    file = {RahmannEtAl_ExactHeuristicAlgorithms_CSB_2007.pdf:2007/RahmannEtAl_ExactHeuristicAlgorithms_CSB_2007.pdf:PDF},
    keywords = {jena; FPT; linkpdf; clustering method; PABI},
    owner = {Sebastian},
    pmid = {17951842},
    timestamp = {2007.09.09},
    url = {http://www.lifesciencessociety.org/CSB2007/toc/391.2007.html},
    }
  • [DOI] A. Scherbart, W. Timm, S. Böcker, and T. W. Nattkemper, "Neural network approach for mass spectrometry prediction by peptide prototyping," in Proc. of international conference on artificial neural networks (icann 2007), 2007, p. 90–99.
    [Bibtex]
    @InProceedings{scherbart07neural,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {Neural network approach for mass spectrometry prediction by peptide prototyping},
    booktitle = {Proc. of International Conference on Artificial Neural Networks (ICANN 2007)},
    year = {2007},
    volume = {4669},
    series = lncs,
    pages = {90--99},
    publisher = Springer,
    abstract = {In todays bioinformatics, Mass spectrometry (MS) is the key technique for the identification of proteins. A prediction of spectrum peak intensities from pre computed molecular features would pave the way to better understanding of spectrometry data and improved spectrum evaluation. We propose a neural network architecture of Local Linear Map (LLM)-type for peptide prototyping and learning locally tuned regression functions for peak intensity prediction in MALDI-TOF mass spectra. We obtain results comparable to those obtained by nu-Support Vector Regression and show how the LLM learning architecture provides a basis for peptide feature profiling and visualisation.},
    doi = {10.1007/978-3-540-74695-9},
    file = {ScherbartEtAl_NeuralNetworkApproach_ICANN_2007.pdf:2007/ScherbartEtAl_NeuralNetworkApproach_ICANN_2007.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.09.10},
    }
  • [DOI] A. Scherbart, W. Timm, S. Böcker, and T. W. Nattkemper, "SOM-based peptide prototyping for mass spectrometry peak intensity prediction," in Proc. of workshop on self-organizing maps (wsom 2007), 2007.
    [Bibtex]
    @InProceedings{scherbart07som-based,
    author = {Alexandra Scherbart and Wiebke Timm and Sebastian B\"ocker and Tim W. Nattkemper},
    title = {{SOM}-based peptide prototyping for mass spectrometry peak intensity prediction},
    booktitle = {Proc. of Workshop on Self-Organizing Maps (WSOM 2007)},
    year = {2007},
    note = {Doi 10.2390/biecoll-wsom2007-157.},
    doi = {10.2390/biecoll-wsom2007-157},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2007.09.10},
    }

2006

  • [DOI] S. Böcker, "Sequencing from compomers: the puzzle," Theory comput syst, vol. 39, iss. 3, p. 455–471, 2006.
    [Bibtex]
    @Article{boecker06sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from Compomers: The Puzzle},
    journal = TCSyst,
    year = {2006},
    volume = {39},
    number = {3},
    pages = {455--471},
    abstract = {The board game FragmindTM poses the following problem: The player has to reconstruct an (unknown) string s over the alphabet $\Sigma$. To this end, the game reports the following information to the player, for every character $x \in \Sigma$: First, the string s is cleaved wherever the character x is found in s. Second, every resulting fragment y is scrambled by a random permutation so that the only information left is how many times y contains each character $\sigma \in \Sigma$. These scrambled fragments are then reported to the player. Clearly, distinct strings can show identical cleavage patterns for all cleavage characters. In fact, even short strings of length 30+ usually have non-unique cleavage patterns. To this end, we introduce a generalization of the game setup called Sequencing from Compomers. We also generate those fragments of s that contain up to k uncleaved characters x, for some small and fixed threshold k. This modification dramatically increases the length of strings that can be uniquely reconstructed. We show that it is NP-hard to decide whether there exists some string compatible with the input data, but we also present a branch-and-bound runtime heuristic to find all such strings: The input data are transformed into subgraphs of the de Bruijn graph, and we search for walks in these subgraphs simultaneously. The above problem stems from the analysis of mass spectrometry data from base-specific cleavage of DNA sequences, and gives rise to a completely new approach to DNA de-novo sequencing.},
    doi = {10.1007/s00224-005-1238-y},
    file = {Boecker_SequencingFromCompomersPuzzle_TheorComputSyst_2006.pdf:2006/Boecker_SequencingFromCompomersPuzzle_TheorComputSyst_2006.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    owner = {Sebastian},
    timestamp = {2006.05.10},
    }
  • [DOI] S. Böcker, M. Letzel, relax Zs, and A. Pervukhin, "Decomposing metabolomic isotope patterns," in Proc. of workshop on algorithms in bioinformatics (wabi 2006), 2006, p. 12–23.
    [Bibtex]
    @InProceedings{boecker06decomposing,
    author = {Sebastian B\"ocker and Matthias Letzel and {\relax Zs}uzsanna Lipt{\'a}k and Anton Pervukhin},
    title = {Decomposing metabolomic isotope patterns},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2006)},
    year = {2006},
    volume = {4175},
    series = lncs,
    pages = {12--23},
    publisher = Springer,
    abstract = {We present a method for determining the sum formula of metabolites solely from their mass and isotope pattern. Metabolites, such as sugars or lipids, participate in almost all cellular processes, but the majority still remains uncharacterized. Our input is a measured isotope pattern from a high resolution mass spectrometer, and we want to find those molecules that best match this pattern. Determination of the sum formula is a crucial step in the identification of an unknown metabolite, as it reduces its possible structures to a hopefully manageable set. Our method is computationally efficient, and first results on experimental data indicate good identification rates for chemical compounds up to 700 Dalton. Above 1000 Dalton, the number of molecules with a certain mass increases rapidly. To efficiently analyze mass spectra of such molecules, we define several additive invariants extracted from the input and then propose to solve a joint decomposition problem.},
    doi = {10.1007/11851561_2},
    file = {BoeckerEtAl_DecomposingMetabolomicIsotopePatterns_WABI_2006.pdf:2006/BoeckerEtAl_DecomposingMetabolomicIsotopePatterns_WABI_2006.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; metabolite ms; TrACReview},
    opteditor = {Philipp B\"ucher and Bernard M. E. Moret},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }
  • S. Böcker and V. Mäkinen, "Combinatorial approaches for mass spectra recalibration," in Computational proteomics, 2006.
    [Bibtex]
    @InProceedings{boecker06combinatorial,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Combinatorial Approaches for Mass Spectra Recalibration},
    booktitle = {Computational Proteomics},
    year = {2006},
    editor = {Christian Huber and Oliver Kohlbacher and Knut Reinert},
    number = {05471},
    series = {Dagstuhl Seminar Proceedings},
    publisher = {Internationales Begegnungs- und Forschungszentrum (IBFI), Schloss Dagstuhl, Germany},
    issn = {1862-4405},
    keywords = {jena; ms, recalibration, computational geometry},
    optaddress = {Dagstuhl, Germany},
    }
  • W. Timm, S. Böcker, T. Twellmann, and T. W. Nattkemper, "Peak intensity prediction for PMF mass spectra using support vector regression," in Proc. of international flins conference on applied artificial intelligence (flins 2006), 2006, p. 565–572.
    [Bibtex]
    @InProceedings{timm06peak,
    author = {Wiebke Timm and Sebastian B\"ocker and Thorsten Twellmann and Tim W. Nattkemper},
    title = {Peak Intensity Prediction for {PMF} Mass Spectra using Support Vector Regression},
    booktitle = {Proc. of International FLINS Conference on Applied Artificial Intelligence (FLINS 2006)},
    year = {2006},
    pages = {565--572},
    file = {TimmEtAl_PeakIntensityPrediction_FLINS_2006.pdf:2006/TimmEtAl_PeakIntensityPrediction_FLINS_2006.pdf:PDF},
    keywords = {jena; linkpdf; ms},
    owner = {Sebastian},
    timestamp = {2006.10.17},
    }

2005

  • S. Böcker and H. Kaltenbach, "Mass spectra alignments and their significance," in Proc. of combinatorial pattern matching symposium (cpm 2005), 2005, p. 429–441.
    [Bibtex]
    @InProceedings{boecker05mass,
    author = {Sebastian B\"ocker and Hans-Michael Kaltenbach},
    title = {Mass Spectra Alignments and Their Significance},
    booktitle = {Proc. of Combinatorial Pattern Matching Symposium (CPM 2005)},
    year = {2005},
    volume = {3537},
    series = lncs,
    pages = {429--441},
    publisher = Springer,
    comment = {Definition of Mass Spectra Alignments, Combinatorial derivation of fragment, mass-occurrence probabilities, estimation of moments of score distribution},
    file = {BoeckerKaltenbach_MSAlignmentsSignificance_CPM_2005.pdf:2005/BoeckerKaltenbach_MassSpectraAlignmentsSignificance_CPM_2005.pdf:PDF},
    journalref = {boecker07mass},
    keywords = {jena; linkpdf;},
    }
  • S. Böcker and relax Zs, "Efficient mass decomposition," in Proc. of acm symposium on applied computing (acm sac 2005), 2005, p. 151–157.
    [Bibtex]
    @InProceedings{boecker05efficient,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {Efficient Mass Decomposition},
    booktitle = {Proc. of ACM Symposium on Applied Computing (ACM SAC 2005)},
    year = {2005},
    pages = {151--157},
    publisher = {ACM press, New York},
    file = {BoeckerLiptak_EfficientMassDecomposition_ACMSAC_2005.pdf:2005/BoeckerLiptak_EfficientMassDecomposition_ACMSAC_2005.pdf:PDF},
    keywords = {jena; IDUN; linkpdf; frobenius},
    }
  • S. Böcker and relax Zs, "The Money Changing Problem revisited: computing the Frobenius number in time $O(k \, a_1)$," in Proc. of computing and combinatorics conference (cocoon 2005), 2005, p. 965–974.
    [Bibtex]
    @InProceedings{boecker05money,
    author = {Sebastian B\"ocker and {\relax Zs}uzsanna Lipt{\'a}k},
    title = {The {Money Changing Problem} revisited: Computing the {Frobenius} number in time {$O(k \, a_1)$}},
    booktitle = {Proc. of Computing and Combinatorics Conference (COCOON 2005)},
    year = {2005},
    volume = {3595},
    series = lncs,
    pages = {965--974},
    publisher = Springer,
    alteditor = {Lusheng Wang},
    file = {BoeckerLiptak_MoneyChangingProblem_COCOON_2005.pdf:2005/BoeckerLiptak_MoneyChangingProblem_COCOON_2005.pdf:PDF},
    journalref = {boecker07fast},
    keywords = {jena; IDUN; linkpdf; frobenius},
    }
  • S. Böcker and V. Mäkinen, "Maximum line-pair stabbing problem and its variations," in Proc. of european workshop on computational geometry (ewcg 2005), Eindhoven, Netherlands, 2005, p. 183–186.
    [Bibtex]
    @InProceedings{boecker05maximum,
    author = {Sebastian B\"ocker and Veli M\"akinen},
    title = {Maximum Line-Pair Stabbing Problem and its Variations},
    booktitle = {Proc. of European Workshop on Computational Geometry (EWCG 2005)},
    year = {2005},
    pages = {183--186},
    address = {Eindhoven, Netherlands},
    keywords = {jena; linkpdf;},
    url = {http://www.win.tue.nl/EWCG2005/Proceedings/47.pdf},
    }
  • S. Böcker and J. Stoye, "Informatische Methoden zur Protein- und Metaboliten-Identifikation mittels Massenspektrometrie," Laborpraxis, vol. 10, p. 24–26, 2005.
    [Bibtex]
    @Article{boecker05informatische,
    author = {Sebastian B\"ocker and Jens Stoye},
    title = {Informatische {Methoden} zur {Protein}- und {Metaboliten-Identifikation} mittels {Massenspektrometrie}},
    journal = {LaborPraxis},
    year = {2005},
    volume = {10},
    pages = {24--26},
    note = {German},
    keywords = {jena; IDUN; metabolite ms},
    }
  • [DOI] M. Ehrich, S. Böcker, and D. van den Boom, "Multiplexed discovery of sequence polymorphisms using base-specific cleavage and MALDI-TOF MS," Nucleic acids res, vol. 33, iss. 4, p. e38, 2005.
    [Bibtex]
    @Article{ehrich05multiplexed,
    author = {Ehrich, Mathias and B\"ocker, Sebastian and van den Boom, Dirk},
    title = {Multiplexed discovery of sequence polymorphisms using base-specific cleavage and {MALDI-TOF MS}},
    journal = {Nucleic Acids Res},
    year = {2005},
    volume = {33},
    number = {4},
    pages = {e38},
    doi = {10.1093/nar/gni038},
    file = {EhrichEtAl_MultiplexedDiscoverySequence_NucleicAcidsRes_2005.pdf:2005/EhrichEtAl_MultiplexedDiscoverySequence_NucleicAcidsRes_2005.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    pmid = {15731331},
    url = {http://nar.oxfordjournals.org/cgi/content/abstract/33/4/e38},
    }

2004

  • S. Böcker, "Unrooted supertrees: limitations, traps, and phylogenetic patchworks," in Phylogenetic supertrees: combining information to reveal the tree of life, O. R. P. Bininda-Emonds, Ed., Kluwer Academic, 2004, vol. 4, p. 331–351.
    [Bibtex]
    @InCollection{boecker04unrooted,
    author = {Sebastian B\"ocker},
    title = {Unrooted Supertrees: Limitations, Traps, and Phylogenetic Patchworks},
    booktitle = {Phylogenetic supertrees: Combining information to reveal the Tree of Life},
    publisher = {Kluwer Academic},
    year = {2004},
    editor = {Olaf R.P. Bininda-Emonds},
    volume = {4},
    series = {Computational Biology Book Series},
    chapter = {15},
    pages = {331--351},
    file = {Boecker_UnrootedSupertrees_CompBiolBookSeries_2004.pdf:2004/Boecker_UnrootedSupertrees_CompBiolBookSeries_2004.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics; supertrees;},
    }
  • [DOI] S. Böcker, "Sequencing from compomers: using mass spectrometry for DNA de-novo sequencing of 200+~nt," J comput biol, vol. 11, iss. 6, p. 1110–1134, 2004.
    [Bibtex]
    @Article{boecker04sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: Using mass spectrometry for {DNA} de-novo sequencing of 200+~nt},
    journal = {J Comput Biol},
    year = {2004},
    volume = {11},
    number = {6},
    pages = {1110--1134},
    doi = {10.1089/cmb.2004.11.1110},
    file = {Boecker_SequencingFromCompomers_JCB_2004.pdf:2004/Boecker_SequencingFromCompomers_JCB_2004.pdf:PDF},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    pmid = {15662201},
    }
  • S. Böcker, "Sequencing from compomers: the puzzle," in Proc. of FUN with algorithms 2004, Tuscany, Italy, 2004, p. 132–146.
    [Bibtex]
    @InProceedings{boecker04sequencing-fun,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: The puzzle},
    booktitle = {Proc. of {FUN} with Algorithms 2004},
    year = {2004},
    pages = {132--146},
    address = {Tuscany, Italy},
    file = {Boecker_SequencingFromCompomers_FUN_2004.pdf:2004/Boecker_SequencingFromCompomers_FUN_2004.pdf:PDF},
    journalref = {boecker06sequencing},
    keywords = {jena; linkpdf; dna sequencing, dna ms},
    }
  • S. Böcker, "Weighted sequencing from compomers: DNA de-novo sequencing from mass spectrometry data in the presence of false negative peaks," in Proc. of german conference on bioinformatics (gcb 2004), 2004, p. 13–23.
    [Bibtex]
    @InProceedings{boecker04weighted,
    author = {Sebastian B\"ocker},
    title = {Weighted Sequencing from Compomers: {DNA} de-novo sequencing from mass spectrometry data in the presence of false negative peaks},
    booktitle = {Proc. of German Conference on Bioinformatics (GCB 2004)},
    year = {2004},
    volume = {P-53},
    series = {Lecture Notes in Informatics},
    pages = {13--23},
    file = {Boecker_WeightedSequencingFromCompomers_GCB_2004.pdf:2004/Boecker_WeightedSequencingFromCompomers_GCB_2004.pdf:PDF},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    url = {http://subs.emis.de/LNI/Proceedings/Proceedings53/article2724.html},
    }
  • [DOI] M. Lefmann, C. Honisch, S. Böcker, N. Storm, F. von Wintzingerode, C. Schlötelburg, A. Moter, D. van den Boom, and U. B. Göbel, "A novel mass spectrometry based tool for genotypic identification of mycobacteria," J clin microbiol, vol. 42, iss. 1, p. 339–346, 2004.
    [Bibtex]
    @Article{lefmann04novel,
    author = {Michael Lefmann and Christiane Honisch and Sebastian B\"ocker and Niels Storm and Friedrich {von Wintzingerode} and Cord Schl\"otelburg and Annette Moter and Dirk {van den Boom} and Ulf B. G\"obel},
    title = {A novel mass spectrometry based tool for genotypic identification of mycobacteria},
    journal = {J Clin Microbiol},
    year = {2004},
    volume = {42},
    number = {1},
    pages = {339--346},
    doi = {http://jcm.asm.org/cgi/reprint/42/1/339},
    keywords = {jena; linkpdf; dna ms},
    pmid = {14715774},
    }
  • A. Stamatakis, T. Ludwig, and H. Meier, "Parallel inference of a 10.000-taxon phylogeny with maximum likelihood," in Proc. of international euro-par conference (euro-par 2004), 2004, p. 997–1004.
    [Bibtex]
    @InProceedings{stamatakis04parallel,
    author = {Alexandros Stamatakis and Thomas Ludwig and Harald Meier},
    title = {Parallel Inference of a 10.000-taxon Phylogeny with Maximum Likelihood},
    booktitle = {Proc. of International Euro-Par Conference (Euro-Par 2004)},
    year = {2004},
    volume = {3149},
    series = lncs,
    pages = {997--1004},
    publisher = Springer,
    keywords = {phylogenetics; ML; maximum likelihood},
    owner = {malte},
    timestamp = {2011.03.09},
    }
  • P. Stanssens, M. Zabeau, G. Meersseman, G. Remes, Y. Gansemans, N. Storm, R. Hartmer, C. Honisch, C. P. Rodi, S. Böcker, and D. van den Boom, "High-throughput MALDI-TOF discovery of genomic sequence polymorphisms," Genome res, vol. 14, p. 126–133, 2004.
    [Bibtex]
    @Article{stanssens04high-throughput,
    author = {Patrick Stanssens and Marc Zabeau and Geert Meersseman and Gwen Remes and Yannick Gansemans and Niels Storm and Ralf Hartmer and Christiane Honisch and Charles P. Rodi and Sebastian B\"ocker and Dirk {van den Boom}},
    title = {High-throughput {MALDI-TOF} Discovery of Genomic Sequence Polymorphisms},
    journal = {Genome Res},
    year = {2004},
    volume = {14},
    pages = {126--133},
    keywords = {jena; linkpdf; dna ms},
    url = {http://www.genome.org/cgi/content/full/14/1/126},
    }

2003

  • [DOI] S. Böcker, "Sequencing from compomers: using mass spectrometry for DNA de-novo sequencing of 200+~nt (extended abstract)," in Proc. of workshop on algorithms in bioinformatics (wabi 2003), 2003, p. 476–497.
    [Bibtex]
    @InProceedings{boecker03sequencing,
    author = {Sebastian B\"ocker},
    title = {Sequencing from compomers: Using mass spectrometry for {DNA} de-novo sequencing of 200+~nt (extended abstract)},
    booktitle = {Proc. of Workshop on Algorithms in Bioinformatics (WABI 2003)},
    year = {2003},
    volume = {2812},
    series = lncs,
    pages = {476--497},
    publisher = Springer,
    doi = {10.1007/b13243},
    file = {:2003/Boecker_SequencingFromCompomers_WABI_2003.pdf:PDF},
    journalref = {boecker04sequencing},
    keywords = {jena; linkpdf; dna ms, dna sequencing},
    }
  • [DOI] S. Böcker, "SNP and mutation discovery using base-specific cleavage and MALDI-TOF mass spectrometry," Bioinformatics, vol. 19, p. i44–i53, 2003.
    [Bibtex]
    @Article{boecker03snp,
    author = {Sebastian B\"ocker},
    title = {{SNP} and mutation discovery using base-specific cleavage and {MALDI-TOF} mass spectrometry},
    journal = {Bioinformatics},
    year = {2003},
    volume = {19},
    pages = {i44--i53},
    note = {Proc. of \emph{Intelligent Systems for Molecular Biology} (ISMB 2003)},
    doi = {10.1093/bioinformatics/btg1004},
    keywords = {jena; linkpdf; dna ms},
    pmid = {12855436},
    url = {http://www.iscb.org/ismb2003/paperAbstracts/btg1004.pdf},
    }

2002

  • [DOI] S. Böcker, "Exponentially many supertrees," Appl math let, vol. 15, iss. 7, p. 861–865, 2002.
    [Bibtex]
    @Article{boecker02exponentially,
    author = {Sebastian B\"ocker},
    title = {Exponentially many supertrees},
    journal = {Appl Math Let},
    year = {2002},
    volume = {15},
    number = {7},
    pages = {861--865},
    doi = {10.1016/S0893-9659(02)00054-X},
    file = {Boecker_ExponentiallyManySupertrees_ApplMathLetters_2002.pdf:2002/Boecker_ExponentiallyManySupertrees_ApplMathLetters_2002.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }
  • [DOI] F. von Wintzingerode, S. Böcker, C. Schlötelburg, N. H. L. Chiu, N. Storm, C. Jurinke, C. R. Cantor, U. B. Göbel, and D. van den Boom, "Base-specific fragmentation of amplified 16S rRNA genes and mass spectrometry analysis: a novel tool for rapid bacterial identification," Proc natl acad sci u s a, vol. 99, iss. 10, p. 7039–7044, 2002.
    [Bibtex]
    @Article{von-wintzingerode02base-specific,
    author = {Friedrich {von Wintzingerode} and Sebastian B\"ocker and Cord Schl\"otelburg and Norman H.L. Chiu and Niels Storm and Christian Jurinke and Charles R. Cantor and Ulf B. G\"obel and Dirk {van den Boom}},
    title = {Base-specific fragmentation of amplified {16{S} rRNA} genes and mass spectrometry analysis: A novel tool for rapid bacterial identification},
    journal = {Proc Natl Acad Sci U S A},
    year = {2002},
    volume = {99},
    number = {10},
    pages = {7039--7044},
    doi = {10.1073/pnas.102165899},
    file = {vonWintzingerodeEtAl_BaseSpecificFragmentation16SrRNA_PNAS_2002.pdf:2002/vonWintzingerodeEtAl_BaseSpecificFragmentation16SrRNA_PNAS_2002.pdf:PDF},
    keywords = {jena; linkpdf; dna ms},
    }

2001

  • [DOI] S. Böcker and A. W. M. Dress, "Patchworks," Adv math, vol. 157, iss. 1, p. 1–21, 2001.
    [Bibtex]
    @Article{boecker01patchworks,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {Patchworks},
    journal = {Adv Math},
    year = {2001},
    volume = {157},
    number = {1},
    pages = {1--21},
    doi = {doi:10.1006/aima.1999.1912},
    file = {BoeckerDress_Patchworks_AdvMath_2001.pdf:2001/BoeckerDress_Patchworks_AdvMath_2001.pdf:PDF},
    keywords = {jena; linkpdf;},
    }

2000

  • [DOI] S. Böcker, D. Bryant, A. W. M. Dress, and M. A. Steel, "Algorithmic aspects of tree amalgamation," J algorithms, vol. 37, p. 522–537, 2000.
    [Bibtex]
    @Article{boecker00algorithmic,
    author = {Sebastian B\"ocker and David Bryant and Andreas W.M. Dress and Michael A. Steel},
    title = {Algorithmic aspects of tree amalgamation},
    journal = {J Algorithms},
    year = {2000},
    volume = {37},
    pages = {522--537},
    doi = {10.1006/jagm.2000.1116},
    file = {BoeckerEtAl_AlgorithmicAspectsTreeAmalgamation_JAlgorithms_2000.pdf:2000/BoeckerEtAl_AlgorithmicAspectsTreeAmalgamation_JAlgorithms_2000.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }
  • [DOI] S. Böcker and A. W. M. Dress, "A note on maximal hierarchies," Adv math, vol. 151, iss. 2, p. 270–282, 2000.
    [Bibtex]
    @Article{boecker00note,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {A Note on Maximal Hierarchies},
    journal = {Adv Math},
    year = {2000},
    volume = {151},
    number = {2},
    pages = {270--282},
    doi = {doi:10.1006/aima.1999.1891},
    file = {BoeckerDress_NoteMaximalHierarchies_AdvMath_2000.pdf:2000/BoeckerDress_NoteMaximalHierarchies_AdvMath_2000.pdf:PDF},
    keywords = {jena; linkpdf;},
    }
  • M. A. Steel, A. W. M. Dress, and S. Böcker, "Simple but fundamental limitations on supertree and consensus tree methods," Syst biol, vol. 49, iss. 2, p. 363–368, 2000.
    [Bibtex]
    @Article{steel00simple,
    author = {Michael A. Steel and Andreas W.M. Dress and Sebastian B\"ocker},
    title = {Simple but fundamental limitations on supertree and consensus tree methods},
    journal = {Syst Biol},
    year = {2000},
    volume = {49},
    number = {2},
    pages = {363--368},
    file = {SteelEtAl_SimpleFundamentalLimitations_SystematicBiology_2000.pdf:2000/SteelEtAl_SimpleFundamentalLimitations_SystematicBiology_2000.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    pmid = {12118411},
    }

1999

  • S. Böcker, A. W. M. Dress, and M. A. Steel, "Patching up $X$-trees," Ann comb, vol. 3, p. 1–12, 1999.
    [Bibtex]
    @Article{boecker99patching,
    author = {Sebastian B\"ocker and Andreas W.M. Dress and Michael A. Steel},
    title = {Patching up {$X$}-Trees},
    journal = {Ann Comb},
    year = {1999},
    volume = {3},
    pages = {1--12},
    file = {BoeckerEtAl_PatchingUpXtrees_AnnalComb_1999.pdf:1999/BoeckerEtAl_PatchingUpXtrees_AnnalComb_1999.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }

1998

  • [DOI] S. Böcker and A. W. M. Dress, "Recovering symbolically dated, rooted trees from symbolic ultrametrics," Adv math, vol. 138, iss. 1, p. 105–125, 1998.
    [Bibtex]
    @Article{boecker98recovering,
    author = {Sebastian B\"ocker and Andreas W.M. Dress},
    title = {Recovering Symbolically Dated, Rooted Trees from Symbolic Ultrametrics},
    journal = {Adv Math},
    year = {1998},
    volume = {138},
    number = {1},
    pages = {105--125},
    doi = {doi:10.1006/aima.1998.1743},
    file = {BoeckerDress_RecoveringSymbolicallyDated_AdvMath_1998.pdf:1998/BoeckerDress_RecoveringSymbolicallyDated_AdvMath_1998.pdf:PDF},
    keywords = {jena; linkpdf; phylogenetics},
    }