From f1bfe038d1cea500084e0c27f468321489da49d8 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Wed, 4 Nov 2020 09:07:45 +1100 Subject: add core problems slide --- references.bib | 559 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ slides.tex | 30 ++++ 2 files changed, 589 insertions(+) create mode 100644 references.bib diff --git a/references.bib b/references.bib new file mode 100644 index 0000000..4f3708b --- /dev/null +++ b/references.bib @@ -0,0 +1,559 @@ +@article{platypus, + doi = {10.1038/ng.3036}, + url = {https://doi.org/10.1038/ng.3036}, + year = {2014}, + month = jul, + publisher = {Springer Nature}, + volume = {46}, + number = {8}, + pages = {912--918}, + author = {Andy Rimmer and Hang Phan and Iain Mathieson and Zamin Iqbal and Stephen R F Twigg and Andrew O M Wilkie and Gil McVean and Gerton Lunter}, + title = {Integrating mapping-, assembly- and haplotype-based approaches for calling variants in clinical sequencing applications}, + journal = {Nature Genetics} +} +@online{vieira_truly_2017, + title = {A truly reproducible scientific paper?}, + url = {https://medium.com/@bmpvieira/a-truly-reproducible-scientific-paper-5059b282ee9a}, + abstract = {The current situation}, + titleaddon = {Bruno Vieira}, + author = {Vieira, Bruno}, + urldate = {2019-01-23}, + date = {2017-02-01} +} + +@online{archibald_reproducible_2017, + title = {Reproducible Environments With Nix}, + url = {https://www.software.ac.uk/blog/2017-10-05-reproducible-environments-nix}, + titleaddon = {Software Sustainability Institute}, + author = {Archibald, Blair}, + urldate = {2019-01-23}, + date = {2017-10-05} +} + +@article{crouch_software_2013, + title = {The Software Sustainability Institute: Changing Research Software Attitudes and Practices}, + volume = {15}, + issn = {1521-9615}, + doi = {10.1109/MCSE.2013.133}, + shorttitle = {The Software Sustainability Institute}, + abstract = {To effect change, the Software Sustainability Institute works with researchers, developers, 
funders, and infrastructure providers to identify and address key issues with research software.}, + pages = {74--80}, + number = {6}, + journaltitle = {Computing in Science \& Engineering}, + author = {Crouch, S. and Chue Hong, N. and Hettrick, S. and Jackson, M. and Pawlik, A. and Sufi, S. and Carr, L. and De Roure, D. and Goble, C. and Parsons, M.}, + date = {2013-11}, + keywords = {domain engineering, Domain engineering, maintainability, Programming, reliability, research and development, research software attitudes, research software practices, scientific computing, Scientific computing, Software development, software engineering, Software engineering, Software reliability, Software Sustainability Institute, Training} +} + +@misc{georges_dubus_mix:_2018, + location = {London}, + title = {Mix: Nix for data pipeline configuration}, + url = {https://www.youtube.com/watch?v=tc5ApNqhAQ4}, + howpublished = {NixCon}, + year = {2018}, + author = {Dubus, Georges}, +} + +@online{noauthor_wdl_nodate, + title = {{WDL} {\textbar} Home}, + url = {https://software.broadinstitute.org/wdl/}, + urldate = {2019-01-23} +} + +@online{noauthor_reusable_2019, + title = {Reusable Reproducible Composable Software}, + rights = {{MPL}-2.0}, + url = {https://github.com/fractalide/fractalide}, + publisher = {Fractalide}, + date = {2019-01-22}, +} +@article{gruning_practical_2018, + title = {Practical Computational Reproducibility in the Life Sciences}, + volume = {6}, + issn = {2405-4712}, + url = {https://www.cell.com/cell-systems/abstract/S2405-4712(18)30140-6}, + doi = {10.1016/j.cels.2018.03.014}, + pages = {631--635}, + number = {6}, + journaltitle = {Cell Systems}, + shortjournal = {cels}, + author = {Grüning, Björn and Chilton, John and Köster, Johannes and Dale, Ryan and Soranzo, Nicola and van den Beek, Marius and Goecks, Jeremy and Backofen, Rolf and Nekrutenko, Anton and Taylor, James}, + urldate = {2018-11-20}, + date = {2018-06-27}, + pmid = {29953862} +} 
+@online{noauthor_haskell_2019, + title = {A Haskell re-implementation of the Nix expression language}, + url = {https://github.com/haskell-nix/hnix}, + publisher = {Nix in Haskell}, + date = {2019-01-22} +} + +@misc{amstutz_common_2016, + title = {Common Workflow Language, v1.0}, + url = {https://figshare.com/articles/Common_Workflow_Language_draft_3/3115156}, + abstract = {The Common Workflow Language ({CWL}) is an informal, multi-vendor working group consisting of various organizations and individuals that have an interest in portability of data analysis workflows. Our goal is to create specifications that enable data scientists to describe analysis tools and workflows that are +powerful, easy to use, portable, and support reproducibility.{CWL} builds on technologies such as {JSON}-{LD} and Avro for data modeling and Docker for portable runtime environments. + +{CWL} is designed to express workflows for data-intensive science, such as Bioinformatics, Medical Imaging, Chemistry, Physics, and Astronomy.This is v1.0 of the {CWL} tool and workflow specification, released on 2016-07-08.The specification, in {HTML} format, is in the draft-3/docs folder.}, + author = {Amstutz, Peter and Crusoe, Michael R. and Tijanić, Nebojša and Chapman, Brad and Chilton, John and Heuer, Michael and Kartashov, Andrey and Leehr, Dan and Ménager, Hervé and Nedeljkovich, Maya and Scales, Matt and Soiland-Reyes, Stian and Stojanovic, Luka}, + urldate = {2019-01-23}, + date = {2016-07-08}, + doi = {10.6084/m9.figshare.3115156.v2}, + keywords = {commonwl} +} +@article{cnvkit, + doi = {10.1371/journal.pcbi.1004873}, + url = {https://doi.org/10.1371/journal.pcbi.1004873}, + year = {2016}, + month = apr, + publisher = {Public Library of Science ({PLoS})}, + volume = {12}, + number = {4}, + pages = {e1004873}, + author = {Eric Talevich and A. Hunter Shain and Thomas Botton and Boris C. 
Bastian}, + title = {{CNVkit}: Genome-Wide Copy Number Detection and Visualization from Targeted {DNA} Sequencing}, + journal = {{PLOS} Computational Biology} +} +@article{di_tommaso_nextflow_2017, + title = {Nextflow enables reproducible computational workflows}, + volume = {35}, + rights = {2017 Nature Publishing Group}, + issn = {1546-1696}, + url = {https://www.nature.com/articles/nbt.3820}, + doi = {10.1038/nbt.3820}, + abstract = {Nextflow enables reproducible computational workflows}, + pages = {316--319}, + journaltitle = {Nature Biotechnology}, + author = {Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W. and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric}, + urldate = {2019-01-23}, + date = {2017-04-11}, + langid = {english} +} +@article{strelka, + doi = {10.1038/s41592-018-0051-x}, + url = {https://doi.org/10.1038/s41592-018-0051-x}, + year = {2018}, + month = jul, + publisher = {Springer Nature America, Inc}, + volume = {15}, + number = {8}, + pages = {591--594}, + author = {Sangtae Kim and Konrad Scheffler and Aaron L. Halpern and Mitchell A. Bekritsky and Eunho Noh and Morten K\"{a}llberg and Xiaoyu Chen and Yeonbin Kim and Doruk Beyter and Peter Krusche and Christopher T. Saunders}, + title = {Strelka2: fast and accurate calling of germline and somatic variants}, + journal = {Nature Methods} +} +@article{samtools, + doi = {10.1093/bioinformatics/btp352}, + url = {https://doi.org/10.1093/bioinformatics/btp352}, + year = {2009}, + month = jun, + publisher = {Oxford University Press ({OUP})}, + volume = {25}, + number = {16}, + pages = {2078--2079}, + author = {H. Li and B. Handsaker and A. Wysoker and T. Fennell and J. Ruan and N. Homer and G. Marth and G. Abecasis and R. 
Durbin and {1000 Genome Project Data Processing Subgroup}}, + title = {The Sequence Alignment/Map format and {SAMtools}}, + journal = {Bioinformatics} +} +@article{bwa, + doi = {10.1093/bioinformatics/btp324}, + url = {https://doi.org/10.1093/bioinformatics/btp324}, + year = {2009}, + month = may, + publisher = {Oxford University Press ({OUP})}, + volume = {25}, + number = {14}, + pages = {1754--1760}, + author = {H. Li and R. Durbin}, + title = {Fast and accurate short read alignment with Burrows-Wheeler transform}, + journal = {Bioinformatics} +} +@online{bwa-mem, +Author = {Heng Li}, +Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, +Year = {2013}, +eprinttype = {arXiv}, +eprint = {1303.3997}, +} +@online{bioshake, +author={Justin Bed\H{o}}, +title={Bioshake: a Haskell EDSL for bioinformatics pipelines}, +year=2018, +eprinttype={bioRxiv}, +eprint = {10.1101/529479} +} +@online{bioshake-git, + title = {Bioshake}, + url = {https://github.com/PapenfussLab/bioshake}, + month = jan, + year = {2019} +} + +@online{torque, +title={TORQUE Resource Manager}, +month = jan, +year = {2019}, +url = {http://www.adaptivecomputing.com/products/torque/} +} +@online{wdl-scatter-gather, +title={Workflow Description Language - Specification and Implementations}, +month = jan, +year = {2019}, +url = {https://github.com/openwdl/wdl/blob/721e16f28f0bf5b3ae8b44df2859b504e10ae13f/README.md#scattergather} +} +@online{nextflow-example, +title = {Nextflow - Basic pipeline}, +month = jan, +year = {2019}, +url = {https://www.nextflow.io/example1.html} +} +@online{docker, +title = {Enterprise container platform}, +month = oct, +year = {2018}, +url = {https://www.docker.com} +} +@online{singularity, + title = {Singularity}, + month = oct, + year = {2018}, + url = {https://www.sylabs.io/singularity/} +} +@online{conda, +title = {Package, dependency and environment management for any language—Python, R, Ruby, Lua, Scala, Java, JavaScript, C/ C++, FORTRAN}, +month = oct, +year = {2018}, 
+url = {https://conda.io/docs/} +} +@article{ruffus, + doi = {10/cwkcs3}, + year = 2010, + month = sep, + publisher = {Oxford University Press ({OUP})}, + volume = 26, + number = 21, + pages = {2778--2779}, + author = {L. Goodstadt}, + title = {Ruffus: a lightweight Python library for computational pipelines}, + journal = {Bioinformatics} +} +@article{toil, + doi = {10.1038/nbt.3772}, + year = {2017}, + month = apr, + publisher = {Springer Nature}, + volume = {35}, + number = {4}, + pages = {314--316}, + author = {John Vivian and Arjun Arkal Rao and Frank Austin Nothaft and Christopher Ketchum and Joel Armstrong and Adam Novak and Jacob Pfeil and Jake Narkizian and Alden D Deran and Audrey Musselman-Brown and Hannes Schmidt and Peter Amstutz and Brian Craft and Mary Goldman and Kate Rosenbloom and Melissa Cline and Brian O'Connor and Megan Hanna and Chet Birger and W James Kent and David A Patterson and Anthony D Joseph and Jingchun Zhu and Sasha Zaranek and Gad Getz and David Haussler and Benedict Paten}, + title = {Toil enables reproducible, open source, big biomedical data analyses}, + journal = {Nature Biotechnology} +} +@phdthesis{dolstra2006, + title = {The Purely Functional Software Deployment Model}, + author = {Eelco Dolstra}, + school = {Faculty of Science, Utrecht, The Netherlands}, + year = {2006} +} +@inproceedings{dolstra2004, +title = {Nix: A Safe and Policy-Free System for Software Deployment}, +author = {Eelco Dolstra and Merijn de Jonge and Eelco Visser}, +crossref = {lisa2004} +} +@proceedings{lisa2004, +title = {Proceedings of the 18th Large Installation System Administration Conference}, +booktitle = {Proceedings of the 18th Large Installation System Administration Conference}, +venue = {Atlanta}, +month = nov, +year = {2004} +} +@online{nixos, +title = {NixOS}, +month = jan, +year = {2019}, +url = {https://www.nixos.org/nixos} +} +@article{dolstra2010, +title = {NixOS: A Purely Functional Linux Distribution}, +year = {2010}, +author = {Eelco 
Dolstra and Andras Löh and Nicolas Pierron}, +journal = {Journal of Functional Programming}, +pages = {577--615}, +publisher = {Cambridge University Press} +} +@article{rep2016, + doi = {10.1038/533437a}, + url = {https://doi.org/10.1038/533437a}, + year = {2016}, + month = may, + publisher = {Springer Nature}, + volume = {533}, + number = {7604}, + pages = {437--437}, + title = {Reality check on reproducibility}, + journal = {Nature} +} +@proceedings{ir2018, +title = {Challenges in irreproducible research}, +booktitle = {Challenges in irreproducible research}, +year = {2018}, +publisher = {Nature} +} +@online{nixpkgs, +title = {nixpkgs}, +month = jan, +year = {2019}, +url = {https://www.nixos.org/nixpkgs} +} +@article{cameron_gridss:_2017, + title = {{GRIDSS}: sensitive and specific genomic rearrangement detection using positional de Bruijn graph assembly}, + volume = {27}, + issn = {1088-9051, 1549-5469}, + url = {http://genome.cshlp.org/content/27/12/2050}, + doi = {10.1101/gr.222109.117}, + shorttitle = {{GRIDSS}}, + abstract = {The identification of genomic rearrangements with high sensitivity and specificity using massively parallel sequencing remains a major challenge, particularly in precision medicine and cancer research. Here, we describe a new method for detecting rearrangements, {GRIDSS} (Genome Rearrangement {IDentification} Software Suite). {GRIDSS} is a multithreaded structural variant ({SV}) caller that performs efficient genome-wide break-end assembly prior to variant calling using a novel positional de Bruijn graph-based assembler. By combining assembly, split read, and read pair evidence using a probabilistic scoring, {GRIDSS} achieves high sensitivity and specificity on simulated, cell line, and patient tumor data, recently winning {SV} subchallenge \#5 of the {ICGC}-{TCGA} {DREAM}8.5 Somatic Mutation Calling Challenge. 
On human cell line data, {GRIDSS} halves the false discovery rate compared to other recent methods while matching or exceeding their sensitivity. {GRIDSS} identifies nontemplate sequence insertions, microhomologies, and large imperfect homologies, estimates a quality score for each breakpoint, stratifies calls into high or low confidence, and supports multisample analysis.}, + pages = {2050--2060}, + number = {12}, + journaltitle = {Genome Research}, + shortjournal = {Genome Res.}, + author = {Cameron, Daniel L. and Schröder, Jan and Penington, Jocelyn Sietsma and Do, Hongdo and Molania, Ramyar and Dobrovic, Alexander and Speed, Terence P. and Papenfuss, Anthony T.}, + urldate = {2019-01-21}, + date = {2017-12-01}, + langid = {english}, + pmid = {29097403} +} +@inproceedings{bzeznik_nix_2017, + location = {Denver, {CO}, {USA}}, + title = {Nix as {HPC} package management system}, + isbn = {978-1-4503-5130-0}, + url = {http://dl.acm.org/citation.cfm?doid=3152493.3152556}, + doi = {10.1145/3152493.3152556}, + eventtitle = {the Fourth International Workshop}, + pages = {1--6}, + booktitle = {Proceedings of the Fourth International Workshop on {HPC} User Support Tools - {HUST}'17}, + publisher = {{ACM} Press}, + author = {Bzeznik, Bruno and Henriot, Oliver and Reis, Valentin and Richard, Olivier and Tavard, Laure}, + urldate = {2019-01-21}, + date = {2017}, + langid = {english} +} +@misc{nixcon_pierre-antoine_nodate, + author = {Pierre-Antoine Bouttier}, + title = {Nix as {HPC} package management system}, + url = {https://www.youtube.com/watch?v=s5iY3CsdSfQ}, + howpublished = {NixCon}, + year = 2018 +} +@article{wurmus_pigx:_2018, + title = {{PiGx}: reproducible genomics analysis pipelines with {GNU} Guix}, + volume = {7}, + url = {https://academic.oup.com/gigascience/article/7/12/giy123/5114263}, + doi = {10.1093/gigascience/giy123}, + shorttitle = {{PiGx}}, + abstract = {Abstract. 
In bioinformatics, as well as other computationally intensive research fields, there is a need for workflows that can reliably produce consistent out}, + number = {12}, + journaltitle = {{GigaScience}}, + shortjournal = {Gigascience}, + author = {Wurmus, Ricardo and Uyar, Bora and Osberg, Brendan and Franke, Vedran and Gosdschan, Alexander and Wreczycka, Katarzyna and Ronen, Jonathan and Akalin, Altuna}, + urldate = {2019-01-21}, + date = {2018-12-01}, + langid = {english} +} +@article{brandt_computation_2017, + title = {Computation semantics of the functional scientific workflow language Cuneiform*}, + volume = {27}, + issn = {0956-7968, 1469-7653}, + url = {http://www.cambridge.org/core/journals/journal-of-functional-programming/article/computation-semantics-of-the-functional-scientific-workflow-language-cuneiform/1A3B8AB825939117C5BD9F850F63ADCC}, + doi = {10.1017/S0956796817000119}, + abstract = {Cuneiform is a minimal functional programming language for large-scale scientific data analysis. Implementing a strict black-box view on external operators and data, it allows the direct embedding of code in a variety of external languages like Python or R, provides data-parallel higher order operators for processing large partitioned data sets, allows conditionals and general recursion, and has a naturally parallelizable evaluation strategy suitable for multi-core servers and distributed execution environments like Hadoop, {HTCondor}, or distributed Erlang. Cuneiform has been applied in several data-intensive research areas including remote sensing, machine learning, and bioinformatics, all of which critically depend on the flexible assembly of pre-existing tools and libraries written in different languages into complex pipelines. This paper introduces the computation semantics for Cuneiform. It presents Cuneiform's abstract syntax, a simple type system, and the semantics of evaluation. 
Providing an unambiguous specification of the behavior of Cuneiform eases the implementation of interpreters which we showcase by providing a concise reference implementation in Erlang. The similarity of Cuneiform's syntax to the simply typed lambda calculus puts Cuneiform in perspective and allows a straightforward discussion of its design in the context of functional programming. Moreover, the simple type system allows the deduction of the language's safety up to black-box operators. Last, the formulation of the semantics also permits the verification of compilers to and from other workflow languages.}, + journaltitle = {Journal of Functional Programming}, + author = {Brandt, Jörgen and Reisig, Wolfgang and Leser, Ulf}, + urldate = {2019-07-16}, + date = {2017}, + langid = {english}, +} +@inproceedings{brandt_cuneiform:_2015, + location = {Brussels, Belgium}, + title = {Cuneiform: A Functional Language for Large Scale Scientific Data Analysis}, + volume = {1330}, + url = {http://ceur-ws.org/Vol-1330/paper-03.pdf}, + pages = {17--26}, + booktitle = {Proceedings of the Workshops of the {EDBT}/{ICDT}}, + author = {Brandt, Jörgen and Bux, Marc and Leser, Ulf}, + date = {2015-03} +} +@unpublished{wermus_ricardo_gwl:_2019, + title = {{GWL}: {GNU} Workflow Language}, + url = {https://www.youtube.com/watch?v=pwYhPqaUiGg}, + shorttitle = {{GWL}}, + abstract = {by Ricardo Wurmus + +At: {FOSDEM} 2019 +https://video.fosdem.org/2019/K.4.201... 
+ +Room: K.4.201 +Scheduled start: 2019-02-02 13:35:00+01}, + note = {{FOSDEM} 2019}, + author = {Wurmus, Ricardo}, + urldate = {2019-07-16}, + date = {2019-02-02} +} + +@unpublished{janssen_roel_workflow_2017, + title = {Workflow management with {GNU} Guix}, + url = {https://www.youtube.com/watch?v=tpLcwfRXL28}, + abstract = {by Roel Janssen + +At: {FOSDEM} 2017 + +Combining programs to perform more powerful actions using scripting languages seems a good idea, until portability and parallel execution on computing clusters become the main concerns of the script. {GNU} Guix used {GNU} Guile as a domain-specific language to describe software packages. In the same way, the components that make up a workflow description can be expressed using {GNU} Guile. + +In this talk I would like to present the work I have done to implement a simple workflow language and a workflow execution engine (both in {GNU} Guile) to run programs on computing clusters. + + +Room: K.4.601 +Scheduled start: 2017-02-05 14:30:00}, + note = {{FOSDEM} 2017}, + author = {Janssen, Roel}, + urldate = {2019-07-16}, + date = {2017-02-05} +} +@article{Afgan2018, + doi = {10.1093/nar/gky379}, + url = {https://doi.org/10.1093/nar/gky379}, + year = {2018}, + month = may, + publisher = {Oxford University Press ({OUP})}, + volume = {46}, + number = {W1}, + pages = {W537--W544}, + author = {Enis Afgan and Dannon Baker and B{\'{e}}r{\'{e}}nice Batut and Marius van~den~Beek and Dave Bouvier and Martin {\v{C}}ech and John Chilton and Dave Clements and Nate Coraor and Bj\"{o}rn A Gr\"{u}ning and Aysam Guerler and Jennifer Hillman-Jackson and Saskia Hiltemann and Vahid Jalili and Helena Rasche and Nicola Soranzo and Jeremy Goecks and James Taylor and Anton Nekrutenko and Daniel Blankenberg}, + title = {The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update}, + journal = {Nucleic Acids Research} +} +@article{Grning2018, + doi = {10.1038/s41592-018-0046-7}, + url = 
{https://doi.org/10.1038/s41592-018-0046-7}, + year = {2018}, + month = jul, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {7}, + pages = {475--476}, + author = {Bj\"{o}rn Gr\"{u}ning and Ryan Dale and Andreas Sj\"{o}din and Brad A. Chapman and Jillian Rowe and Christopher H. Tomkins-Tinch and Renan Valieris and Johannes K\"{o}ster}, + title = {Bioconda: sustainable and comprehensive software distribution for the life sciences}, + journal = {Nature Methods} +} +@article{Lampa2019, + doi = {10.1093/gigascience/giz044}, + url = {https://doi.org/10.1093/gigascience/giz044}, + year = {2019}, + month = apr, + publisher = {Oxford University Press ({OUP})}, + volume = {8}, + number = {5}, + author = {Samuel Lampa and Martin Dahl\"{o} and Jonathan Alvarsson and Ola Spjuth}, + title = {{SciPipe}: A workflow library for agile development of complex and dynamic bioinformatics pipelines}, + journal = {{GigaScience}} +} +@misc{andrews2010, + address = {{Babraham, UK}}, + title = {{{FastQC}}}, + copyright = {GPL v3}, + abstract = {FastQC aims to provide a simple way to do some quality control checks on raw sequence data coming from high throughput sequencing pipelines. 
It provides a modular set of analyses which you can use to give a quick impression of whether your data has any problems of which you should be aware before doing any further analysis.}, + howpublished = {Babraham Institute}, + author = {Andrews, Simon and Krueger, Felix and {Segonds-Pichon}, Anne and Biggins, Laura and Krueger, Christel and Wingett, Steven}, + month = jan, + year = {2010} +} +@article{Li2018, + doi = {10.1093/bioinformatics/bty191}, + url = {https://doi.org/10.1093/bioinformatics/bty191}, + year = {2018}, + month = may, + publisher = {Oxford University Press ({OUP})}, + volume = {34}, + number = {18}, + pages = {3094--3100}, + author = {Heng Li}, + editor = {Inanc Birol}, + title = {Minimap2: pairwise alignment for nucleotide sequences}, + journal = {Bioinformatics} +} +@article{Cameron2019, + doi = {10.1101/781013}, + url = {https://doi.org/10.1101/781013}, + year = {2019}, + month = sep, + publisher = {Cold Spring Harbor Laboratory}, + author = {Daniel L. Cameron and Jonathan Baber and Charles Shale and Anthony T. 
Papenfuss and Jose Espejo Valle-Inclan and Nicolle Besselink and Edwin Cuppen and Peter Priestley}, + title = {{GRIDSS}, {PURPLE}, {LINX}: Unscrambling the tumor genome via integrated analysis of structural variation and copy number} +} +@misc{Picard2019toolkit, + title = {Picard toolkit}, + year = {2019}, + publisher = {Broad Institute}, + journal = {Broad Institute, GitHub repository}, + howpublished = {\url{http://broadinstitute.github.io/picard/}} +} +@online{workflowsGist, + author = {Bernie Pope}, + title = {Computational Data Analysis Workflow Systems}, + year = 2020, + url = {https://github.com/common-workflow-language/common-workflow-language/wiki/Existing-Workflow-systems}, + urldate = {2020-06-02} +} +@online{workflowsGithub, + title = {A curated list of awesome pipeline toolkits inspired by Awesome Sysadmin}, + url = {https://github.com/pditommaso/awesome-pipeline}, + urldate = {2020-06-02} +} +@article{Leipzig2016, + doi = {10.1093/bib/bbw020}, + url = {https://doi.org/10.1093/bib/bbw020}, + year = {2016}, + month = mar, + publisher = {Oxford University Press ({OUP})}, + pages = {bbw020}, + author = {Jeremy Leipzig}, + title = {A review of bioinformatic pipeline frameworks}, + journal = {Briefings in Bioinformatics} +} +@article{Koster2012, + doi = {10.1093/bioinformatics/bts480}, + url = {https://doi.org/10.1093/bioinformatics/bts480}, + year = {2012}, + month = aug, + publisher = {Oxford University Press ({OUP})}, + volume = {28}, + number = {19}, + pages = {2520--2522}, + author = {J. Koster and S. 
Rahmann}, + title = {Snakemake---a scalable bioinformatics workflow engine}, + journal = {Bioinformatics} +} +@online{cromwell, +url = {https://github.com/broadinstitute/cromwell}, +urldate = {2020-06-17} +} +@online{rubra, +url = {https://github.com/bjpop/rubra}, +urldate = {2020-06-17} +} +@online{wdl-container, +url = {https://cromwell.readthedocs.io/en/stable/tutorials/Containers/#specifying-containers-in-your-workflow}, +urldate = {2020-06-18} +} +@misc{gigadb, +doi = {10.5524/100782}, +url = {http://gigadb.org/dataset/100782}, +author = {Bedő, Justin and Di Stefano, Leon S and Papenfuss, Anthony T}, +keywords = {Software, Workflow, nix deployment system, reproducibility, package management, workflow engine, containers}, +language = {en}, +title = {Supporting data for ``Unifying package managers, workflow engines, and containers with BioNix for computational reproducibility''}, +publisher = {GigaScience Database}, +year = {2020}, +copyright = {Creative Commons Zero v1.0 Universal} +} \ No newline at end of file diff --git a/slides.tex b/slides.tex index 2a2cdad..c4fb08d 100644 --- a/slides.tex +++ b/slides.tex @@ -3,6 +3,10 @@ \usepackage{microtype} \usepackage{tikz} \usetikzlibrary{fit} +\usepackage[style=verbose-ibid,url=false,natbib=true]{biblatex} +\addbibresource{references.bib} +\renewcommand*{\footnotesize}{\Tiny} +\setlength{\footnotesep}{3pt} \definecolor{bngreen}{HTML}{3c8e64} \definecolor{bnorange}{HTML}{e08919} @@ -24,6 +28,32 @@ \end{frame} } +\begin{frame} + \frametitle{Core problems} + \begin{enumerate} + \item \emph{Managing software versions and dependencies}. + \begin{itemize} + \item Conda + \item BioConda~\autocite{Grning2018} + \item apt-get + \end{itemize} + + \item \emph{Managing computational environments}. + \begin{itemize} + \item Docker + \item Singularity + \item \emph{virtual machines} + \end{itemize} + + \item \emph{Managing workflows}. 
+ \begin{itemize} + \item Toil~\autocite{toil} + \item SnakeMake~\autocite{Koster2012} + \item NextFlow~\autocite{di_tommaso_nextflow_2017} + \end{itemize} + \end{enumerate} +\end{frame} + \begin{frame} \begin{center} \begin{tikzpicture}[->,>=stealth,shorten >=1pt,auto,node distance=2cm,thick] -- cgit v1.2.3