Continued from A3: Deriving Trust Levels for Multi-Choice Data Analysis Workflows
Description
Multi-choice data analysis workflows (DAWs) are used in computational materials science (CMS) to explore and analyze materials properties. Such workflows are composed of various programs, called codes, which are configured by input parameters. A3 will research new technologies for creating guidelines that tell scientists how to compute the various materials properties to a desired level of accuracy and numerical precision. Here, methods for a reliable end-to-end data-quality assessment of CMS DAWs are an important yet challenging prerequisite.
Scientists
- Enrico Ahlers
- Noah Hoffmann
- Daniel Linhart
Publications
2025
Bechtel, Tim; Speckhard, Daniel T.; Godwin, Jonathan; Draxl, Claudia
Band-Gap Regression with Architecture-Optimized Message-Passing Neural Networks Journal Article
In: Chemistry of Materials, vol. 37, no. 4, pp. 1358–1369, 2025.
@article{doi:10.1021/acs.chemmater.4c01988,
  title = {Band-Gap Regression with Architecture-Optimized Message-Passing Neural Networks},
  author = {Tim Bechtel and Daniel T. Speckhard and Jonathan Godwin and Claudia Draxl},
  url = {https://doi.org/10.1021/acs.chemmater.4c01988},
  doi = {10.1021/acs.chemmater.4c01988},
  year = {2025},
  date = {2025-01-01},
  urldate = {2025-01-01},
  journal = {Chemistry of Materials},
  volume = {37},
  number = {4},
  pages = {1358--1369},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Speckhard, Daniel T; Carbogno, Christian; Ghiringhelli, Luca M; Lubeck, Sven; Scheffler, Matthias; Draxl, Claudia
Extrapolation to the complete basis-set limit in density-functional theory using statistical learning Journal Article
In: Physical Review Materials, vol. 9, no. 1, pp. 013801, 2025.
@article{speckhard2025extrapolation,
  title = {Extrapolation to the complete basis-set limit in density-functional theory using statistical learning},
  author = {Daniel T Speckhard and Christian Carbogno and Luca M Ghiringhelli and Sven Lubeck and Matthias Scheffler and Claudia Draxl},
  url = {https://journals.aps.org/prmaterials/pdf/10.1103/PhysRevMaterials.9.013801},
  doi = {10.1103/PhysRevMaterials.9.013801},
  year = {2025},
  date = {2025-01-01},
  urldate = {2025-01-01},
  journal = {Physical Review Materials},
  volume = {9},
  number = {1},
  pages = {013801},
  publisher = {APS},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Speckhard, Daniel; Bechtel, Tim; Ghiringhelli, Luca M; Kuban, Martin; Rigamonti, Santiago; Draxl, Claudia
How big is big data? Journal Article
In: Faraday Discussions, vol. 256, pp. 483–502, 2025.
@article{speckhard2025big,
  title = {How big is big data?},
  author = {Daniel Speckhard and Tim Bechtel and Luca M Ghiringhelli and Martin Kuban and Santiago Rigamonti and Claudia Draxl},
  url = {https://pubs.rsc.org/en/content/articlehtml/2025/fd/d4fd00102h},
  doi = {10.1039/D4FD00102H},
  year = {2025},
  date = {2025-01-01},
  urldate = {2025-01-01},
  journal = {Faraday Discussions},
  volume = {256},
  pages = {483--502},
  publisher = {Royal Society of Chemistry},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2024
Schintke, Florian; Belhajjame, Khalid; Mecquenem, Ninon De; Frantz, David; Guarino, Vanessa Emanuela; Hilbrich, Marcus; Lehmann, Fabian; Missier, Paolo; Sattler, Rebecca; Sparka, Jan Arne; Speckhard, Daniel T.; Stolte, Hermann; Vu, Anh Duc; Leser, Ulf
Validity constraints for data analysis workflows Journal Article
In: Future Generation Computer Systems, vol. 157, pp. 82–97, 2024, ISSN: 0167-739X.
@article{SCHINTKE2024,
  title = {Validity constraints for data analysis workflows},
  author = {Florian Schintke and Khalid Belhajjame and Ninon De Mecquenem and David Frantz and Vanessa Emanuela Guarino and Marcus Hilbrich and Fabian Lehmann and Paolo Missier and Rebecca Sattler and Jan Arne Sparka and Daniel T. Speckhard and Hermann Stolte and Anh Duc Vu and Ulf Leser},
  url = {https://www.sciencedirect.com/science/article/pii/S0167739X24001079},
  doi = {10.1016/j.future.2024.03.037},
  issn = {0167-739X},
  year = {2024},
  date = {2024-01-01},
  urldate = {2024-01-01},
  journal = {Future Generation Computer Systems},
  volume = {157},
  pages = {82--97},
  abstract = {Porting a scientific data analysis workflow (DAW) to a cluster infrastructure, a new software stack, or even only a new dataset with some notably different properties is often challenging. Despite the structured definition of the steps (tasks) and their interdependencies during a complex data analysis in the DAW specification, relevant assumptions may remain unspecified and implicit. Such hidden assumptions often lead to crashing tasks without a reasonable error message, poor performance in general, non-terminating executions, or silent wrong results of the DAW, to name only a few possible consequences. Searching for the causes of such errors and drawbacks in a distributed compute cluster managed by a complex infrastructure stack, where DAWs for large datasets typically are executed, can be tedious and time-consuming. We propose validity constraints (VCs) as a new concept for DAW languages to alleviate this situation. A VC is a constraint specifying logical conditions that must be fulfilled at certain times for DAW executions to be valid. When defined together with a DAW, VCs help to improve the portability, adaptability, and reusability of DAWs by making implicit assumptions explicit. Once specified, VCs can be controlled automatically by the DAW infrastructure, and violations can lead to meaningful error messages and graceful behaviour (e.g., termination or invocation of repair mechanisms). We provide a broad list of possible VCs, classify them along multiple dimensions, and compare them to similar concepts one can find in related fields. We also provide a proof-of-concept implementation for the workflow system Nextflow.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2023
Müller, Sebastian; Sparka, Jan Arne; Kuban, Martin; Draxl, Claudia; Grunske, Lars
Grammar-based fuzzing of data integration parsers in computational materials science Journal Article
In: Software: Practice and Experience, 2023.
@article{https://doi.org/10.1002/spe.3266,
  title = {Grammar-based fuzzing of data integration parsers in computational materials science},
  author = {Sebastian Müller and Jan Arne Sparka and Martin Kuban and Claudia Draxl and Lars Grunske},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/spe.3266},
  doi = {10.1002/spe.3266},
  year = {2023},
  date = {2023-09-19},
  journal = {Software: Practice and Experience},
  abstract = {Abstract Context Computational materials science (CMS) focuses on in silico experiments to compute the properties of known and novel materials, where many software packages are used in the community. The NOMAD Laboratory (Draxl C, Scheffler) offers to store the input and output files in its FAIR data repository. Since the file formats of these software packages are non-standardized, parsers are used to provide the results in a normalized format. Objective The main goal of this article is to report experience and findings of using grammar-based fuzzing on these parsers. Method We have constructed an input grammar for four common software packages in the CMS domain and performed an experimental evaluation on the capabilities of grammar-based fuzzing to detect failures in the Novel Materials Discovery (NOMAD) parsers. Results With our approach, we were able to identify three unique critical bugs concerning service availability, as well as several additional syntactic, semantic, logical, and downstream bugs in the investigated NOMAD parsers. We reported all issues to the developer team prior to publication. Conclusion Based on the experience gained, we can recommend grammar-based fuzzing also for other research software packages to improve the trust level in the correctness of the produced results.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Gavini, Vikram; Baroni, Stefano; Blum, Volker; Bowler, David R; Buccheri, Alexander; Chelikowsky, James R; Das, Sambit; Dawson, William; Delugas, Pietro; Dogan, Mehmet; Draxl, Claudia; Galli, Giulia; Genovese, Luigi; Giannozzi, Paolo; Giantomassi, Matteo; Gonze, Xavier; Govoni, Marco; Gygi, François; Gulans, Andris; Herbert, John M; Kokott, Sebastian; Kühne, Thomas D; Liou, Kai-Hsin; Miyazaki, Tsuyoshi; Motamarri, Phani; Nakata, Ayako; Pask, John E; Plessl, Christian; Ratcliff, Laura E; Richard, Ryan M; Rossi, Mariana; Schade, Robert; Scheffler, Matthias; Schütt, Ole; Suryanarayana, Phanish; Torrent, Marc; Truflandier, Lionel; Windus, Theresa L; Xu, Qimen; Yu, Victor W-Z; Perez, D
Roadmap on electronic structure codes in the exascale era Journal Article
In: Modelling and Simulation in Materials Science and Engineering, vol. 31, no. 6, pp. 063301, 2023.
@article{Gavini_2023,
  author    = {Vikram Gavini
               and Stefano Baroni
               and Volker Blum
               and David R Bowler
               and Alexander Buccheri
               and James R Chelikowsky
               and Sambit Das
               and William Dawson
               and Pietro Delugas
               and Mehmet Dogan
               and Claudia Draxl
               and Giulia Galli
               and Luigi Genovese
               and Paolo Giannozzi
               and Matteo Giantomassi
               and Xavier Gonze
               and Marco Govoni
               and François Gygi
               and Andris Gulans
               and John M Herbert
               and Sebastian Kokott
               and Thomas D Kühne
               and Kai-Hsin Liou
               and Tsuyoshi Miyazaki
               and Phani Motamarri
               and Ayako Nakata
               and John E Pask
               and Christian Plessl
               and Laura E Ratcliff
               and Ryan M Richard
               and Mariana Rossi
               and Robert Schade
               and Matthias Scheffler
               and Ole Schütt
               and Phanish Suryanarayana
               and Marc Torrent
               and Lionel Truflandier
               and Theresa L Windus
               and Qimen Xu
               and Victor W-Z Yu
               and D Perez},
  title     = {Roadmap on electronic structure codes in the exascale era},
  journal   = {Modelling and Simulation in Materials Science and Engineering},
  volume    = {31},
  number    = {6},
  pages     = {063301},
  publisher = {IOP Publishing},
  year      = {2023},
  date      = {2023-08-01},
  urldate   = {2023-08-01},
  doi       = {10.1088/1361-651X/acdf06},
  url       = {https://dx.doi.org/10.1088/1361-651X/acdf06},
  abstract  = {Electronic structure calculations have been instrumental in providing many important insights into a range of physical and chemical properties of various molecular and solid-state systems. Their importance to various fields, including materials science, chemical sciences, computational chemistry, and device physics, is underscored by the large fraction of available public supercomputing resources devoted to these calculations. As we enter the exascale era, exciting new opportunities to increase simulation numbers, sizes, and accuracies present themselves. In order to realize these promises, the community of electronic structure software developers will however first have to tackle a number of challenges pertaining to the efficient use of new architectures that will rely heavily on massive parallelism and hardware accelerators. This roadmap provides a broad overview of the state-of-the-art in electronic structure calculations and of the various new directions being pursued by the community. It covers 14 electronic structure codes, presenting their current status, their development priorities over the next five years, and their plans towards tackling the challenges and leveraging the opportunities presented by the advent of exascale computing.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Tsigkanos, Christos; Rani, Pooja; Müller, Sebastian; Kehrer, Timo
Large Language Models: The Next Frontier for Variable Discovery within Metamorphic Testing? Proceedings Article
In: 2023 IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER), pp. 678–682, IEEE 2023.
@inproceedings{tsigkanos2023large,
  title = {Large Language Models: The Next Frontier for Variable Discovery within Metamorphic Testing?},
  author = {Christos Tsigkanos and Pooja Rani and Sebastian Müller and Timo Kehrer},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  booktitle = {2023 IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)},
  pages = {678--682},
  organization = {IEEE},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Buccheri, Alexander; Peschel, Fabian; Maurer, Benedikt; Voiculescu, Mara; Speckhard, Daniel T.; Kleine, Hannah; Stephan, Elisa; Kuban, Martin; Draxl, Claudia
excitingtools: An exciting Workflow Tool Journal Article
In: Journal of Open Source Software, vol. 8, no. 85, pp. 5148, 2023.
@article{Buccheri2023,
  author    = {Buccheri, Alexander
               and Peschel, Fabian
               and Maurer, Benedikt
               and Voiculescu, Mara
               and Speckhard, Daniel T.
               and Kleine, Hannah
               and Stephan, Elisa
               and Kuban, Martin
               and Draxl, Claudia},
  title     = {excitingtools: An exciting Workflow Tool},
  journal   = {Journal of Open Source Software},
  volume    = {8},
  number    = {85},
  pages     = {5148},
  publisher = {The Open Journal},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  doi       = {10.21105/joss.05148},
  url       = {https://doi.org/10.21105/joss.05148},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Tsigkanos, Christos; Rani, Pooja; Müller, Sebastian; Kehrer, Timo
Variable Discovery with Large Language Models for Metamorphic Testing of Scientific Software Proceedings Article
In: Mikyška, Jiří; Mulatier, Clélia; Paszynski, Maciej; Krzhizhanovskaya, Valeria V.; Dongarra, Jack J.; Sloot, Peter M. A. (Ed.): Computational Science - ICCS 2023 - 23rd International Conference, Prague, Czech Republic, July 3-5, 2023, Proceedings, Part I, pp. 321–335, Springer, 2023.
@inproceedings{DBLP:conf/iccS/TsigkanosRMK23,
  title = {Variable Discovery with Large Language Models for Metamorphic Testing of Scientific Software},
  author = {Christos Tsigkanos and Pooja Rani and Sebastian Müller and Timo Kehrer},
  editor = {Jiří Mikyška and Clélia Mulatier and Maciej Paszynski and Valeria V. Krzhizhanovskaya and Jack J. Dongarra and Peter M. A. Sloot},
  url = {https://doi.org/10.1007/978-3-031-35995-8_23},
  doi = {10.1007/978-3-031-35995-8_23},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  booktitle = {Computational Science - ICCS 2023 - 23rd International Conference, Prague, Czech Republic, July 3-5, 2023, Proceedings, Part I},
  volume = {14073},
  pages = {321--335},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Carbone, Matthew R.; Meng, Fanchen; Vorwerk, Christian; Maurer, Benedikt; Peschel, Fabian; Qu, Xiaohui; Stavitski, Eli; Draxl, Claudia; Vinson, John; Lu, Deyu
Lightshow: a Python package for generating computational x-ray absorption spectroscopy input files Journal Article
In: Journal of Open Source Software, vol. 8, no. 87, pp. 5182, 2023.
@article{Carbone2023,
  author    = {Carbone, Matthew R.
               and Meng, Fanchen
               and Vorwerk, Christian
               and Maurer, Benedikt
               and Peschel, Fabian
               and Qu, Xiaohui
               and Stavitski, Eli
               and Draxl, Claudia
               and Vinson, John
               and Lu, Deyu},
  title     = {Lightshow: a Python package for generating computational x-ray absorption spectroscopy input files},
  journal   = {Journal of Open Source Software},
  volume    = {8},
  number    = {87},
  pages     = {5182},
  publisher = {The Open Journal},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  doi       = {10.21105/joss.05182},
  url       = {https://doi.org/10.21105/joss.05182},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}