Continued from B5: Adaptive, Distributed and Scalable Analysis of Massive Satellite Data
Description
Data analysis workflows (DAWs) for satellite data are highly heterogeneous and complex in terms of input data, software, and resource requirements. Furthermore, the data for these DAWs are often held at different data centers, which makes data download a major bottleneck in DAW execution.
Our goal is to extend the orchestration of Earth Observation Workflows (EOWs) from FONDA I to a federated, multi-center scenario, enabling the analysis of changes in agricultural land use over very large, heterogeneous, and distributed data sets.
Scientists
- Felix Kummer
- Katarzyna Ewa Lewińska
Publications
2026
West, Kathleen; Moawad, Youssef; Lehmann, Fabian; Bountris, Vasilis; Leser, Ulf; Elkhatib, Yehia; Thamsen, Lauritz
A Systematic Evaluation of the Potential of Carbon-Aware Execution for Scientific Workflows Journal Article
In: Future Generation Computer Systems, 2026.
@article{west2026systematicevaluationpotentialcarbonaware,
  author     = {West, Kathleen and Moawad, Youssef and Lehmann, Fabian and Bountris, Vasilis and Leser, Ulf and Elkhatib, Yehia and Thamsen, Lauritz},
  title      = {A Systematic Evaluation of the Potential of Carbon-Aware Execution for Scientific Workflows},
  journal    = {Future Generation Computer Systems},
  year       = {2026},
  date       = {2026-01-01},
  url        = {https://arxiv.org/abs/2508.14625},
  urldate    = {2026-01-01},
  eprint     = {2508.14625},
  eprinttype = {arXiv},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
2025
De Mecquenem, Ninon; Bosse, Simon; Bountris, Vasilis; Lehmann, Fabian; Mohammadi, Somayeh; Karega, Pauline; Reinert, Knut; Leser, Ulf
Domain-Specific Data Compression for Nextflow with COMET-FLOW Proceedings Article
In: 2025 IEEE International Conference on Big Data (BigData), pp. 4774–4783, IEEE Computer Society, Los Alamitos, CA, USA, 2025.
@inproceedings{11401604,
  author    = {De Mecquenem, Ninon and Bosse, Simon and Bountris, Vasilis and Lehmann, Fabian and Mohammadi, Somayeh and Karega, Pauline and Reinert, Knut and Leser, Ulf},
  title     = {Domain-Specific Data Compression for {Nextflow} with {COMET-FLOW}},
  booktitle = {2025 IEEE International Conference on Big Data (BigData)},
  pages     = {4774--4783},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  year      = {2025},
  date      = {2025-12-01},
  doi       = {10.1109/BigData66926.2025.11401604},
  url       = {https://doi.ieeecomputersociety.org/10.1109/BigData66926.2025.11401604},
  urldate   = {2025-12-01},
  abstract  = {Scientific workflows are increasingly adopted for large-scale data analysis because of their reproducibility, scalability, and trust-enhancing documentation of analysis processes. These workflows are typically I/O-heavy, processing large volumes of data. This is especially the case in bioinformatics where the amount of data to store is very large. In genomics, a lot of biotools directly integrate gzip decompression of the input data and compression of the output data, making it the default choice in this field. However, gzip is a general-purpose tool, and many domain-specific compression methods outperform it both on compression ratio and speed. Implementing these domain-specific compression methods within existing workflows requires manual code adaptation for every task, which is time-consuming and error-prone. To relieve developers, we propose our COmpressionMEThod for workFLOW engines (COMET-FLOW). The idea behind this method is to shift the compression responsibility from individual tools to the workflow execution engine, allowing uniform handling of all tasks. This approach requires only minimal adjustments to existing workflows and facilitates future support for other data formats and beyond Bioinformatics. We implemented COMET-FLOW in the popular workflow system Nextflow, enabling the workflow engine to automatically manage compression and decompression of files at runtime. Experiments with three nf-core workflows and several datasets showed that our approach can reduce storage size by up to 31\% and runtime up to 41\% compared to standard implementations.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lehmann, Fabian; Bader, Jonathan; Tschirpke, Friedrich; De Mecquenem, Ninon; Lößer, Ansgar; Becker, Sören; Lewińska, Katarzyna Ewa; Thamsen, Lauritz; Leser, Ulf
WOW: Workflow-Aware Data Movement and Task Scheduling for Dynamic Scientific Workflows Proceedings Article
In: 2025 IEEE 25th International Symposium on Cluster, Cloud and Internet Computing (CCGrid), Tromsø, Norway, 2025.
@inproceedings{lehmannWOW2025,
  author    = {Lehmann, Fabian and Bader, Jonathan and Tschirpke, Friedrich and De Mecquenem, Ninon and Lößer, Ansgar and Becker, Sören and Lewińska, Katarzyna Ewa and Thamsen, Lauritz and Leser, Ulf},
  title     = {{WOW}: Workflow-Aware Data Movement and Task Scheduling for Dynamic Scientific Workflows},
  booktitle = {2025 IEEE 25th International Symposium on Cluster, Cloud and Internet Computing (CCGrid)},
  address   = {Tromsø, Norway},
  year      = {2025},
  date      = {2025-05-01},
  urldate   = {2025-05-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
West, Kathleen; Lehmann, Fabian; Bountris, Vasilis; Leser, Ulf; Elkhatib, Yehia; Thamsen, Lauritz
Exploring the Potential of Carbon-Aware Execution for Scientific Workflows Proceedings Article
In: 2025 IEEE 25th International Symposium on Cluster, Cloud and Internet Computing (CCGrid), Tromsø, Norway, 2025.
@inproceedings{lehmannWOW2025b,
  author    = {West, Kathleen and Lehmann, Fabian and Bountris, Vasilis and Leser, Ulf and Elkhatib, Yehia and Thamsen, Lauritz},
  title     = {Exploring the Potential of Carbon-Aware Execution for Scientific Workflows},
  booktitle = {2025 IEEE 25th International Symposium on Cluster, Cloud and Internet Computing (CCGrid)},
  address   = {Tromsø, Norway},
  year      = {2025},
  date      = {2025-05-01},
  urldate   = {2025-05-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lewińska, Katarzyna Ewa; Okujeni, Akpona; Kowalski, Katja; Lehmann, Fabian; Radeloff, Volker C.; Leser, Ulf; Hostert, Patrick
Impact of data density and endmember definitions on long-term trends in ground cover fractions across European grasslands Journal Article
In: Remote Sensing of Environment, vol. 323, pp. 114736, 2025, ISSN: 0034-4257.
@article{LEWINSKA2025114736,
  author    = {Lewińska, Katarzyna Ewa and Okujeni, Akpona and Kowalski, Katja and Lehmann, Fabian and Radeloff, Volker C. and Leser, Ulf and Hostert, Patrick},
  title     = {Impact of data density and endmember definitions on long-term trends in ground cover fractions across {European} grasslands},
  journal   = {Remote Sensing of Environment},
  volume    = {323},
  pages     = {114736},
  year      = {2025},
  date      = {2025-01-01},
  issn      = {0034-4257},
  doi       = {10.1016/j.rse.2025.114736},
  url       = {https://www.sciencedirect.com/science/article/pii/S0034425725001403},
  urldate   = {2025-01-01},
  abstract  = {Long-term monitoring of grasslands is pivotal for ensuring continuity of many environmental services and for supporting food security and environmental modeling. Remote sensing provides an irreplaceable source of information for studying changes in grasslands. Specifically, Spectral Mixture Analysis (SMA) allows for quantification of physically meaningful ground cover fractions of grassland ecosystems (i.e., green vegetation, non-photosynthetic vegetation, and soil), which is crucial for our understanding of change processes and their drivers. However, although popular due to straightforward implementation and low computational cost, ‘classical’ SMA relies on a single endmember definition for each targeted ground cover component, thus offering limited suitability and generalization capability for heterogeneous landscapes. Furthermore, the impact of irregular data density on SMA-based long-term trends in grassland ground cover has also not yet been critically addressed. We conducted a systematic assessment of i) the impact of data density on long-term trends in ground cover fractions in grasslands; and ii) the effect of endmember definition used in ‘classical’ SMA on pixel- and map-level trends of grassland ground cover fractions. We performed our study for 13 sites across European grasslands and derived the trends based on the Cumulative Endmember Fractions calculated from monthly composites. We compared three different data density scenarios, i.e., 1984–2021 Landsat data record as is, 1984–2021 Landsat data record with the monthly probability of data after 2014 adjusted to the pre-2014 levels, and the combined 1984–2021 Landsat and 2015–2021 Sentinel-2 datasets. For each site we ran SMA using a selection of site-specific and generalized endmembers, and compared the pixel- and map-level trends. Our results indicated no significant impact of varying data density on the long-term trends from Cumulative Endmember Fractions in European grasslands.
Conversely, the use of different endmember definitions led in some regions to significantly different pixel- and map-level long-term trends raising questions about the suitability of the ‘classical’ SMA for complex landscapes and large territories. Therefore, we caution against using the ‘classical’ SMA for remote-sensing-based applications across broader scales or in heterogenous landscapes, particularly for trend analyses, as the results may lead to erroneous conclusions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Bader, Jonathan; Skalski, Fabian; Lehmann, Fabian; Scheinert, Dominik; Will, Jonathan; Thamsen, Lauritz; Kao, Odej
Sizey: Memory-Efficient Execution of Scientific Workflow Tasks Proceedings Article
In: 2024 IEEE International Conference on Cluster Computing (CLUSTER), 2024.
@inproceedings{bader2024Sizey,
  author    = {Bader, Jonathan and Skalski, Fabian and Lehmann, Fabian and Scheinert, Dominik and Will, Jonathan and Thamsen, Lauritz and Kao, Odej},
  title     = {Sizey: Memory-Efficient Execution of Scientific Workflow Tasks},
  booktitle = {2024 IEEE International Conference on Cluster Computing (CLUSTER)},
  year      = {2024},
  date      = {2024-09-21},
  url       = {https://ieeexplore.ieee.org/document/10740856},
  urldate   = {2024-09-21},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bader, Jonathan; Lehmann, Fabian; Thamsen, Lauritz; Leser, Ulf; Kao, Odej
Lotaru: Locally predicting workflow task runtimes for resource management on heterogeneous infrastructures Journal Article
In: Future Generation Computer Systems, vol. 150, pp. 171–185, 2024, ISSN: 0167-739X.
@article{BADER2023,
  author    = {Bader, Jonathan and Lehmann, Fabian and Thamsen, Lauritz and Leser, Ulf and Kao, Odej},
  title     = {Lotaru: Locally predicting workflow task runtimes for resource management on heterogeneous infrastructures},
  journal   = {Future Generation Computer Systems},
  volume    = {150},
  pages     = {171--185},
  year      = {2024},
  date      = {2024-01-01},
  issn      = {0167-739X},
  doi       = {10.1016/j.future.2023.08.022},
  url       = {https://www.sciencedirect.com/science/article/pii/S0167739X23003229},
  urldate   = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lewińska, Katarzyna Ewa; Frantz, David; Leser, Ulf; Hostert, Patrick
Usable observations over Europe: evaluation of compositing windows for Landsat and Sentinel-2 time series Journal Article
In: European Journal of Remote Sensing, vol. 57, no. 1, pp. 2372855, 2024.
@article{doi:10.1080/22797254.2024.2372855,
  author    = {Lewińska, Katarzyna Ewa and Frantz, David and Leser, Ulf and Hostert, Patrick},
  title     = {Usable observations over {Europe}: evaluation of compositing windows for {Landsat} and {Sentinel-2} time series},
  journal   = {European Journal of Remote Sensing},
  volume    = {57},
  number    = {1},
  pages     = {2372855},
  publisher = {Taylor \& Francis},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1080/22797254.2024.2372855},
  url       = {https://doi.org/10.1080/22797254.2024.2372855},
  urldate   = {2024-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lehmann, Fabian; Bader, Jonathan; De Mecquenem, Ninon; Wang, Xing; Bountris, Vasilis; Friederici, Florian; Leser, Ulf; Thamsen, Lauritz
Ponder: Online Prediction of Task Memory Requirements for Scientific Workflows Proceedings Article
In: 2024 IEEE 20th International Conference on e-Science (e-Science), pp. 1–10, 2024.
@inproceedings{lehmannPonder2024,
  author    = {Lehmann, Fabian and Bader, Jonathan and De Mecquenem, Ninon and Wang, Xing and Bountris, Vasilis and Friederici, Florian and Leser, Ulf and Thamsen, Lauritz},
  title     = {Ponder: Online Prediction of Task Memory Requirements for Scientific Workflows},
  booktitle = {2024 IEEE 20th International Conference on e-Science (e-Science)},
  pages     = {1--10},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1109/e-Science62913.2024.10678682},
  url       = {https://ieeexplore.ieee.org/document/10678682},
  urldate   = {2024-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sänger, Mario; De Mecquenem, Ninon; Lewińska, Katarzyna Ewa; Bountris, Vasilis; Lehmann, Fabian; Leser, Ulf; Kosch, Thomas
A Qualitative Assessment of Using ChatGPT as Large Language Model for Scientific Workflow Development Journal Article
In: GigaScience, 2024, ISSN: 2047-217X.
@article{saenger2024a,
  author    = {Sänger, Mario and De Mecquenem, Ninon and Lewińska, Katarzyna Ewa and Bountris, Vasilis and Lehmann, Fabian and Leser, Ulf and Kosch, Thomas},
  title     = {A Qualitative Assessment of Using {ChatGPT} as Large Language Model for Scientific Workflow Development},
  journal   = {GigaScience},
  year      = {2024},
  date      = {2024-01-01},
  issn      = {2047-217X},
  doi       = {10.1093/gigascience/giae030},
  url       = {https://doi.org/10.1093/gigascience/giae030},
  urldate   = {2024-01-01},
  abstract  = {Scientific workflow systems are increasingly popular for expressing and executing complex data analysis pipelines over large datasets, as they offer reproducibility, dependability, and scalability of analyses by automatic parallelization on large compute clusters. However, implementing workflows is difficult due to the involvement of many black-box tools and the deep infrastructure stack necessary for their execution. Simultaneously, user-supporting tools are rare, and the number of available examples is much lower than in classical programming languages. To address these challenges, we investigate the efficiency of large language models (LLMs), specifically ChatGPT, to support users when dealing with scientific workflows. We performed 3 user studies in 2 scientific domains to evaluate ChatGPT for comprehending, adapting, and extending workflows. Our results indicate that LLMs efficiently interpret workflows but achieve lower performance for exchanging components or purposeful workflow extensions. We characterize their limitations in these challenging scenarios and suggest future research directions. Our results show a high accuracy for comprehending and explaining scientific workflows while achieving a reduced performance for modifying and extending workflow descriptions. These findings clearly illustrate the need for further research in this area.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
