diff --git a/06-expansion.md b/06-expansion.md index c6dba6a..1d1ec6d 100644 --- a/06-expansion.md +++ b/06-expansion.md @@ -119,7 +119,7 @@ curl -O https://ocaisa.github.io/hpc-workflows/files/plot_terse_amdahl_results.p The script `plot_terse_amdahl_results.py` needs a command line that looks like: ```bash -python plot_terse_amdahl_results.py <1st input file> <2nd input file> ... +python plot_terse_amdahl_results.py --output <output file> <1st input file> <2nd input file> ... ``` Let's introduce that into our `generate_run_files` rule: @@ -129,7 +129,7 @@ rule generate_run_files: output: "p_{parallel_proportion}_runs.txt" input: expand("p_{{parallel_proportion}}/runs/amdahl_run_{count}.json", count=NTASK_SIZES) shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` ::: challenge @@ -146,7 +146,7 @@ rule generate_run_files: envmodules: "matplotlib" shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` :::::: @@ -188,6 +188,7 @@ snakemake --profile cluster_profile/ p_0.8_scalability.jpg ::: ::: challenge + ## Bonus round Create a final rule that can be called directly and generates a scaling plot for diff --git a/files/plot_terse_amdahl_results.py b/files/plot_terse_amdahl_results.py index a85425f..fdb09bb 100644 --- a/files/plot_terse_amdahl_results.py +++ b/files/plot_terse_amdahl_results.py @@ -1,15 +1,34 @@ -import sys +#!/usr/bin/env python3 +import argparse import json +import matplotlib import matplotlib.pyplot as plt import numpy as np -def process_files(file_list, output="plot.jpg"): +matplotlib.use('AGG') + +description = """ +Plot results of an Amdahl scaling study, +assuming the '--terse' output flag was used. 
+""" + +def process_files(output, file_list): value_tuples=[] for filename in file_list: # Open the JSON file and load data - with open(filename, 'r') as file: - data = json.load(file) - value_tuples.append((data['nproc'], data['execution_time'])) + try: + with open(filename, 'r') as file: + data = json.load(file) + value_tuples.append((data['nproc'], data['execution_time'])) + except FileNotFoundError: + print(f"Error: File {filename} not found.") + return + except json.JSONDecodeError: + print(f"Error: File {filename} is not a valid JSON.") + return + except KeyError: + print(f"Error: Missing required data in file {filename}.") + return # Sort the tuples sorted_list = sorted(value_tuples) @@ -22,9 +41,10 @@ def process_files(file_list, output="plot.jpg"): # Adding the y=1/x line x_line = np.linspace(1, max(x), 100) # Create x values for the line - y_line = (y[0]/x[0]) / x_line # Calculate corresponding (scaled) y values + y_line = (y[0] / x[0]) / x_line # Calculate corresponding (scaled) y values - plt.plot(x_line, y_line, linestyle='--', color='red', label='Perfect scaling') + plt.plot(x_line, y_line, linestyle='--', + color='red', label='Perfect scaling') # Adding title and labels plt.title("Scaling plot") @@ -34,16 +54,27 @@ def process_files(file_list, output="plot.jpg"): # Show the legend plt.legend() - # Save the plot to a JPEG file - plt.savefig(output, format='jpeg') + # Save the plot to the specified file + plt.savefig(output, dpi=400, bbox_inches="tight") if __name__ == "__main__": - # The first command-line argument is the script name itself, so we skip it - output = sys.argv[1] - filenames = sys.argv[2:] + parser = argparse.ArgumentParser( + description=description, + epilog="Brought to you by HPC Carpentry" + ) + + parser.add_argument( + "--output", + help="Image file to write (PNG or JPG)", + required=True + ) + + parser.add_argument( + "inputs", + help="Amdahl terse output files (JSON)", + nargs="+" + ) - if filenames: - process_files(filenames, 
output=output) - else: - print("No files provided.") + args = parser.parse_args() + process_files(args.output, args.inputs) diff --git a/md5sum.txt b/md5sum.txt index 17ea628..04ffd3d 100644 --- a/md5sum.txt +++ b/md5sum.txt @@ -9,9 +9,8 @@ "episodes/03-placeholders.md" "ec29790c7fc3aa293954ec99821c3238" "site/built/03-placeholders.md" "2024-05-02" "episodes/04-snakemake_and_mpi.md" "0774e397f186f04b53cf229e3ac4ee26" "site/built/04-snakemake_and_mpi.md" "2024-05-02" "episodes/05-chaining_rules.md" "5dfa4f281e51022957e144fff005157b" "site/built/05-chaining_rules.md" "2024-05-02" -"episodes/06-expansion.md" "cdaffe52029447161dae12a33b5e0398" "site/built/06-expansion.md" "2024-05-02" +"episodes/06-expansion.md" "5dc7189afd60b0c4febd35e0a0e5f38d" "site/built/06-expansion.md" "2024-06-20" "instructors/instructor-notes.md" "cae72b6712578d74a49fea7513099f8c" "site/built/instructor-notes.md" "2023-05-02" "learners/reference.md" "1c7cc4e229304d9806a13f69ca1b8ba4" "site/built/reference.md" "2023-05-02" "learners/setup.md" "61568b36c8b96363218c9736f6aee03a" "site/built/setup.md" "2023-05-02" "profiles/learner-profiles.md" "60b93493cf1da06dfd63255d73854461" "site/built/learner-profiles.md" "2023-05-02" -"renv/profiles/lesson-requirements/renv.lock" "ef993929be05c5a26a77a5a2b190e15c" "site/built/renv.lock" "2024-03-12" diff --git a/renv.lock b/renv.lock deleted file mode 100644 index 54a158b..0000000 --- a/renv.lock +++ /dev/null @@ -1,31 +0,0 @@ -{ - "R": { - "Version": "4.3.3", - "Repositories": [ - { - "Name": "carpentries", - "URL": "https://carpentries.r-universe.dev" - }, - { - "Name": "carpentries_archive", - "URL": "https://carpentries.github.io/drat" - }, - { - "Name": "CRAN", - "URL": "https://cran.rstudio.com" - } - ] - }, - "Packages": { - "renv": { - "Package": "renv", - "Version": "0.17.3", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "utils" - ], - "Hash": "4543b8cd233ae25c6aba8548be9e747e" - } - } -}