This repository has been archived by the owner on Jun 6, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexecutor_timing_visualization.py
53 lines (43 loc) · 2.05 KB
/
executor_timing_visualization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import matplotlib.pyplot as plt
import pandas as pd
import os
import json
def load_and_process_json_files(directory):
    """Load job records from the numerically named JSON files in a directory.

    Every ``<N>.json`` file in ``directory`` is read in ascending numeric
    order of ``N``. Each file is expected to contain a JSON list of job
    dicts that have ``submitted_at`` and ``finished_at`` entries. Each job
    dict is updated in place: ``file_index`` is set to ``N`` and both
    timestamp entries are converted with ``pd.to_datetime``.

    Args:
        directory: Path of the directory holding the JSON files.

    Returns:
        A ``(data, all_submitted_times)`` tuple: the list of all job dicts
        (ordered by file index, then by position within the file) and the
        list of their parsed ``submitted_at`` values in the same order.

    Raises:
        ValueError: If a ``.json`` file name does not start with an integer
            followed by a dot.
        KeyError: If a job lacks ``submitted_at`` or ``finished_at``.
    """
    # Filter before sorting: the numeric sort key would otherwise raise
    # ValueError on any non-JSON entry (e.g. ".DS_Store" or "README.md").
    json_names = [name for name in os.listdir(directory) if name.endswith('.json')]
    json_names.sort(key=lambda name: int(name.split('.')[0]))

    data = []
    all_submitted_times = []
    for file_name in json_names:
        file_index = int(file_name.split('.')[0])
        # Explicit encoding so decoding does not depend on the locale default.
        with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
            jobs = json.load(file)
        # The file is fully parsed at this point, so it can be closed before
        # the per-job processing below.
        for job in jobs:
            job['file_index'] = file_index
            job['submitted_at'] = pd.to_datetime(job['submitted_at'])
            job['finished_at'] = pd.to_datetime(job['finished_at'])
            all_submitted_times.append(job['submitted_at'])
            data.append(job)
    return data, all_submitted_times
# Script body: load every executor's job log, express each job's submit and
# finish time in seconds relative to the earliest submission, and draw one
# horizontal segment per job on that executor's row.

# Directory containing the JSON files
directory = "./executor_logs"
jobs_data, all_submitted_times = load_and_process_json_files(directory)
if not jobs_data:
    # Without this guard, min() below fails with an opaque
    # "min() arg is an empty sequence" error.
    raise SystemExit(f"No job records found in {directory!r}; nothing to plot.")
df_jobs = pd.DataFrame(jobs_data)

# Normalize timestamps by smallest submitted_at timestamp
min_submitted_at = min(all_submitted_times)
df_jobs['normalized_submitted'] = (df_jobs['submitted_at'] - min_submitted_at).dt.total_seconds()
df_jobs['normalized_finished'] = (df_jobs['finished_at'] - min_submitted_at).dt.total_seconds()

# Plotting
plt.figure(figsize=(14, 8))
file_indices = df_jobs['file_index'].unique()
sorted_indices = sorted(file_indices)  # computed once, reused for rows and tick labels
for index in sorted_indices:
    df_subset = df_jobs[df_jobs['file_index'] == index]
    # Iterate the two columns directly; iterrows() would build a Series per
    # row only to read these two values.
    for submitted, finished in zip(df_subset['normalized_submitted'],
                                   df_subset['normalized_finished']):
        plt.plot([submitted, finished], [index, index],
                 marker='o', linewidth=5, markersize=5)  # Thicker line and larger markers

plt.yticks(sorted_indices, labels=[f"Executor {index}" for index in sorted_indices], fontsize=12)
plt.gca().invert_yaxis()  # lowest executor index at the top
plt.xlabel('Time (seconds from first query submission)', fontsize=14)
plt.ylabel('Executor ID', fontsize=14)
plt.title('Normalized Timeline of Executor Busy/Idle Times', fontsize=18)
plt.grid(True)  # Add gridlines
plt.tight_layout()
plt.show()