Adds Gantt Charts (#63)

* added gantt charts
* removed .pycache
* updated for newer versions of the event

Co-authored-by: t-saste <t-saste@STEVENS-HEKA-1>
This commit is contained in:
Sam 2020-07-29 10:10:49 -07:00 committed by GitHub
parent 17f8b79db7
commit f9ddc12a9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 488 additions and 12 deletions

View File

@ -2,6 +2,8 @@
Job Graph events can be used to identify bottlenecks in data refreshes by highlighting the critical path. For instances of Analysis Services not running on-premise, the graph is broken into 16 Kb chunks, each in their own event. The events can be reassembled with this script. Job Graph events can be used to identify bottlenecks in data refreshes by highlighting the critical path. For instances of Analysis Services not running on-premise, the graph is broken into 16 Kb chunks, each in their own event. The events can be reassembled with this script.
# Rebuilding the DGMl file
## Requirements ## Requirements
* Python 3.8 or later * Python 3.8 or later
@ -20,3 +22,21 @@ python rebuild.py path\to\trace.xml output_folder
``` ```
6. Inside `output_folder` there will be two .DGML files, which can be opened in Visual Studio. 6. Inside `output_folder` there will be two .DGML files, which can be opened in Visual Studio.
# Creating a Gantt Chart
## Requirements
* Python 3.8 or later
* A valid job graph DGML file (from above)
## Usage
1. Get a .DGML file with all the anntoations (running duration, waiting duration, etc.)
2. Run `gantt\script.py` like so:
```bash
python gantt\script.py path\to\file.dgml output_folder
```
3. Inside `output_folder` there will be an .html file that can be opened in a browser.

View File

View File

@ -0,0 +1,59 @@
from dataclasses import dataclass
from typing import List, Set, Iterable, Optional, cast
from datetime import datetime
import xml.etree.ElementTree as ET
from structures import Job
from gantt_types import ThreadId
def read_jobs(filename: str) -> List[Job]:
jobs: List[Job] = []
doc = ET.parse(filename)
root = doc.getroot()
try:
nodes = [child for child in root if "nodes" in child.tag.lower()][0]
except IndexError:
return jobs
for node in nodes:
if job := parse_job_node(node):
jobs.append(job)
return jobs
def parse_iso(time: str) -> datetime:
if time[-1].lower() == "z":
time = time[:-1]
return datetime.fromisoformat(time)
def parse_thread_id(s: str) -> ThreadId:
return ThreadId(int(s))
def strip_newlines(s: str) -> str:
return "".join([c for c in s if ord(c) > 32])
def parse_job_node(node: ET.Element) -> Optional[Job]:
for attr, value in node.attrib.items():
if attr == "StartedAt":
start = parse_iso(value)
if attr == "FinishedAt":
end = parse_iso(value)
if attr == "Label":
name = value
if attr == "Slot" or attr == "Thread":
thread = value
try:
return Job(start, end, strip_newlines(name), parse_thread_id(thread))
except UnboundLocalError:
# most likely doesn't include "Thread" or "Slot" attribute.
return None

View File

@ -0,0 +1,6 @@
from typing import NewType
ThreadId = NewType("ThreadId", int)
Millisecond = NewType("Millisecond", float)
Second = NewType("Second", float)

View File

@ -0,0 +1,70 @@
html {
overflow-x: scroll;
}
* {
box-sizing: border-box;
}
main {
padding: 5px;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}
div.gantt {
position: relative;
padding: 25px 0px;
}
div.row {
white-space: nowrap;
position: relative;
height: 2em;
}
div.row:nth-child(even) {
background-color: #f2f2f2;
}
div.row>span.axis-tick {
border-left: 1px solid #777777;
position: absolute;
padding-left: 1ch;
display: inline-block;
height: 100%;
}
div.row>span.axis-tick:nth-of-type(2) {
border-left: 0;
}
div.row>span.job {
display: inline-block;
height: 100%;
border-radius: 3px;
border: 0.5px solid white;
position: absolute;
}
div.row>span.legend {
display: inline-block;
height: 100%;
border-right: 1px solid #777777;
}
span[data-descr]:hover::after,
span[data-descr]:focus::after {
content: attr(data-descr);
position: absolute;
left: 0px;
top: 1.8em;
min-width: 200px;
border: 1px #aaaaaa solid;
border-radius: 10px;
background-color: #ffff8d;
padding: 6px;
color: #000000;
font-size: 14px;
z-index: 1;
white-space: pre;
}

View File

@ -0,0 +1,128 @@
from datetime import datetime
from structures import Gantt, Row, Job
from gantt_types import Second, Millisecond
import structures
import utility
import operator
COLORS = [
"#d50000",
"#00bfa5",
"#ff6f00",
"#aa00ff",
"#006064",
"#ffd600",
"#64dd17",
]
HEADER_COLUMN_WIDTH = 240
def ms_to_px(ms: Millisecond) -> float:
return ms / 10
def job_to_html(job: Job, start: datetime, color: str) -> str:
left = ms_to_px(utility.duration_ms(start, job.start)) + HEADER_COLUMN_WIDTH
width = ms_to_px(structures.job_duration_ms(job))
return f"""<span class="job" data-descr="{job.name}{chr(10)}Duration: {utility.ms_to_s(structures.job_duration_ms(job)):.2f}s" style="left: {left}px; width: {width}px; background-color: {color}"></span>"""
def row_to_html(
row: Row, start: datetime, process_num: int, color: str, width: float
) -> str:
legend_html = f"""<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Concurrency Slot {process_num} ({utility.ms_to_s(structures.row_computing_duration_ms(row)):.1f}s)</span>"""
jobs_html = "\n".join([job_to_html(job, start, color) for job in row.jobs])
return (
f"""<div class="row" style="width: {width}px;">{legend_html}{jobs_html}</div>"""
)
def rownum_to_top(num: int) -> float:
return num * 2
def make_axis_span(left: float, s: Second) -> str:
return f"""<span class="axis-tick" style="left: {left}px;">{s} sec</span>"""
def make_axis_html(max_seconds: Second) -> str:
seconds = [Second(i * 2) for i in range(1000)]
seconds = [i for i in seconds if i < max_seconds]
axis_spans = "".join(
[
make_axis_span(ms_to_px(utility.s_to_ms(s)) + HEADER_COLUMN_WIDTH, s)
for s in seconds
]
)
return f"""<div class="row axis">
<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Total Processing Time</span>
{axis_spans}
</div>"""
def gantt_to_html(g: Gantt) -> str:
if not g:
return ""
start = min([row.jobs[0].start for row in g])
max_seconds = max([utility.ms_to_s(structures.row_duration_ms(row)) for row in g])
rows_html = "\n".join(
[
row_to_html(
row,
start,
num + 1,
COLORS[num % len(COLORS)],
ms_to_px(utility.s_to_ms(max_seconds)) + HEADER_COLUMN_WIDTH,
)
for num, row in enumerate(
sorted(
g,
reverse=True,
key=lambda r: structures.row_computing_duration_ms(r),
)
)
]
)
return f"""<div class="gantt">{make_axis_html(max_seconds)}{rows_html}</div>"""
def style() -> str:
with open("./gantt/output.css") as css:
return f"""<style>{css.read()}</style>"""
def html(g: Gantt) -> str:
html = f"""
<html>
<head></head>
<body>
<main>
<h1>Gantt Chart</h1>
<p>Max parallelism: {len(g)}</p>
{gantt_to_html(g)}
<h1>Explanation</h1>
<p>
<ul>
<li>Each row represents a parallelism "slot"; if "maxParallelism" was 4, then there are 4 rows.</li>
<li>Each colored block is a job; hover with a mouse to show the name and how long it took.</li>
<li>Each row shows the total time spent doing jobs to highlight bottlenecks.</li>
</ul>
</p>
</main>
{style()}
</body>
</html>
"""
return html if g else ""

View File

@ -0,0 +1,86 @@
from dataclasses import dataclass
from typing import List, Set, Iterable, Optional, cast
import os, sys
from structures import Job, Gantt, Row, new_gantt
import dgml, output
def get_dir(folder: str) -> Set[str]:
return set(
[
os.path.join(folder, filename)
for filename in os.listdir(folder)
if os.path.isfile(os.path.join(folder, filename))
]
)
def write_document(content: str, filepath: str) -> None:
os.makedirs(os.path.dirname(filepath), exist_ok=True)
with open(filepath, "w") as file:
file.write(content)
def output_file_path(file: str, out_folder: str) -> str:
base = os.path.basename(file)
base, ext = os.path.splitext(base)
return os.path.join(out_folder, base + ".html")
def make_gantt(file: str, out_folder: str) -> None:
html = output.html(new_gantt(dgml.read_jobs(file)))
if not html:
print(f"No jobs found in {file}; maybe this is not the -annotated file?")
else:
write_document(html, output_file_path(file, out_folder))
print(f'Saving "{output_file_path(file, out_folder)}"')
def make_gantt_dir(folder: str, out_folder: str) -> None:
for file in get_dir(folder):
make_gantt(file, out_folder)
# SCRIPT
def print_help() -> None:
print(
"""
Guide for gantt/script.py
(requires Python 3.8 or later)
Use:
\tpython gantt/script.py <input folder> <output folder>
\t\tRebuilds all graphs in "./data" and writes them to "./output".
\tpython rebuild.py <inputfile> <inputfile> ... <outputfolder>
\t\tRebuilds <inputfile>s and writes them to <outputfolder>
"""
)
def main() -> None:
if len(sys.argv) < 3:
print_help()
else:
_, *inputs, output_folder = sys.argv
for i in inputs:
if os.path.isfile(i):
make_gantt(i, output_folder)
elif os.path.isdir(i):
make_gantt_dir(i, output_folder)
else:
print(f"{i} is not a file or directory.")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,72 @@
from typing import Tuple, List, NamedTuple, Optional, NewType
from datetime import datetime
import utility
from gantt_types import ThreadId, Millisecond, Second
import operator
class Job(NamedTuple):
start: datetime
end: datetime
name: str
thread: ThreadId
class Row(NamedTuple):
jobs: List[Job]
thread: ThreadId
Gantt = List[Row]
def add_job(row: Row, job: Job) -> None:
assert row.thread == job.thread, f"row: {row.thread}, job: {job.thread}"
if row.jobs:
assert (
row.jobs[-1].end <= job.start
), f"{row.jobs[-1].end} is not less than {job.start} (thread id: {row.thread})"
row.jobs.append(job)
def new_row(job: Job) -> Row:
return Row([job], job.thread)
def row_duration_ms(row: Row) -> Millisecond:
return utility.duration_ms(row.jobs[0].start, row.jobs[-1].end)
def row_computing_duration_ms(row: Row) -> Millisecond:
return Millisecond(sum([job_duration_ms(job) for job in row.jobs]))
def row_with_thread(g: Gantt, thread: ThreadId) -> Optional[Row]:
for row in g:
if row.thread == thread:
return row
return None
def add_row(g: Gantt, row: Row) -> None:
g.append(row)
def new_gantt(jobs: List[Job]) -> Gantt:
g: Gantt = []
for job in sorted(jobs, key=operator.attrgetter("start")):
if row := row_with_thread(g, job.thread):
add_job(row, job)
else:
add_row(g, new_row(job))
return g
def job_duration_ms(job: Job) -> Millisecond:
return utility.duration_ms(job.start, job.end)

View File

@ -0,0 +1,16 @@
from datetime import datetime
from gantt_types import Millisecond, Second
def duration_ms(start_time: datetime, end_time: datetime) -> Millisecond:
duration = end_time - start_time
return Millisecond((duration.seconds * 1000000 + duration.microseconds) // 1000)
def ms_to_s(m: Millisecond) -> Second:
return Second(m / 1000)
def s_to_ms(s: Second) -> Millisecond:
return Millisecond(s * 1000)

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python3
""" """
Rebuilds a DGML file. Requires Python 3.8. Rebuilds a DGML file. Requires Python 3.8.
""" """
@ -36,7 +38,14 @@ def load_file(filename: str) -> List[Row]:
if ext == ".csv": if ext == ".csv":
with open(filename) as file: with open(filename) as file:
dict_rows = csv.DictReader(file) dict_rows = csv.DictReader(file)
rows = [make_row_from_jarvis(row["MessageText"]) for row in dict_rows] rows = [
make_row_from_jarvis(
row["MessageText"],
row["CurrentActivityId"],
int(row["Engine_EventSubclass"]),
)
for row in dict_rows
]
return [r for r in rows if r] return [r for r in rows if r]
@ -64,7 +73,7 @@ def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
subclass = None subclass = None
for col in event.findall("Column", ns): for col in event.findall("Column", ns):
if col.attrib["id"] == "46": if col.attrib["id"] == "46" or col.attrib["id"] == "53":
guid = col.text guid = col.text
if col.attrib["id"] == "1": if col.attrib["id"] == "1":
@ -83,19 +92,29 @@ def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
return None return None
def make_row_from_jarvis(message_txt: str) -> Optional[Row]: def make_row_from_jarvis(
if "graphcorrelationid" in message_txt.lower(): message_txt: str, activity_id: str, subclass: int
print( ) -> Optional[Row]:
"This event is from an older version of the job graph feature (shouldn't have 'GraphCorrelationID' in it)" guid = activity_id + str(subclass) + ("-annotated" if subclass == 2 else "-plan")
)
match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt) if "graphcorrelationid" in message_txt.lower():
if match: match = re.match(
textdata, guid, order_marker_str = match.group(1, 2, 3) r"TextData: (.*); GraphCorrelationID: (.*); IntegerData: (.\d*)",
message_txt,
)
if match:
textdata, order_marker_str = match.group(1, 3)
else:
match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt)
if match:
textdata, order_marker_str = match.group(1, 2)
try:
order_marker = int(order_marker_str) order_marker = int(order_marker_str)
return Row(guid, order_marker, textdata) return Row(guid, order_marker, textdata)
except UnboundLocalError:
return None return None
def extract_metadata(header_row: Row) -> Optional[Tuple[int, int]]: def extract_metadata(header_row: Row) -> Optional[Tuple[int, int]]: