Adds Gantt Charts (#63)

* added gantt charts * removed .pycache * updated for newer versions of the event Co-authored-by: t-saste <t-saste@STEVENS-HEKA-1>
2020-07-29 10:10:49 -07:00 · 2020-07-29 10:10:49 -07:00 · f9ddc12a9b
commit f9ddc12a9b
parent 17f8b79db7
10 changed files with 488 additions and 12 deletions
--- a/ASJobGraphEvents/README.md
+++ b/ASJobGraphEvents/README.md
@ -2,6 +2,8 @@
 Job Graph events can be used to identify bottlenecks in data refreshes by highlighting the critical path. For instances of Analysis Services not running on-premises, the graph is broken into 16 Kb chunks, each in its own event. The events can be reassembled with this script.
 # Rebuilding the DGML file
 ## Requirements
 * Python 3.8 or later
@ -20,3 +22,21 @@ python rebuild.py path\to\trace.xml output_folder
 ```
 6. Inside `output_folder` there will be two .DGML files, which can be opened in Visual Studio.
 # Creating a Gantt Chart
 ## Requirements
 * Python 3.8 or later
 * A valid job graph DGML file (from above)
 ## Usage
 1. Get a .DGML file with all the annotations (running duration, waiting duration, etc.)
 2. Run `gantt\script.py` like so:
 ```bash
 python gantt\script.py path\to\file.dgml output_folder
 ```
 3. Inside `output_folder` there will be an .html file that can be opened in a browser.
--- a/ASJobGraphEvents/gantt/init.py
+++ b/ASJobGraphEvents/gantt/init.py
--- a/ASJobGraphEvents/gantt/dgml.py
+++ b/ASJobGraphEvents/gantt/dgml.py
@ -0,0 +1,59 @@
 from dataclasses import dataclass
 from typing import List, Set, Iterable, Optional, cast
 from datetime import datetime
 import xml.etree.ElementTree as ET
 from structures import Job
 from gantt_types import ThreadId
 def read_jobs(filename: str) -> List[Job]:
    """Read every parseable job from the DGML file at ``filename``.

    The first child of the document root whose tag contains "nodes"
    (case-insensitively) is treated as the node container. Each of its
    children is passed to ``parse_job_node``; children that do not yield a
    job are skipped. An empty list is returned when no such container exists.
    """
    root = ET.parse(filename).getroot()
    container = next(
        (child for child in root if "nodes" in child.tag.lower()), None
    )
    # Compare against None explicitly: an Element without children is falsy.
    if container is None:
        return []
    parsed = (parse_job_node(node) for node in container)
    return [job for job in parsed if job]
 def parse_iso(time: str) -> datetime:
    """Parse an ISO 8601 timestamp string into a ``datetime``.

    A single trailing "Z" or "z" is dropped before parsing (it is not
    converted into a UTC tzinfo); the remainder is handed to
    ``datetime.fromisoformat``.

    Args:
        time: the timestamp text, optionally ending in "Z"/"z".

    Returns:
        The parsed ``datetime``.

    Raises:
        ValueError: if ``time`` is empty or is not a string that
            ``datetime.fromisoformat`` accepts.
    """
    # endswith() is safe on an empty string; indexing time[-1] raised an
    # IndexError there instead of the ValueError every other bad input gives.
    if time.endswith(("z", "Z")):
        time = time[:-1]
    return datetime.fromisoformat(time)
 def parse_thread_id(s: str) -> ThreadId:
    """Convert the textual thread/slot number ``s`` into a ``ThreadId``.

    ``int(s)`` raises ``ValueError`` when ``s`` is not an integer literal.
    """
    return ThreadId(int(s))
 def strip_newlines(s: str) -> str:
    """Return ``s`` without any character whose code point is 32 or lower.

    Despite the name, this drops spaces (code point 32) and every ASCII
    control character (tabs, carriage returns, ...), not only newlines.
    """
    kept = (ch for ch in s if ord(ch) > 32)
    return "".join(kept)
 def parse_job_node(node: ET.Element) -> Optional[Job]:
    """Build a ``Job`` from the attributes of a DGML node element.

    Reads "StartedAt" and "FinishedAt" (parsed with ``parse_iso``), "Label"
    (the job name) and either "Slot" or "Thread" (the thread id; when both
    are present the one that comes last in attribute order wins).

    Args:
        node: the XML element describing one job.

    Returns:
        The job, or ``None`` when any of the required attributes is missing
        (for example a node without a "Thread"/"Slot" attribute).

    Raises:
        ValueError: if a timestamp or the thread id is present but malformed.
    """
    # Track missing attributes explicitly instead of relying on catching
    # UnboundLocalError, which could also hide unrelated programming errors.
    start = None
    end = None
    name = None
    thread = None
    for attr, value in node.attrib.items():
        if attr == "StartedAt":
            start = parse_iso(value)
        elif attr == "FinishedAt":
            end = parse_iso(value)
        elif attr == "Label":
            name = value
        elif attr in ("Slot", "Thread"):
            thread = value
    if start is None or end is None or name is None or thread is None:
        return None
    return Job(start, end, strip_newlines(name), parse_thread_id(thread))
--- a/ASJobGraphEvents/gantt/gantt_types.py
+++ b/ASJobGraphEvents/gantt/gantt_types.py
@ -0,0 +1,6 @@
 from typing import NewType
 # Distinct static types over plain numbers so that type checkers can tell
 # thread ids and the two time units apart; NewType adds no runtime wrapper.
 ThreadId = NewType("ThreadId", int)
 Millisecond = NewType("Millisecond", float)
 Second = NewType("Second", float)
--- a/ASJobGraphEvents/gantt/output.css
+++ b/ASJobGraphEvents/gantt/output.css
@ -0,0 +1,70 @@
 /* Stylesheet inlined into the generated Gantt chart page by output.py. */
 /* Always show a horizontal scrollbar on the page. */
 html {
    overflow-x: scroll;
 }
 * {
    box-sizing: border-box;
 }
 main {
    padding: 5px;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
 }
 /* Container for the axis row and all slot rows. */
 div.gantt {
    position: relative;
    padding: 25px 0px;
 }
 /* One chart row; children are positioned absolutely relative to it. */
 div.row {
    white-space: nowrap;
    position: relative;
    height: 2em;
 }
 /* Zebra striping. */
 div.row:nth-child(even) {
    background-color: #f2f2f2;
 }
 /* Time-axis tick label; its left offset is set inline by output.py. */
 div.row>span.axis-tick {
    border-left: 1px solid #777777;
    position: absolute;
    padding-left: 1ch;
    display: inline-block;
    height: 100%;
 }
 /* Second span in the row: in the axis row emitted by output.py this is the
    first tick (the legend span comes first), so it gets no left border. */
 div.row>span.axis-tick:nth-of-type(2) {
    border-left: 0;
 }
 /* Colored block for one job; left/width/background are set inline. */
 div.row>span.job {
    display: inline-block;
    height: 100%;
    border-radius: 3px;
    border: 0.5px solid white;
    position: absolute;
 }
 /* Row header column; its width is set inline. */
 div.row>span.legend {
    display: inline-block;
    height: 100%;
    border-right: 1px solid #777777;
 }
 /* Tooltip: shows the element's data-descr attribute on hover/focus. */
 span[data-descr]:hover::after,
 span[data-descr]:focus::after {
    content: attr(data-descr);
    position: absolute;
    left: 0px;
    top: 1.8em;
    min-width: 200px;
    border: 1px #aaaaaa solid;
    border-radius: 10px;
    background-color: #ffff8d;
    padding: 6px;
    color: #000000;
    font-size: 14px;
    z-index: 1;
    /* keep line breaks contained in the description text */
    white-space: pre;
 }
--- a/ASJobGraphEvents/gantt/output.py
+++ b/ASJobGraphEvents/gantt/output.py
@ -0,0 +1,128 @@
 from datetime import datetime
 from structures import Gantt, Row, Job
 from gantt_types import Second, Millisecond
 import structures
 import utility
 import operator
 # Background colors for job blocks; gantt_to_html assigns them per row and
 # wraps around (index modulo len(COLORS)) when there are more rows.
 COLORS = [
    "#d50000",
    "#00bfa5",
    "#ff6f00",
    "#aa00ff",
    "#006064",
    "#ffd600",
    "#64dd17",
 ]
 # Width in pixels of the legend column at the left of every row; job blocks
 # and axis ticks are shifted right by this amount.
 HEADER_COLUMN_WIDTH = 240
 def ms_to_px(ms: Millisecond) -> float:
    """Convert a duration in milliseconds to a pixel length (10 ms per pixel)."""
    return ms / 10
 def job_to_html(job: Job, start: datetime, color: str) -> str:
    """Render one job as an absolutely positioned ``<span class="job">``.

    Args:
        job: the job to draw.
        start: the chart's time origin; the block's left offset is the time
            from ``start`` to ``job.start`` (plus the legend column width).
        color: CSS color used as the block's background.

    Returns:
        The HTML for the block. Its ``data-descr`` attribute holds the job
        name and duration (shown as a tooltip by the stylesheet), separated
        by a newline.
    """
    # Imported locally by name: this module defines its own top-level
    # function called `html`, which would shadow a module-level `import html`.
    from html import escape

    duration = structures.job_duration_ms(job)
    left = ms_to_px(utility.duration_ms(start, job.start)) + HEADER_COLUMN_WIDTH
    width = ms_to_px(duration)
    # Job names come from the input file and may contain quotes, "<" or "&";
    # escape them so they cannot break out of the attribute value.
    descr = escape(
        f"{job.name}\nDuration: {utility.ms_to_s(duration):.2f}s", quote=True
    )
    return f"""<span class="job" data-descr="{descr}" style="left: {left}px; width: {width}px; background-color: {color}"></span>"""
 def row_to_html(
    row: Row, start: datetime, process_num: int, color: str, width: float
 ) -> str:
    """Render one row: a legend cell followed by a block for each job.

    The legend shows ``process_num`` and the summed duration of the row's
    jobs in seconds; ``width`` becomes the row's pixel width and ``color``
    is used for every job block. ``start`` is the chart's time origin.
    """
    busy_seconds = utility.ms_to_s(structures.row_computing_duration_ms(row))
    legend_html = f"""<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Concurrency Slot {process_num} ({busy_seconds:.1f}s)</span>"""
    job_spans = (job_to_html(job, start, color) for job in row.jobs)
    jobs_html = "\n".join(job_spans)
    return f"""<div class="row" style="width: {width}px;">{legend_html}{jobs_html}</div>"""
 def rownum_to_top(num: int) -> float:
    """Return twice the row number ``num``.

    NOTE(review): presumably a vertical offset in em matching the 2em row
    height in output.css; no caller is visible in this module - confirm.
    """
    return num * 2
 def make_axis_span(left: float, s: Second) -> str:
    """Render one axis tick labelled "<s> sec" at ``left`` pixels from the row's left edge."""
    return f"""<span class="axis-tick" style="left: {left}px;">{s} sec</span>"""
 def make_axis_html(max_seconds: Second) -> str:
    """Render the time-axis row with a tick every two seconds.

    Ticks are placed at 0, 2, 4, ... for every value strictly below
    ``max_seconds``, however long the chart is (previously the axis
    stopped after 1000 ticks, i.e. at 2000 seconds).

    Args:
        max_seconds: length of the charted time span in seconds; expected to
            be a finite number.

    Returns:
        The HTML of the axis row, including its legend cell.
    """
    tick_step = 2  # seconds between two ticks
    seconds = []
    tick = 0
    while tick < max_seconds:
        seconds.append(Second(tick))
        tick += tick_step
    axis_spans = "".join(
        make_axis_span(ms_to_px(utility.s_to_ms(s)) + HEADER_COLUMN_WIDTH, s)
        for s in seconds
    )
    return f"""<div class="row axis">
    <span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Total Processing Time</span>
    {axis_spans}
 </div>"""
 def gantt_to_html(g: Gantt) -> str:
    """Render the whole chart: the axis row followed by one row per slot.

    Rows are ordered by the total time spent in jobs, busiest first, and are
    numbered and colored in that order. All rows share one time origin (the
    earliest job start) and one width, which covers the span from that
    origin to the latest job end.

    Args:
        g: the rows to draw; every row is expected to hold at least one job.

    Returns:
        The chart HTML, or an empty string when ``g`` is empty.
    """
    if not g:
        return ""
    start = min(row.jobs[0].start for row in g)
    # Measure from the shared origin to the latest end. Using each row's own
    # first-to-last span underestimated the width whenever the row that
    # finishes last did not also start first.
    end = max(job.end for row in g for job in row.jobs)
    max_seconds = utility.ms_to_s(utility.duration_ms(start, end))
    chart_width = ms_to_px(utility.s_to_ms(max_seconds)) + HEADER_COLUMN_WIDTH
    busiest_first = sorted(
        g, reverse=True, key=structures.row_computing_duration_ms
    )
    rows_html = "\n".join(
        row_to_html(row, start, num + 1, COLORS[num % len(COLORS)], chart_width)
        for num, row in enumerate(busiest_first)
    )
    return f"""<div class="gantt">{make_axis_html(max_seconds)}{rows_html}</div>"""
 def style() -> str:
    """Return the contents of ``output.css`` wrapped in a ``<style>`` element.

    The stylesheet is looked up next to this module, so the script works
    from any working directory (the former "./gantt/output.css" path only
    resolved when run from the folder that contains ``gantt``).

    Raises:
        OSError: if the stylesheet cannot be read.
    """
    import os  # local import: this module does not import os at file level

    css_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output.css")
    with open(css_path) as css:
        return f"""<style>{css.read()}</style>"""
 def html(g: Gantt) -> str:
    """Render the complete HTML page for the chart ``g``.

    The page contains the number of rows ("Max parallelism"), the chart
    itself, a short explanation and the inlined stylesheet.

    Returns:
        The page as a string, or an empty string when ``g`` is empty. For an
        empty chart nothing is rendered and the stylesheet is not read.
    """
    # Bail out first: there is no point building the page (and reading the
    # CSS file) only to discard it.
    if not g:
        return ""
    return f"""
 <html>
 <head></head>
 <body>
 <main>
 <h1>Gantt Chart</h1>
 <p>Max parallelism: {len(g)}</p>
 {gantt_to_html(g)}
 <h1>Explanation</h1>
 <p>
    <ul>
    <li>Each row represents a parallelism "slot"; if "maxParallelism" was 4, then there are 4 rows.</li>
    <li>Each colored block is a job; hover with a mouse to show the name and how long it took.</li>
    <li>Each row shows the total time spent doing jobs to highlight bottlenecks.</li>
    </ul>
 </p>
 </main>
 {style()}
 </body>
 </html>
 """
--- a/ASJobGraphEvents/gantt/script.py
+++ b/ASJobGraphEvents/gantt/script.py
@ -0,0 +1,86 @@
 from dataclasses import dataclass
 from typing import List, Set, Iterable, Optional, cast
 import os, sys
 from structures import Job, Gantt, Row, new_gantt
 import dgml, output
 def get_dir(folder: str) -> Set[str]:
    """Return the paths of all regular files directly inside ``folder``.

    Each path is ``folder`` joined with the entry name; subdirectories are
    left out and not descended into.
    """
    candidates = (os.path.join(folder, entry) for entry in os.listdir(folder))
    return {path for path in candidates if os.path.isfile(path)}
 def write_document(content: str, filepath: str) -> None:
    """Write ``content`` to ``filepath``, creating parent directories as needed.

    Args:
        content: the text to write.
        filepath: destination path; an existing file is overwritten.

    Raises:
        OSError: if a directory cannot be created or the file cannot be written.
    """
    parent = os.path.dirname(filepath)
    # A bare file name has no directory part, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filepath, "w") as file:
        file.write(content)
 def output_file_path(file: str, out_folder: str) -> str:
    """Return the path in ``out_folder`` named after ``file`` with an ".html" extension.

    Only the base name of ``file`` is used, and only its last extension is
    replaced.
    """
    stem, _ext = os.path.splitext(os.path.basename(file))
    return os.path.join(out_folder, stem + ".html")
 def make_gantt(file: str, out_folder: str) -> None:
    """Create the Gantt chart page for the DGML ``file`` inside ``out_folder``.

    When the file yields no jobs, nothing is written and a hint is printed
    instead; otherwise the page is written and its path is printed.
    """
    jobs = dgml.read_jobs(file)
    document = output.html(new_gantt(jobs))
    if document:
        destination = output_file_path(file, out_folder)
        write_document(document, destination)
        print(f'Saving "{destination}"')
        return
    print(f"No jobs found in {file}; maybe this is not the -annotated file?")
 def make_gantt_dir(folder: str, out_folder: str) -> None:
    """Run ``make_gantt`` on every file directly inside ``folder``."""
    for path in get_dir(folder):
        make_gantt(path, out_folder)
 # SCRIPT
 def print_help() -> None:
    """Print the command-line usage of gantt/script.py to standard output.

    The text describes both accepted invocations: input folders and
    individual input files, each followed by the output folder. (The earlier
    text was copied from rebuild.py and described that script instead.)
    """
    print(
        """
 Guide for gantt/script.py
 (requires Python 3.8 or later)
 Use:
 \tpython gantt/script.py <input folder> <output folder>
 \t\tCreates a Gantt chart (.html) for every file in <input folder> and writes it to <output folder>.
 \tpython gantt/script.py <inputfile> <inputfile> ... <output folder>
 \t\tCreates a Gantt chart (.html) for each <inputfile> and writes it to <output folder>.
 """
    )
 def main() -> None:
    """Command-line entry point.

    Expects one or more input paths followed by the output folder. Files are
    charted directly, directories are charted file by file, and anything
    else is reported. With fewer than two arguments the help text is printed.
    """
    if len(sys.argv) < 3:
        print_help()
        return
    # Skip the script name; the last argument is the output folder.
    *inputs, output_folder = sys.argv[1:]
    for path in inputs:
        if os.path.isfile(path):
            make_gantt(path, output_folder)
            continue
        if os.path.isdir(path):
            make_gantt_dir(path, output_folder)
            continue
        print(f"{path} is not a file or directory.")
 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/ASJobGraphEvents/gantt/structures.py
+++ b/ASJobGraphEvents/gantt/structures.py
@ -0,0 +1,72 @@
 from typing import Tuple, List, NamedTuple, Optional, NewType
 from datetime import datetime
 import utility
 from gantt_types import ThreadId, Millisecond, Second
 import operator
 class Job(NamedTuple):
    """One job of the job graph, as drawn in the Gantt chart."""

    # when the job started
    start: datetime
    # when the job finished
    end: datetime
    # label shown in the job's tooltip
    name: str
    # thread (slot) the job ran on; jobs are grouped into rows by this id
    thread: ThreadId
 class Row(NamedTuple):
    """All jobs that ran on one thread; one row of the chart."""

    # jobs of this row; add_job appends to this list in place
    jobs: List[Job]
    # thread id shared by every job in the row
    thread: ThreadId
 # A Gantt chart is simply the list of its rows.
 Gantt = List[Row]
 def add_job(row: Row, job: Job) -> None:
    """Append ``job`` to ``row`` in place.

    Asserts that the job belongs to the row's thread and, when the row
    already holds jobs, that it does not start before the last one ended.
    """
    assert row.thread == job.thread, f"row: {row.thread}, job: {job.thread}"
    scheduled = row.jobs
    if scheduled:
        previous = scheduled[-1]
        assert (
            previous.end <= job.start
        ), f"{previous.end} is not less than {job.start} (thread id: {row.thread})"
    scheduled.append(job)
 def new_row(job: Job) -> Row:
    """Create a row for ``job``'s thread that contains just ``job``."""
    return Row(jobs=[job], thread=job.thread)
 def row_duration_ms(row: Row) -> Millisecond:
    """Return the time from the start of the row's first job to the end of its last job.

    Raises ``IndexError`` when the row holds no jobs.
    """
    first, last = row.jobs[0], row.jobs[-1]
    return utility.duration_ms(first.start, last.end)
 def row_computing_duration_ms(row: Row) -> Millisecond:
    """Return the summed duration of all jobs in ``row`` (gaps between jobs excluded)."""
    busy = sum(job_duration_ms(job) for job in row.jobs)
    return Millisecond(busy)
 def row_with_thread(g: Gantt, thread: ThreadId) -> Optional[Row]:
    """Return the first row of ``g`` whose thread equals ``thread``, or ``None``."""
    matches = (row for row in g if row.thread == thread)
    return next(matches, None)
 def add_row(g: Gantt, row: Row) -> None:
    """Append ``row`` to the chart ``g`` in place."""
    g.append(row)
 def new_gantt(jobs: List[Job]) -> Gantt:
    """Group ``jobs`` into rows, one row per thread.

    Jobs are processed in order of their start time, so each row's jobs end
    up sorted by start; rows appear in the order their thread is first seen.
    """
    chart: Gantt = []
    by_start = sorted(jobs, key=operator.attrgetter("start"))
    for job in by_start:
        existing = row_with_thread(chart, job.thread)
        if existing is None:
            add_row(chart, new_row(job))
            continue
        add_job(existing, job)
    return chart
 def job_duration_ms(job: Job) -> Millisecond:
    """Return the time between ``job.start`` and ``job.end`` in milliseconds."""
    return utility.duration_ms(job.start, job.end)
--- a/ASJobGraphEvents/gantt/utility.py
+++ b/ASJobGraphEvents/gantt/utility.py
@ -0,0 +1,16 @@
 from datetime import datetime
 from gantt_types import Millisecond, Second
 def duration_ms(start_time: datetime, end_time: datetime) -> Millisecond:
    """Return the time from ``start_time`` to ``end_time`` in whole milliseconds.

    The result is rounded down to a full millisecond and is negative when
    ``end_time`` lies before ``start_time``.

    Args:
        start_time: beginning of the interval.
        end_time: end of the interval.

    Returns:
        The interval length in milliseconds, as an integer value.
    """
    duration = end_time - start_time
    # timedelta stores (days, seconds, microseconds) with 0 <= seconds < 86400.
    # The days part must be included, otherwise intervals of a day or more
    # and all negative intervals come out wrong.
    total_us = (
        duration.days * 86400 + duration.seconds
    ) * 1000000 + duration.microseconds
    return Millisecond(total_us // 1000)
 def ms_to_s(m: Millisecond) -> Second:
    """Convert milliseconds to seconds (true division by 1000)."""
    return Second(m / 1000)
 def s_to_ms(s: Second) -> Millisecond:
    """Convert seconds to milliseconds (multiplication by 1000)."""
    return Millisecond(s * 1000)
--- a/ASJobGraphEvents/rebuild.py
+++ b/ASJobGraphEvents/rebuild.py
@ -1,3 +1,5 @@
 #!/usr/bin/env python3
 """
 Rebuilds a DGML file. Requires Python 3.8.
 """
@ -36,7 +38,14 @@ def load_file(filename: str) -> List[Row]:
    if ext == ".csv":
        with open(filename) as file:
            dict_rows = csv.DictReader(file)
-            rows = [make_row_from_jarvis(row["MessageText"]) for row in dict_rows]
+            rows = [
                make_row_from_jarvis(
                    row["MessageText"],
                    row["CurrentActivityId"],
                    int(row["Engine_EventSubclass"]),
                )
                for row in dict_rows
            ]
            return [r for r in rows if r]
@ -64,7 +73,7 @@ def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
    subclass = None
    for col in event.findall("Column", ns):
-        if col.attrib["id"] == "46":
+        if col.attrib["id"] == "46" or col.attrib["id"] == "53":
            guid = col.text
        if col.attrib["id"] == "1":
@ -83,19 +92,29 @@ def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
    return None
-def make_row_from_jarvis(message_txt: str) -> Optional[Row]:
+def make_row_from_jarvis(
-    if "graphcorrelationid" in message_txt.lower():
+    message_txt: str, activity_id: str, subclass: int
-        print(
+) -> Optional[Row]:
-            "This event is from an older version of the job graph feature (shouldn't have 'GraphCorrelationID' in it)"
+    guid = activity_id + str(subclass) + ("-annotated" if subclass == 2 else "-plan")
        )
-    match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt)
+    if "graphcorrelationid" in message_txt.lower():
-    if match:
+        match = re.match(
-        textdata, guid, order_marker_str = match.group(1, 2, 3)
+            r"TextData: (.*); GraphCorrelationID: (.*); IntegerData: (.\d*)",
            message_txt,
        )
        if match:
            textdata, order_marker_str = match.group(1, 3)
    else:
        match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt)
        if match:
            textdata, order_marker_str = match.group(1, 2)
    try:
        order_marker = int(order_marker_str)
        return Row(guid, order_marker, textdata)
-
+    except UnboundLocalError:
-    return None
+        return None
 def extract_metadata(header_row: Row) -> Optional[Tuple[int, int]]: