Christian Wade 2020-09-22 22:59:43 -07:00
commit 4a1ee084b8
11 changed files with 740 additions and 0 deletions


@ -0,0 +1,42 @@
# Job Graph Events in Power BI
Job Graph events can be used to identify bottlenecks in data refreshes by highlighting the critical path. For instances of Analysis Services not running on-premises, the graph is broken into 16 KB chunks, each sent in its own trace event. The script in this folder reassembles the chunks into a complete DGML file.
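For intuition, reassembly amounts to sorting each graph's chunks by their order marker and concatenating the text. A minimal sketch of the idea (the real logic in `rebuild.py` below also validates the chunk count against the header and strips `<pii>` tags):
```python
from typing import List, Tuple

def reassemble(chunks: List[Tuple[int, str]]) -> str:
    # Each trace event carries (order_marker, textdata) for one graph GUID.
    _header, *rest = sorted(chunks)  # the first chunk is a metadata header, not graph XML
    return "".join(text for _, text in rest)
```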
# Rebuilding the DGML file
## Requirements
* Python 3.8 or later
* Visual Studio
## Usage
1. Start a trace in SQL Server Profiler and select "Job Graph Events".
2. Start a data refresh ("Process Full" in SQL Server Management Studio).
3. Wait for all trace events to arrive in Profiler.
4. `File > Save As > Trace XML File`
5. Aim `rebuild.py` at this file like so:
```bash
python rebuild.py path\to\trace.xml output_folder
```
6. Inside `output_folder` there will be two .DGML files, which can be opened in Visual Studio.
# Creating a Gantt Chart
## Requirements
* Python 3.8 or later
* A valid job graph DGML file (from above)
## Usage
1. Get a .DGML file with all the annotations (running duration, waiting duration, etc.). This is the `-annotated` file produced by `rebuild.py`.
2. Run `gantt\script.py` like so:
```bash
python gantt\script.py path\to\file.dgml output_folder
```
3. Inside `output_folder` there will be an .html file that can be opened in a browser.


@ -0,0 +1,59 @@
from typing import List, Optional
from datetime import datetime
import xml.etree.ElementTree as ET
from structures import Job
from gantt_types import ThreadId
def read_jobs(filename: str) -> List[Job]:
jobs: List[Job] = []
doc = ET.parse(filename)
root = doc.getroot()
try:
nodes = [child for child in root if "nodes" in child.tag.lower()][0]
except IndexError:
return jobs
for node in nodes:
if job := parse_job_node(node):
jobs.append(job)
return jobs
def parse_iso(time: str) -> datetime:
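    # datetime.fromisoformat in Python 3.8 cannot parse a trailing "Z" (UTC
    # designator), so strip it and treat the timestamp as naive.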
if time[-1].lower() == "z":
time = time[:-1]
return datetime.fromisoformat(time)
def parse_thread_id(s: str) -> ThreadId:
return ThreadId(int(s))
def strip_newlines(s: str) -> str:
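    # Despite the name, this drops spaces and all control characters
    # (anything with ord <= 32), not just newlines.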
return "".join([c for c in s if ord(c) > 32])
def parse_job_node(node: ET.Element) -> Optional[Job]:
for attr, value in node.attrib.items():
if attr == "StartedAt":
start = parse_iso(value)
if attr == "FinishedAt":
end = parse_iso(value)
if attr == "Label":
name = value
if attr == "Slot" or attr == "Thread":
thread = value
try:
return Job(start, end, strip_newlines(name), parse_thread_id(thread))
except UnboundLocalError:
# most likely doesn't include "Thread" or "Slot" attribute.
return None


@ -0,0 +1,6 @@
from typing import NewType
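# Unit- and id-tagged aliases: NewType lets a type checker catch accidental
# mix-ups (e.g. passing seconds where milliseconds are expected).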
ThreadId = NewType("ThreadId", int)
Millisecond = NewType("Millisecond", float)
Second = NewType("Second", float)


@ -0,0 +1,70 @@
html {
overflow-x: scroll;
}
* {
box-sizing: border-box;
}
main {
padding: 5px;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}
div.gantt {
position: relative;
padding: 25px 0px;
}
div.row {
white-space: nowrap;
position: relative;
height: 2em;
}
div.row:nth-child(even) {
background-color: #f2f2f2;
}
div.row>span.axis-tick {
border-left: 1px solid #777777;
position: absolute;
padding-left: 1ch;
display: inline-block;
height: 100%;
}
div.row>span.axis-tick:nth-of-type(2) {
border-left: 0;
}
div.row>span.job {
display: inline-block;
height: 100%;
border-radius: 3px;
border: 0.5px solid white;
position: absolute;
}
div.row>span.legend {
display: inline-block;
height: 100%;
border-right: 1px solid #777777;
}
span[data-descr]:hover::after,
span[data-descr]:focus::after {
content: attr(data-descr);
position: absolute;
left: 0px;
top: 1.8em;
min-width: 200px;
border: 1px #aaaaaa solid;
border-radius: 10px;
background-color: #ffff8d;
padding: 6px;
color: #000000;
font-size: 14px;
z-index: 1;
white-space: pre;
}


@ -0,0 +1,128 @@
from datetime import datetime
from structures import Gantt, Row, Job
from gantt_types import Second, Millisecond
import structures
import utility
COLORS = [
"#d50000",
"#00bfa5",
"#ff6f00",
"#aa00ff",
"#006064",
"#ffd600",
"#64dd17",
]
HEADER_COLUMN_WIDTH = 240
def ms_to_px(ms: Millisecond) -> float:
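    # Chart scale: 1 horizontal pixel per 10 ms.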
return ms / 10
def job_to_html(job: Job, start: datetime, color: str) -> str:
left = ms_to_px(utility.duration_ms(start, job.start)) + HEADER_COLUMN_WIDTH
width = ms_to_px(structures.job_duration_ms(job))
return f"""<span class="job" data-descr="{job.name}{chr(10)}Duration: {utility.ms_to_s(structures.job_duration_ms(job)):.2f}s" style="left: {left}px; width: {width}px; background-color: {color}"></span>"""
def row_to_html(
row: Row, start: datetime, process_num: int, color: str, width: float
) -> str:
legend_html = f"""<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Concurrency Slot {process_num} ({utility.ms_to_s(structures.row_computing_duration_ms(row)):.1f}s)</span>"""
jobs_html = "\n".join([job_to_html(job, start, color) for job in row.jobs])
return (
f"""<div class="row" style="width: {width}px;">{legend_html}{jobs_html}</div>"""
)
def rownum_to_top(num: int) -> float:
return num * 2
def make_axis_span(left: float, s: Second) -> str:
return f"""<span class="axis-tick" style="left: {left}px;">{s} sec</span>"""
def make_axis_html(max_seconds: Second) -> str:
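    # Tick labels every 2 seconds: generate generously (up to 2000 s), then
    # trim to the chart's actual width.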
seconds = [Second(i * 2) for i in range(1000)]
seconds = [i for i in seconds if i < max_seconds]
axis_spans = "".join(
[
make_axis_span(ms_to_px(utility.s_to_ms(s)) + HEADER_COLUMN_WIDTH, s)
for s in seconds
]
)
return f"""<div class="row axis">
<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Total Processing Time</span>
{axis_spans}
</div>"""
def gantt_to_html(g: Gantt) -> str:
if not g:
return ""
start = min([row.jobs[0].start for row in g])
max_seconds = max([utility.ms_to_s(structures.row_duration_ms(row)) for row in g])
rows_html = "\n".join(
[
row_to_html(
row,
start,
num + 1,
COLORS[num % len(COLORS)],
ms_to_px(utility.s_to_ms(max_seconds)) + HEADER_COLUMN_WIDTH,
)
for num, row in enumerate(
sorted(
g,
reverse=True,
key=lambda r: structures.row_computing_duration_ms(r),
)
)
]
)
return f"""<div class="gantt">{make_axis_html(max_seconds)}{rows_html}</div>"""
def style() -> str:
with open("./gantt/output.css") as css:
return f"""<style>{css.read()}</style>"""
def html(g: Gantt) -> str:
html = f"""
<html>
<head></head>
<body>
<main>
<h1>Gantt Chart</h1>
<p>Max parallelism: {len(g)}</p>
{gantt_to_html(g)}
<h1>Explanation</h1>
<p>
<ul>
<li>Each row represents a parallelism "slot"; if "maxParallelism" was 4, then there are 4 rows.</li>
<li>Each colored block is a job; hover with a mouse to show the name and how long it took.</li>
<li>Each row shows the total time spent doing jobs to highlight bottlenecks.</li>
</ul>
</p>
</main>
{style()}
</body>
</html>
"""
return html if g else ""


@ -0,0 +1,86 @@
from typing import Set
import os, sys
from structures import new_gantt
import dgml, output
def get_dir(folder: str) -> Set[str]:
return set(
[
os.path.join(folder, filename)
for filename in os.listdir(folder)
if os.path.isfile(os.path.join(folder, filename))
]
)
def write_document(content: str, filepath: str) -> None:
os.makedirs(os.path.dirname(filepath), exist_ok=True)
with open(filepath, "w") as file:
file.write(content)
def output_file_path(file: str, out_folder: str) -> str:
base = os.path.basename(file)
    base, _ = os.path.splitext(base)
return os.path.join(out_folder, base + ".html")
def make_gantt(file: str, out_folder: str) -> None:
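    # Render one DGML file as an HTML Gantt chart; only the -annotated file
    # carries the job timing data this needs.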
html = output.html(new_gantt(dgml.read_jobs(file)))
if not html:
print(f"No jobs found in {file}; maybe this is not the -annotated file?")
else:
write_document(html, output_file_path(file, out_folder))
print(f'Saving "{output_file_path(file, out_folder)}"')
def make_gantt_dir(folder: str, out_folder: str) -> None:
for file in get_dir(folder):
make_gantt(file, out_folder)
# SCRIPT
def print_help() -> None:
    print(
        """
Guide for gantt/script.py
(requires Python 3.8 or later)
Use:
\tpython gantt/script.py <input file or folder> ... <output folder>
\t\tConverts each input .DGML file (and every file in each input folder)
\t\tinto an HTML Gantt chart written to <output folder>.
"""
    )
def main() -> None:
if len(sys.argv) < 3:
print_help()
else:
_, *inputs, output_folder = sys.argv
for i in inputs:
if os.path.isfile(i):
make_gantt(i, output_folder)
elif os.path.isdir(i):
make_gantt_dir(i, output_folder)
else:
print(f"{i} is not a file or directory.")
if __name__ == "__main__":
main()


@ -0,0 +1,72 @@
from typing import List, NamedTuple, Optional
from datetime import datetime
import utility
from gantt_types import ThreadId, Millisecond
import operator
class Job(NamedTuple):
start: datetime
end: datetime
name: str
thread: ThreadId
class Row(NamedTuple):
jobs: List[Job]
thread: ThreadId
Gantt = List[Row]
def add_job(row: Row, job: Job) -> None:
assert row.thread == job.thread, f"row: {row.thread}, job: {job.thread}"
if row.jobs:
assert (
row.jobs[-1].end <= job.start
), f"{row.jobs[-1].end} is not less than {job.start} (thread id: {row.thread})"
row.jobs.append(job)
def new_row(job: Job) -> Row:
return Row([job], job.thread)
def row_duration_ms(row: Row) -> Millisecond:
return utility.duration_ms(row.jobs[0].start, row.jobs[-1].end)
def row_computing_duration_ms(row: Row) -> Millisecond:
return Millisecond(sum([job_duration_ms(job) for job in row.jobs]))
def row_with_thread(g: Gantt, thread: ThreadId) -> Optional[Row]:
for row in g:
if row.thread == thread:
return row
return None
def add_row(g: Gantt, row: Row) -> None:
g.append(row)
def new_gantt(jobs: List[Job]) -> Gantt:
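    # Bucket jobs by thread/slot id: each thread gets one row, and jobs are
    # appended in start-time order, so blocks within a row never overlap.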
g: Gantt = []
for job in sorted(jobs, key=operator.attrgetter("start")):
if row := row_with_thread(g, job.thread):
add_job(row, job)
else:
add_row(g, new_row(job))
return g
def job_duration_ms(job: Job) -> Millisecond:
return utility.duration_ms(job.start, job.end)


@ -0,0 +1,16 @@
from datetime import datetime
from gantt_types import Millisecond, Second
def duration_ms(start_time: datetime, end_time: datetime) -> Millisecond:
    # total_seconds() includes the days component, which .seconds alone would
    # silently drop for refreshes running longer than 24 hours.
    duration = end_time - start_time
    return Millisecond(duration.total_seconds() * 1000)
def ms_to_s(m: Millisecond) -> Second:
return Second(m / 1000)
def s_to_ms(s: Second) -> Millisecond:
return Millisecond(s * 1000)

ASJobGraphEvents/rebuild.py

@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
Rebuilds a DGML file. Requires Python 3.8.
"""
from typing import Dict, List, Tuple, Set, NamedTuple, Optional
import csv, re, os, operator, sys
import xml.etree.ElementTree as ET
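# Trace TextData fields can exceed csv's default field size limit; raise the
# limit to the largest value the platform accepts.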
maxsize = sys.maxsize
while True:
try:
csv.field_size_limit(maxsize)
break
except OverflowError:
maxsize //= 2
# TYPES
class Row(NamedTuple):
guid: str
order_marker: int
textdata: str
# PARSING
def load_file(filename: str) -> List[Row]:
"""
Returns a list of events, not sorted or filtered.
"""
_, ext = os.path.splitext(filename)
if ext == ".csv":
with open(filename) as file:
dict_rows = csv.DictReader(file)
rows = [
make_row_from_jarvis(
row["MessageText"],
row["CurrentActivityId"],
int(row["Engine_EventSubclass"]),
)
for row in dict_rows
]
return [r for r in rows if r]
elif ext == ".xml":
tree = ET.parse(filename)
ns = {"": "http://tempuri.org/TracePersistence.xsd"}
xml_rows: List[Optional[Row]] = []
for event in tree.findall(".//Event", ns):
xml_rows.append(make_row_from_xml(event, ns))
return [r for r in xml_rows if r]
else:
return []
def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
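    # Event id 134 is the job graph trace event. The column ids below are trace
    # columns (names inferred from the CSV path in make_row_from_jarvis):
    # 1 = EventSubclass, 10 = IntegerData (the chunk's order marker),
    # 42 = TextData, and 46/53 are correlation GUIDs.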
if event.attrib["id"] != "134":
return None
textdata = None
order_marker = None
guid = None
subclass = None
for col in event.findall("Column", ns):
if col.attrib["id"] == "46" or col.attrib["id"] == "53":
guid = col.text
if col.attrib["id"] == "1":
subclass = col.text
if col.attrib["id"] == "10" and col.text:
order_marker = int(col.text)
if col.attrib["id"] == "42":
textdata = col.text
if textdata and order_marker is not None and guid and subclass:
suffix = "annotated" if subclass == "2" else "plan"
return Row(f"{guid}-{suffix}", order_marker, textdata)
return None
def make_row_from_jarvis(
message_txt: str, activity_id: str, subclass: int
) -> Optional[Row]:
guid = activity_id + str(subclass) + ("-annotated" if subclass == 2 else "-plan")
if "graphcorrelationid" in message_txt.lower():
match = re.match(
r"TextData: (.*); GraphCorrelationID: (.*); IntegerData: (.\d*)",
message_txt,
)
if match:
textdata, order_marker_str = match.group(1, 3)
else:
match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt)
if match:
textdata, order_marker_str = match.group(1, 2)
try:
order_marker = int(order_marker_str)
return Row(guid, order_marker, textdata)
except UnboundLocalError:
return None
def extract_metadata(header_row: Row) -> Optional[Tuple[int, int]]:
# should really extract things correctly here
m = re.match(
r".*Length=\"(\d*)\".*AdditionalEvents=\"(\d*)\".*", header_row.textdata
)
if not m:
return None
return int(m.group(1)), int(m.group(2))
def remove_pii_tags(protected_data: str) -> str:
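    # Chunk payloads may arrive wrapped in <pii> tags; unwrap them before
    # concatenation so the tags don't corrupt the rebuilt DGML.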
if protected_data[:5] == "<pii>" and protected_data[-6:] == "</pii>":
return protected_data[5:-6]
return protected_data
def get_all_guids(data: List[Row]) -> Set[str]:
return {row.guid for row in data}
# GRAPH
def get_graph(data: List[Row], guid: str) -> Tuple[str, str]:
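    # Reassemble one graph: sort its chunks by order marker, split off the
    # metadata header, check the advertised chunk count, and concatenate.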
rows = [row for row in data if row.guid == guid]
rows = sorted(rows, key=operator.attrgetter("order_marker"))
header, *graph_data = rows
metadata = extract_metadata(header)
if metadata:
size, additional_events = metadata
assert additional_events == len(
graph_data
), f"metadata says there are {additional_events} rows; but there are {len(graph_data)}"
graph_str_builder = [remove_pii_tags(row.textdata) for row in graph_data]
return "".join(graph_str_builder), guid
# INPUT/OUTPUT FILES
def get_all_event_files() -> List[str]:
return [os.path.join("data", f) for f in os.listdir("data")]
def get_output_file(input_file: str, guid: str, output_folder: str) -> str:
_, input_file = os.path.split(input_file)
name, ext = os.path.splitext(input_file)
os.makedirs(output_folder, exist_ok=True)
return os.path.join(output_folder, f"{name}-{guid}.DGML")
def writefile(filename: str, data: str) -> None:
with open(filename, "w") as file:
file.write(data)
def reassemble_file(filename: str) -> List[Tuple[str, str]]:
result: List[Tuple[str, str]] = []
try:
data = load_file(filename)
guids = get_all_guids(data)
for guid in guids:
result.append(get_graph(data, guid))
except (IndexError, ValueError) as e:
print(f"error processing {filename}: {e}")
return result
def all_files() -> None:
if not os.path.isdir("data"):
print("directory 'data' does not exist.")
return
for input_file in get_all_event_files():
try:
data = load_file(input_file)
guids = get_all_guids(data)
os.makedirs("output", exist_ok=True)
for guid in guids:
graph, _ = get_graph(data, guid)
output_file = get_output_file(input_file, guid, "output")
print(f'Saving "{output_file}"')
writefile(output_file, graph)
except (IndexError, ValueError) as e:
print(f"error processing {input_file}: {e}")
# SCRIPT
def print_help() -> None:
print(
"""
Guide for rebuild.py
(requires Python 3.8 or later)
Use:
\tpython rebuild.py \tRebuilds all graphs in "./data" and writes them to "./output".
\tpython rebuild.py <inputfile> <outputfolder>\tRebuilds <inputfile> and writes the output to <outputfolder>.
"""
)
def main() -> None:
    if len(sys.argv) == 1:
        print("Reassembling all graphs in ./data")
        all_files()
    elif len(sys.argv) == 3:
        _, input_file, output_folder = sys.argv
        for graph, guid in reassemble_file(input_file):
            output_file = get_output_file(input_file, guid, output_folder)
            print(f'Saving "{output_file}"')
            writefile(output_file, graph)
    else:
        print_help()
if __name__ == "__main__":
main()


@ -22,6 +22,9 @@ BISM Normalizer is a schema diff tool for tabular models
## [UsqlScripts](https://github.com/Microsoft/Analysis-Services/tree/master/UsqlScripts)
Sample U-SQL scripts that demonstrate how to process a TPC-DS data set in Azure Data Lake.
## [ASJobGraphEvents](https://github.com/Microsoft/Analysis-Services/tree/master/ASJobGraphEvents)
Python scripts to reassemble job graph events from Analysis Services and render them as Gantt charts.
## Code of Conduct
This project has adopted the [Microsoft Open Source Code of
Conduct](https://opensource.microsoft.com/codeofconduct/).