Merge branch 'master' of https://github.com/microsoft/Analysis-Services
commit 4a1ee084b8

42  ASJobGraphEvents/README.md  Normal file
@@ -0,0 +1,42 @@
# Job Graph Events in Power BI

Job Graph events can be used to identify bottlenecks in data refreshes by highlighting the critical path. For instances of Analysis Services not running on-premises, the graph is broken into 16 KB chunks, each in its own event. The events can be reassembled with this script.
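Conceptually, reassembly only needs to group the chunk events by their graph GUID, order them by the integer order marker, and concatenate their `TextData` payloads; `rebuild.py` below automates this (including the CSV/XML parsing and PII-tag handling). A minimal sketch of the idea, assuming the events have already been parsed into `(guid, order_marker, textdata)` tuples:

```python
from collections import defaultdict
from typing import Dict, List, Tuple

def reassemble(events: List[Tuple[str, int, str]]) -> Dict[str, str]:
    """Stitch chunked job graph events back into one DGML string per graph GUID."""
    chunks: Dict[str, List[Tuple[int, str]]] = defaultdict(list)
    for guid, order_marker, textdata in events:
        chunks[guid].append((order_marker, textdata))

    graphs: Dict[str, str] = {}
    for guid, parts in chunks.items():
        parts.sort()  # the order markers restore the original chunk order
        # the first event is a metadata header (length, number of chunks); the rest is DGML
        graphs[guid] = "".join(text for _, text in parts[1:])
    return graphs
```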
# Rebuilding the DGML file

## Requirements

* Python 3.8 or later
* Visual Studio

## Usage

1. Start a trace in SQL Server Profiler and select "Job Graph Events".
2. Start a data refresh ("Process Full" in SQL Server Management Studio).
3. Wait for all trace events to arrive in Profiler.
4. `File > Save As > Trace XML File`
5. Aim `rebuild.py` at this file like so:

```bash
python rebuild.py path\to\trace.xml output_folder
```

6. Inside `output_folder` there will be two .DGML files (one for the planned graph, one annotated with actual durations), which can be opened in Visual Studio.

# Creating a Gantt Chart

## Requirements

* Python 3.8 or later
* A valid job graph DGML file (from above)

## Usage

1. Get a .DGML file with all the annotations (running duration, waiting duration, etc.).
2. Run `gantt\script.py` like so:

```bash
python gantt\script.py path\to\file.dgml output_folder
```

3. Inside `output_folder` there will be an .html file that can be opened in a browser.
0  ASJobGraphEvents/gantt/__init__.py  Normal file
59  ASJobGraphEvents/gantt/dgml.py  Normal file
@@ -0,0 +1,59 @@
from dataclasses import dataclass
from typing import List, Set, Iterable, Optional, cast
from datetime import datetime
import xml.etree.ElementTree as ET

from structures import Job
from gantt_types import ThreadId


def read_jobs(filename: str) -> List[Job]:
    jobs: List[Job] = []

    doc = ET.parse(filename)
    root = doc.getroot()

    try:
        nodes = [child for child in root if "nodes" in child.tag.lower()][0]
    except IndexError:
        return jobs

    for node in nodes:
        if job := parse_job_node(node):
            jobs.append(job)

    return jobs


def parse_iso(time: str) -> datetime:
    if time[-1].lower() == "z":
        time = time[:-1]

    return datetime.fromisoformat(time)


def parse_thread_id(s: str) -> ThreadId:
    return ThreadId(int(s))


def strip_newlines(s: str) -> str:
    return "".join([c for c in s if ord(c) > 32])


def parse_job_node(node: ET.Element) -> Optional[Job]:
    for attr, value in node.attrib.items():
        if attr == "StartedAt":
            start = parse_iso(value)
        if attr == "FinishedAt":
            end = parse_iso(value)
        if attr == "Label":
            name = value
        if attr == "Slot" or attr == "Thread":
            thread = value

    try:
        return Job(start, end, strip_newlines(name), parse_thread_id(thread))
    except UnboundLocalError:
        # most likely doesn't include "Thread" or "Slot" attribute.
        return None
6  ASJobGraphEvents/gantt/gantt_types.py  Normal file
@@ -0,0 +1,6 @@
from typing import NewType

ThreadId = NewType("ThreadId", int)
Millisecond = NewType("Millisecond", float)
Second = NewType("Second", float)
70  ASJobGraphEvents/gantt/output.css  Normal file
@@ -0,0 +1,70 @@
html {
    overflow-x: scroll;
}

* {
    box-sizing: border-box;
}

main {
    padding: 5px;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}

div.gantt {
    position: relative;
    padding: 25px 0px;
}

div.row {
    white-space: nowrap;
    position: relative;
    height: 2em;
}

div.row:nth-child(even) {
    background-color: #f2f2f2;
}

div.row>span.axis-tick {
    border-left: 1px solid #777777;
    position: absolute;
    padding-left: 1ch;
    display: inline-block;
    height: 100%;
}

div.row>span.axis-tick:nth-of-type(2) {
    border-left: 0;
}

div.row>span.job {
    display: inline-block;
    height: 100%;
    border-radius: 3px;
    border: 0.5px solid white;
    position: absolute;
}

div.row>span.legend {
    display: inline-block;
    height: 100%;
    border-right: 1px solid #777777;
}

span[data-descr]:hover::after,
span[data-descr]:focus::after {
    content: attr(data-descr);
    position: absolute;
    left: 0px;
    top: 1.8em;
    min-width: 200px;
    border: 1px #aaaaaa solid;
    border-radius: 10px;
    background-color: #ffff8d;
    padding: 6px;
    color: #000000;
    font-size: 14px;
    z-index: 1;
    white-space: pre;
}
128  ASJobGraphEvents/gantt/output.py  Normal file
@@ -0,0 +1,128 @@
from datetime import datetime
from structures import Gantt, Row, Job
from gantt_types import Second, Millisecond
import structures
import utility
import operator

COLORS = [
    "#d50000",
    "#00bfa5",
    "#ff6f00",
    "#aa00ff",
    "#006064",
    "#ffd600",
    "#64dd17",
]

HEADER_COLUMN_WIDTH = 240


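# Chart geometry: jobs are laid out at 1 pixel per 10 ms of wall-clock time,
# offset to the right of the fixed-width legend column.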
def ms_to_px(ms: Millisecond) -> float:
    return ms / 10


def job_to_html(job: Job, start: datetime, color: str) -> str:
    left = ms_to_px(utility.duration_ms(start, job.start)) + HEADER_COLUMN_WIDTH
    width = ms_to_px(structures.job_duration_ms(job))

    return f"""<span class="job" data-descr="{job.name}{chr(10)}Duration: {utility.ms_to_s(structures.job_duration_ms(job)):.2f}s" style="left: {left}px; width: {width}px; background-color: {color}"></span>"""


def row_to_html(
    row: Row, start: datetime, process_num: int, color: str, width: float
) -> str:
    legend_html = f"""<span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Concurrency Slot {process_num} ({utility.ms_to_s(structures.row_computing_duration_ms(row)):.1f}s)</span>"""

    jobs_html = "\n".join([job_to_html(job, start, color) for job in row.jobs])

    return (
        f"""<div class="row" style="width: {width}px;">{legend_html}{jobs_html}</div>"""
    )


def rownum_to_top(num: int) -> float:
    return num * 2


def make_axis_span(left: float, s: Second) -> str:
    return f"""<span class="axis-tick" style="left: {left}px;">{s} sec</span>"""


def make_axis_html(max_seconds: Second) -> str:
    seconds = [Second(i * 2) for i in range(1000)]

    seconds = [i for i in seconds if i < max_seconds]

    axis_spans = "".join(
        [
            make_axis_span(ms_to_px(utility.s_to_ms(s)) + HEADER_COLUMN_WIDTH, s)
            for s in seconds
        ]
    )

    return f"""<div class="row axis">
    <span class="legend" style="width: {HEADER_COLUMN_WIDTH}px">Total Processing Time</span>
    {axis_spans}
    </div>"""


def gantt_to_html(g: Gantt) -> str:
    if not g:
        return ""

    start = min([row.jobs[0].start for row in g])

    max_seconds = max([utility.ms_to_s(structures.row_duration_ms(row)) for row in g])

    rows_html = "\n".join(
        [
            row_to_html(
                row,
                start,
                num + 1,
                COLORS[num % len(COLORS)],
                ms_to_px(utility.s_to_ms(max_seconds)) + HEADER_COLUMN_WIDTH,
            )
            for num, row in enumerate(
                sorted(
                    g,
                    reverse=True,
                    key=lambda r: structures.row_computing_duration_ms(r),
                )
            )
        ]
    )

    return f"""<div class="gantt">{make_axis_html(max_seconds)}{rows_html}</div>"""


def style() -> str:
    with open("./gantt/output.css") as css:
        return f"""<style>{css.read()}</style>"""


def html(g: Gantt) -> str:
    html = f"""
    <html>
    <head></head>
    <body>
    <main>
    <h1>Gantt Chart</h1>
    <p>Max parallelism: {len(g)}</p>
    {gantt_to_html(g)}
    <h1>Explanation</h1>
    <p>
    <ul>
    <li>Each row represents a parallelism "slot"; if "maxParallelism" was 4, then there are 4 rows.</li>
    <li>Each colored block is a job; hover with a mouse to show the name and how long it took.</li>
    <li>Each row shows the total time spent doing jobs to highlight bottlenecks.</li>
    </ul>
    </p>
    </main>
    {style()}
    </body>
    </html>
    """

    return html if g else ""
86  ASJobGraphEvents/gantt/script.py  Normal file
@@ -0,0 +1,86 @@
from dataclasses import dataclass
from typing import List, Set, Iterable, Optional, cast
import os, sys

from structures import Job, Gantt, Row, new_gantt

import dgml, output


def get_dir(folder: str) -> Set[str]:
    return set(
        [
            os.path.join(folder, filename)
            for filename in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, filename))
        ]
    )


def write_document(content: str, filepath: str) -> None:
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    with open(filepath, "w") as file:
        file.write(content)


def output_file_path(file: str, out_folder: str) -> str:
    base = os.path.basename(file)
    base, ext = os.path.splitext(base)

    return os.path.join(out_folder, base + ".html")


def make_gantt(file: str, out_folder: str) -> None:
    html = output.html(new_gantt(dgml.read_jobs(file)))

    if not html:
        print(f"No jobs found in {file}; maybe this is not the -annotated file?")
    else:
        write_document(html, output_file_path(file, out_folder))
        print(f'Saving "{output_file_path(file, out_folder)}"')


def make_gantt_dir(folder: str, out_folder: str) -> None:
    for file in get_dir(folder):
        make_gantt(file, out_folder)


# SCRIPT


def print_help() -> None:
    print(
        """
Guide for gantt/script.py

(requires Python 3.8 or later)

Use:

\tpython gantt/script.py <input folder> <output folder>
\t\tMakes a gantt chart for every DGML file in <input folder> and writes the .html files to <output folder>.

\tpython gantt/script.py <input file> <input file> ... <output folder>
\t\tMakes gantt charts for the given <input file>s and writes them to <output folder>.
"""
    )


def main() -> None:
    if len(sys.argv) < 3:
        print_help()
    else:
        _, *inputs, output_folder = sys.argv

        for i in inputs:
            if os.path.isfile(i):
                make_gantt(i, output_folder)
            elif os.path.isdir(i):
                make_gantt_dir(i, output_folder)
            else:
                print(f"{i} is not a file or directory.")


if __name__ == "__main__":
    main()
72  ASJobGraphEvents/gantt/structures.py  Normal file
@@ -0,0 +1,72 @@
from typing import Tuple, List, NamedTuple, Optional, NewType
from datetime import datetime

import utility
from gantt_types import ThreadId, Millisecond, Second

import operator


class Job(NamedTuple):
    start: datetime
    end: datetime
    name: str
    thread: ThreadId


class Row(NamedTuple):
    jobs: List[Job]
    thread: ThreadId


Gantt = List[Row]


def add_job(row: Row, job: Job) -> None:
    assert row.thread == job.thread, f"row: {row.thread}, job: {job.thread}"

    if row.jobs:
        assert (
            row.jobs[-1].end <= job.start
        ), f"{row.jobs[-1].end} is not less than {job.start} (thread id: {row.thread})"

    row.jobs.append(job)


def new_row(job: Job) -> Row:
    return Row([job], job.thread)


def row_duration_ms(row: Row) -> Millisecond:
    return utility.duration_ms(row.jobs[0].start, row.jobs[-1].end)


def row_computing_duration_ms(row: Row) -> Millisecond:
    return Millisecond(sum([job_duration_ms(job) for job in row.jobs]))


def row_with_thread(g: Gantt, thread: ThreadId) -> Optional[Row]:
    for row in g:
        if row.thread == thread:
            return row
    return None


def add_row(g: Gantt, row: Row) -> None:
    g.append(row)


def new_gantt(jobs: List[Job]) -> Gantt:
    g: Gantt = []

    for job in sorted(jobs, key=operator.attrgetter("start")):
        if row := row_with_thread(g, job.thread):
            add_job(row, job)
        else:
            add_row(g, new_row(job))

    return g


def job_duration_ms(job: Job) -> Millisecond:
    return utility.duration_ms(job.start, job.end)
16  ASJobGraphEvents/gantt/utility.py  Normal file
@@ -0,0 +1,16 @@
from datetime import datetime
from gantt_types import Millisecond, Second


def duration_ms(start_time: datetime, end_time: datetime) -> Millisecond:
    duration = end_time - start_time

    return Millisecond((duration.seconds * 1000000 + duration.microseconds) // 1000)


def ms_to_s(m: Millisecond) -> Second:
    return Second(m / 1000)


def s_to_ms(s: Second) -> Millisecond:
    return Millisecond(s * 1000)
258  ASJobGraphEvents/rebuild.py  Normal file
@@ -0,0 +1,258 @@
#!/usr/bin/env python3

"""
Rebuilds a DGML file. Requires Python 3.8.
"""

from typing import Dict, List, Tuple, Set, NamedTuple, Optional
import csv, re, os, operator, sys
import xml.etree.ElementTree as ET


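# csv.field_size_limit rejects values that do not fit in a C long (an OverflowError
# on platforms where long is 32-bit), so keep halving sys.maxsize until it is accepted.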
maxsize = sys.maxsize
while True:
    try:
        csv.field_size_limit(maxsize)
        break
    except OverflowError:
        maxsize //= 2

# TYPES


class Row(NamedTuple):
    guid: str
    order_marker: int
    textdata: str


# PARSING


def load_file(filename: str) -> List[Row]:
    """
    Returns a list of events, not sorted or filtered.
    """
    _, ext = os.path.splitext(filename)

    if ext == ".csv":
        with open(filename) as file:
            dict_rows = csv.DictReader(file)
            rows = [
                make_row_from_jarvis(
                    row["MessageText"],
                    row["CurrentActivityId"],
                    int(row["Engine_EventSubclass"]),
                )
                for row in dict_rows
            ]

            return [r for r in rows if r]

    elif ext == ".xml":
        tree = ET.parse(filename)
        ns = {"": "http://tempuri.org/TracePersistence.xsd"}

        xml_rows: List[Optional[Row]] = []

        for event in tree.findall(".//Event", ns):
            xml_rows.append(make_row_from_xml(event, ns))

        return [r for r in xml_rows if r]
    else:
        return []


def make_row_from_xml(event: ET.Element, ns: Dict[str, str]) -> Optional[Row]:
    if event.attrib["id"] != "134":
        return None

    textdata = None
    order_marker = None
    guid = None
    subclass = None

    for col in event.findall("Column", ns):
        if col.attrib["id"] == "46" or col.attrib["id"] == "53":
            guid = col.text

        if col.attrib["id"] == "1":
            subclass = col.text

        if col.attrib["id"] == "10" and col.text:
            order_marker = int(col.text)

        if col.attrib["id"] == "42":
            textdata = col.text

    if textdata and order_marker is not None and guid and subclass:
        suffix = "annotated" if subclass == "2" else "plan"
        return Row(f"{guid}-{suffix}", order_marker, textdata)

    return None


def make_row_from_jarvis(
    message_txt: str, activity_id: str, subclass: int
) -> Optional[Row]:
    guid = activity_id + str(subclass) + ("-annotated" if subclass == 2 else "-plan")

    if "graphcorrelationid" in message_txt.lower():
        match = re.match(
            r"TextData: (.*); GraphCorrelationID: (.*); IntegerData: (.\d*)",
            message_txt,
        )
        if match:
            textdata, order_marker_str = match.group(1, 3)
    else:
        match = re.match(r"TextData: (.*); IntegerData: (.\d*)", message_txt)

        if match:
            textdata, order_marker_str = match.group(1, 2)

    try:
        order_marker = int(order_marker_str)
        return Row(guid, order_marker, textdata)
    except UnboundLocalError:
        return None


def extract_metadata(header_row: Row) -> Optional[Tuple[int, int]]:
    # should really extract things correctly here
    m = re.match(
        r".*Length=\"(\d*)\".*AdditionalEvents=\"(\d*)\".*", header_row.textdata
    )

    if not m:
        return None

    return int(m.group(1)), int(m.group(2))


def remove_pii_tags(protected_data: str) -> str:
    if protected_data[:5] == "<pii>" and protected_data[-6:] == "</pii>":
        return protected_data[5:-6]
    return protected_data


def get_all_guids(data: List[Row]) -> Set[str]:
    return {row.guid for row in data}


# GRAPH


def get_graph(data: List[Row], guid: str) -> Tuple[str, str]:
    rows = [row for row in data if row.guid == guid]

    rows = sorted(rows, key=operator.attrgetter("order_marker"))

    header, *graph_data = rows

    metadata = extract_metadata(header)

    if metadata:
        size, additional_events = metadata
        assert additional_events == len(
            graph_data
        ), f"metadata says there are {additional_events} rows; but there are {len(graph_data)}"

    graph_str_builder = [remove_pii_tags(row.textdata) for row in graph_data]

    return "".join(graph_str_builder), guid


# INPUT/OUTPUT FILES


def get_all_event_files() -> List[str]:
    return [os.path.join("data", f) for f in os.listdir("data")]


def get_output_file(input_file: str, guid: str, output_folder: str) -> str:
    _, input_file = os.path.split(input_file)
    name, ext = os.path.splitext(input_file)

    os.makedirs(output_folder, exist_ok=True)

    return os.path.join(output_folder, f"{name}-{guid}.DGML")


def writefile(filename: str, data: str) -> None:
    with open(filename, "w") as file:
        file.write(data)


def reassemble_file(filename: str) -> List[Tuple[str, str]]:
    result: List[Tuple[str, str]] = []

    try:
        data = load_file(filename)
        guids = get_all_guids(data)

        for guid in guids:
            result.append(get_graph(data, guid))
    except (IndexError, ValueError) as e:
        print(f"error processing {filename}: {e}")

    return result


def all_files() -> None:
    if not os.path.isdir("data"):
        print("directory 'data' does not exist.")
        return

    for input_file in get_all_event_files():
        try:
            data = load_file(input_file)
            guids = get_all_guids(data)

            os.makedirs("output", exist_ok=True)

            for guid in guids:
                graph, _ = get_graph(data, guid)
                output_file = get_output_file(input_file, guid, "output")
                print(f'Saving "{output_file}"')
                writefile(output_file, graph)

        except (IndexError, ValueError) as e:
            print(f"error processing {input_file}: {e}")


# SCRIPT


def print_help() -> None:
    print(
        """
Guide for rebuild.py

(requires Python 3.8 or later)

Use:

\tpython rebuild.py \tRebuilds all graphs in "./data" and writes them to "./output".

\tpython rebuild.py <inputfile> <outputfolder>\tRebuilds <inputfile> and writes them to <outputfolder>
"""
    )


def main() -> None:
    if len(sys.argv) == 1:
        print("Reassembling all graphs in ./data")
        all_files()
    if len(sys.argv) == 2:
        print_help()
    if len(sys.argv) == 3:
        _, input_file, output_folder = sys.argv

        for graph, guid in reassemble_file(input_file):
            output_file = get_output_file(input_file, guid, output_folder)
            print(f'Saving "{output_file}"')
            writefile(get_output_file(input_file, guid, output_folder), graph)


if __name__ == "__main__":
    main()
@@ -22,6 +22,9 @@ BISM Normalizer is a schema diff tool for tabular models

## [UsqlScripts](https://github.com/Microsoft/Analysis-Services/tree/master/UsqlScripts)
Sample U-SQL scripts that demonstrate how to process a TPC-DS data set in Azure Data Lake.

## [ASJobGraph](https://github.com/Microsoft/Analysis-Services/tree/master/ASJobGraphEvents)
Python script to reassemble job graph events from Analysis Services.

## Code of Conduct
This project has adopted the [Microsoft Open Source Code of
Conduct](https://opensource.microsoft.com/codeofconduct/).