Skip to content

Commit

Permalink
Support view extends
Browse files Browse the repository at this point in the history
  • Loading branch information
paco-valdez committed Jun 13, 2024
1 parent 7b4a48a commit 474a546
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 119 deletions.
10 changes: 8 additions & 2 deletions lkml2cube/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ def cubes(
printonly: Annotated[
bool, typer.Option(help="Print to stdout the parsed files")
] = False,
rootdir: Annotated[
str, typer.Option(help="The path to prepend to include paths")
] = None,
):
"""
Generate cubes-only given a LookML file that contains LookML Views.
"""

lookml_model = file_loader(file_path)
lookml_model = file_loader(file_path, rootdir)

if lookml_model is None:
typer.echo(f"No files were found on path: {file_path}")
Expand Down Expand Up @@ -81,12 +84,15 @@ def views(
printonly: Annotated[
bool, typer.Option(help="Print to stdout the parsed files")
] = False,
rootdir: Annotated[
str, typer.Option(help="The path to prepend to include paths")
] = None,
):
"""
Generate cubes-only given a LookML file that contains LookML Views.
"""

lookml_model = file_loader(file_path)
lookml_model = file_loader(file_path, rootdir)

if lookml_model is None:
typer.echo(f"No files were found on path: {file_path}")
Expand Down
78 changes: 47 additions & 31 deletions lkml2cube/parser/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,78 +8,94 @@

# Paths already parsed by file_loader, keyed by the path string returned from
# glob. file_loader skips any path present here, so a file is loaded at most
# once per process even when several files include it (or include each other).
visited_path = {}


def update_namespace(namespace, new_file):
    """Merge one parsed LookML file into the accumulated namespace.

    Args:
        namespace: The dict accumulated so far, or ``None`` when nothing has
            been loaded yet. It is updated in place.
        new_file: The dict parsed from a single LookML file.

    Returns:
        ``new_file`` itself when ``namespace`` is ``None``; otherwise the
        updated ``namespace``.

    ``views`` and ``explores`` lists are concatenated, ``includes`` are merged
    without duplicates, ``connection`` is ignored, and any other key is
    reported as unsupported.
    """
    if namespace is None:
        return new_file

    for key, value in new_file.items():
        if key in ("views", "explores"):
            namespace[key] = namespace[key] + value if key in namespace else value
        elif key == "includes":
            if key in namespace:
                # dict.fromkeys removes duplicates while keeping first-seen
                # order, so the merged list is deterministic (a set is not).
                namespace[key] = list(dict.fromkeys(namespace[key] + value))
            else:
                namespace[key] = value
        elif key == "connection":
            # Compare with ==: `key in ("connection")` is a substring test
            # against a plain string, not membership in a tuple.
            pass  # ignored keys
        else:
            typer.echo(f"Key not supported yet: {key}")
    return namespace

def file_loader(file_path_input, rootdir_param=None, namespace=None):
    """Load every LookML file matching a glob, following its includes.

    Args:
        file_path_input: Glob pattern of the LookML files to load.
        rootdir_param: Directory that include paths are joined to. When
            falsy, includes are resolved against the directory of the file
            that declares them.
        namespace: Namespace accumulated by earlier calls, or ``None``.

    Returns:
        The merged namespace, or the ``namespace`` argument unchanged
        (``None`` by default) when no new file matched the pattern.
    """
    for file_path in glob.glob(file_path_input):
        if file_path in visited_path:
            continue
        visited_path[file_path] = True

        # Context manager so the handle is closed even if parsing fails.
        with open(file_path, "r") as lookml_file:
            lookml_model = lkml.load(lookml_file)

        for included_path in lookml_model.get("includes", []):
            if (
                namespace
                and "includes" in namespace
                and included_path in namespace["includes"]
            ):
                continue
            # A leading "/" would make join() discard root_dir entirely.
            if included_path.startswith("/"):
                included_path = included_path[1:]
            root_dir = rootdir_param or dirname(abspath(file_path))
            namespace = file_loader(
                join(root_dir, included_path), rootdir_param, namespace=namespace
            )
        namespace = update_namespace(namespace, lookml_model)
    return namespace


def write_single_file(
    cube_def: dict,
    outputdir: str,
    subdir: str = "cubes",
    file_name: str = "my_cubes.yml",
):
    """Dump *cube_def* as YAML to ``outputdir/subdir/file_name``.

    Args:
        cube_def: The definition to serialize.
        outputdir: Base output directory.
        subdir: Sub-directory of ``outputdir`` that receives the file; it
            must already exist.
        file_name: Name of the file to write.
    """
    # Serialize before opening, so a failing dump never leaves a truncated
    # file behind.
    content = yaml.dump(cube_def, allow_unicode=True)
    # allow_unicode=True emits non-ASCII characters verbatim, so the encoding
    # is pinned to UTF-8 instead of relying on the platform locale.
    with open(join(outputdir, subdir, file_name), "w", encoding="utf-8") as output_file:
        output_file.write(content)


def write_files(cube_def, outputdir):
    """Write each cube and view of *cube_def* to its own YAML file.

    Files are written to ``outputdir/cubes/<name>.yml`` and
    ``outputdir/views/<name>.yml``; the directories are created on demand.

    Args:
        cube_def: Dict with optional ``"cubes"`` and ``"views"`` lists whose
            elements each carry a ``"name"``.
        outputdir: Base output directory.

    Raises:
        Exception: If ``cube_def`` is empty or ``None``.
    """
    if not cube_def:
        raise Exception("No cube definition available")

    for cube_root_element in ("cubes", "views"):
        if cube_root_element not in cube_def:
            continue

        Path(join(outputdir, cube_root_element)).mkdir(parents=True, exist_ok=True)

        # Each file holds exactly one element under its own root key. Writing
        # the whole cube_def for a lone element would copy the other root
        # key's elements into that file as well. An empty list is not
        # expected but not invalid: nothing is written.
        for cube_element in cube_def[cube_root_element]:
            write_single_file(
                cube_def={cube_root_element: [cube_element]},
                outputdir=outputdir,
                subdir=cube_root_element,
                file_name=cube_element["name"] + ".yml",
            )

189 changes: 103 additions & 86 deletions lkml2cube/parser/views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

import copy
import traceback
import typer

Expand All @@ -7,126 +7,143 @@

def parse_view(lookml_model, raise_when_views_not_present=True):
    """Translate the LookML views of *lookml_model* into Cube cube definitions.

    Args:
        lookml_model: Parsed LookML namespace; its ``"views"`` list is read.
        raise_when_views_not_present: When true, a model without ``"views"``
            raises; otherwise an empty definition is returned.

    Returns:
        ``{"cubes": [...]}`` with one cube per view that parsed successfully.
        A view that raises while being parsed is reported and skipped.

    Raises:
        Exception: If the model has no ``"views"`` key and
            ``raise_when_views_not_present`` is true.
    """
    cubes = []
    cube_def = {"cubes": cubes}

    def rpl_table(sql):
        # LookML "${TABLE}" becomes Cube "{CUBE}"; other "${ref}" become "{ref}".
        return sql.replace("${TABLE}", "{CUBE}").replace("${", "{")

    type_map = {
        "zipcode": "string",
        "string": "string",
        "number": "number",
        "tier": "number",
        "count": "count",
        "yesno": "boolean",
        "sum": "sum",
        "sum_distinct": "sum",
        "average": "avg",
        "average_distinct": "avg",
        "date": "time",
        "time": "time",
        "count_distinct": "count_distinct_approx",
    }
    # Field sets seen so far, by name; used to expand "set*" drill fields.
    sets = {}

    if "views" not in lookml_model:
        if raise_when_views_not_present:
            raise Exception(
                f"The following object types are not implemented yet: {lookml_model.keys()}"
            )
        return cube_def

    for view in lookml_model["views"]:
        try:
            for field_set in view.get("sets", []):
                sets[field_set["name"]] = field_set["fields"]

            label = view.get("label", view.get("view_label", view["name"]))
            cube = {
                "name": view["name"],
                "description": label,
                "dimensions": [],
                "measures": [],
                "joins": [],
            }

            if "extends" in view or "extends__all" in view:
                declared = view.get("extends", view.get("extends__all", []))
                if isinstance(declared, str):
                    declared = [declared]
                # Usually a list of lists of names; plain names are accepted
                # too, since iterating a string would split it into characters.
                extended_views = []
                for entry in declared:
                    if isinstance(entry, str):
                        extended_views.append(entry)
                    else:
                        extended_views.extend(entry)

                parent_views = []
                for lkml_view in extended_views:
                    found = False
                    for view_item in lookml_model["views"]:
                        if lkml_view == view_item["name"]:
                            parent_views.append(view_item)
                            found = True
                    if not found:
                        typer.echo(f"View not found: {lkml_view}")
                parent_views.append(view)

                # MRO is left to right: later views override earlier ones and
                # the extending view wins. The merge is shallow (key by key).
                merged_view = copy.deepcopy(parent_views[0])
                for next_view in parent_views[1:]:
                    merged_view.update(next_view)
                view = merged_view

            if "sql_table_name" in view:
                cube["sql_table"] = view["sql_table_name"]
            elif "derived_table" in view and "sql" in view["derived_table"]:
                cube["sql"] = view["derived_table"]["sql"]

            for dimension in view.get("dimensions", []):
                # Defaults to string, cube needs a type. Kept in a local so
                # the caller's model is not modified.
                dimension_type = dimension.get("type", "string")
                if dimension_type not in type_map:
                    typer.echo(
                        f'Dimension type: {dimension_type} not implemented yet:\n {dimension}'
                    )
                    continue
                cube_dimension = {
                    "name": dimension["name"],
                    "sql": rpl_table(dimension["sql"]),
                    "type": type_map[dimension_type],
                }
                if dimension_type == "tier":
                    bins = dimension.get("bins", dimension.get("tiers"))
                    if not bins:
                        typer.echo(
                            f'Dimension type: {dimension_type} requires tiers'
                        )
                        continue
                    # A single boundary defines no range; keep the plain SQL.
                    if len(bins) >= 2:
                        base_sql = cube_dimension["sql"]
                        when_clauses = "".join(
                            f" WHEN {base_sql} >= {low} AND {base_sql} < {high} THEN {low} "
                            for low, high in zip(bins, bins[1:])
                        )
                        cube_dimension["sql"] = f"CASE {when_clauses}ELSE NULL END"
                cube["dimensions"].append(cube_dimension)

            for dimension in view.get("dimension_groups", []):
                # Skip only the offending group; falling through would raise
                # KeyError and discard the whole view.
                if "type" not in dimension:
                    typer.echo(
                        f'Dimension type: is required for {dimension.get("name")}'
                    )
                    continue
                if dimension["type"] not in type_map:
                    typer.echo(
                        f'Dimension type: {dimension["type"]} not implemented yet:\n {dimension}'
                    )
                    continue
                cube_dimension = {
                    "name": dimension["name"],
                    "sql": rpl_table(dimension["sql"]),
                    "type": type_map[dimension["type"]],
                }
                cube["dimensions"].append(cube_dimension)

            for measure in view.get("measures", []):
                if measure["type"] not in type_map:
                    msg = f'Measure type: {measure["type"]} not implemented yet:\n# {measure}'
                    typer.echo(f"# {msg}")
                    continue
                cube_measure = {
                    "name": measure["name"],
                    "type": type_map[measure["type"]],
                }
                if measure["type"] != "count":
                    cube_measure["sql"] = rpl_table(measure["sql"])
                elif "drill_fields" in measure:
                    drill_members = []
                    for drill_field in measure["drill_fields"]:
                        if "*" in drill_field:
                            # "name*" refers to a set; expand it to its fields.
                            set_name = drill_field.replace("*", "")
                            if set_name not in sets:
                                typer.echo(f"set undefined {set_name}")
                            else:
                                drill_members += sets[set_name]
                        else:
                            drill_members.append(drill_field)
                    cube_measure["drill_members"] = drill_members

                cube["measures"].append(cube_measure)

            cubes.append(cube)
        except Exception:
            # Best effort: report the failing view and carry on with the rest.
            typer.echo(f"Error while parsing view: {pformat(view)}")
            typer.echo(traceback.format_exc())
    return cube_def

0 comments on commit 474a546

Please sign in to comment.