Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/HazyResearch/deepdive int…
Browse files Browse the repository at this point in the history
…o fold-submodules
  • Loading branch information
netj committed Jan 24, 2017
2 parents 4bf4888 + 65c2b01 commit 1617b8c
Show file tree
Hide file tree
Showing 17 changed files with 39 additions and 42 deletions.
2 changes: 1 addition & 1 deletion compiler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Here's a brief summary of how the compilation is done.
The HOCON syntax used by `deepdive.conf` is interpreted by `hocon2json` and everything is converted into a single JSON config object that holds everything under the key "deepdive".

2. The config object is first extended with some implied extractors, such as initializing the database and loading input tables.
Then, the dependencies of extractors, factors, pipelines are normalized, and their names are qualified with corresponding prefixes (by `compile-config_normalized`) to make it easier and clearer to produce the final code for execution.
Then, the dependencies of extractors are normalized, and their names are qualified with corresponding prefixes (by `compile-config_normalized`) to make it easier and clearer to produce the final code for execution.
DeepDive's built-in processes for variables and factors, such as grounding, learning, inference, and calibration, are added to the config object after the normalization.
User's original config is kept intact under "deepdive" while the normalized one is created under a different key, "deepdive_".

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail
jq -r '
.deepdive_ as $deepdive
| $deepdive.execution.processes | to_entries[]
| select(.key | startswith("process/init/relation/"))
| select(.key | startswith($deepdive.execution.process_init_relation_prefix))
| select(.value.output_relation | in($deepdive.schema.variables_byName) | not) # do not treat variables as base relations
| .value.output_relation
' "$@" |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ $deepdive.execution.processes | to_entries[] |
[ .dependencies_[]?
| select(
(startswith("process/") and in($deepdive.execution.processes)
# TODO factor/ aren't really used, so remove?
or startswith("factor/") and (ltrimstr("factor/") | in($deepdive.inference.factors_byName))
or startswith("data/") and (ltrimstr("data/") | in($deepdive.schema.relations)
# XXX assume user is doing the right thing if schema.json is empty
or ($deepdive.schema.relations | length == 0))
Expand Down
7 changes: 0 additions & 7 deletions compiler/compile-code/compile-code-Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ def mktargets: mktargets("done");
# default commands
define CMD_data
endef
define CMD_pipeline
endef
define CMD_process
endef
define CMD_factor
Expand Down Expand Up @@ -57,11 +55,6 @@ reset: \(keys | mktargets("reset"))
$(RESET) \(.key | mktarget("done"))
"

# pipelines are special
, if .key | startswith("pipeline/") then "
.PHONY: \(.key | mktarget)
" else empty end

)

] | join("") }
15 changes: 9 additions & 6 deletions compiler/compile-code/compile-code-dataflow_dot
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# nodeId: the name with a newline inserted right after its first "/";
# used below as the quoted node name in the generated dot output
def nodeId: sub("/"; "/\n");
# nodeType: the name with everything from its first "/" onward removed,
# i.e., just the leading path component (e.g., "process" or "data")
def nodeType: sub("/.*$"; "");

# nodeTypes: emit every "/"-separated prefix of the name, longest first,
# ending with the empty string "", e.g.:
#   "process/init/app" -> "process/init/app", "process/init", "process", ""
# The trailing "" lets attribute lookups keyed by prefix fall through to the
# default entry stored under the "" key.
# NOTE range's upper bound is exclusive, so it must be -1 (not 0) for $i to reach 0
def nodeTypes: split("/") | range(length;-1;-1) as $i | .[:$i] | join("/");

(.deepdive_.execution.dot // {}) as $deepdiveDotConfig |

# Graphviz attrs (See: http://www.graphviz.org/doc/info/attrs.html#k:style)
Expand All @@ -28,14 +31,13 @@ def nodeType: sub("/.*$"; "");
# edge attributes by [srcType][dstType]
(
{ "" : { "": "color=\"#999999\"" }
, pipeline : { "": "style=dotted arrowhead=odiamond" }
} * ($deepdiveDotConfig.edge_attrs // {})
) as $edge_attrs |

.deepdive_.execution.dependencies |
{ path: "dataflow.dot", content: [
"
digraph \"\(env.DEEPDIVE_APP | sub(".*/"; "")) data flow\" {
digraph \"\(env.DEEPDIVE_APP // "" | sub(".*/"; "")) data flow\" {
graph[\($graph_attrs)
];
node [\(try ($node_attrs[""] // "") catch "")
Expand All @@ -46,16 +48,17 @@ digraph \"\(env.DEEPDIVE_APP | sub(".*/"; "")) data flow\" {
", ( to_entries[]
# process and data nodes
| "
\"\(.key | nodeId)\" [\($node_attrs[.key | nodeType] // "")];
\"\(.key | nodeId)\" [\([$node_attrs[.key | nodeTypes] | select(.)] | first // "")];
"

# dependency edges
, ( { from: .value[], to: .key } | "
\"\(.from | nodeId)\" -> \"\(.to | nodeId)\" [
label=\"\"
\(try ($edge_attrs[.from | nodeType] // $edge_attrs[""]) as $edge_attrs_from
| $edge_attrs_from[.to | nodeType] // $edge_attrs_from[""] // "" # TODO don't repeat $edge_attrs[""][""]
catch "")
\([ ($edge_attrs [.from | nodeTypes] | select(.)) as $edge_attrs_from
| $edge_attrs_from[.to | nodeTypes] | select(.)
] | first // "" # TODO don't repeat $edge_attrs[""][""]
)
];
" )

Expand Down
7 changes: 5 additions & 2 deletions compiler/compile-config/compile-config-0.00-init_objects
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
| .deepdive_.extraction.extractors |= . + {}
| .deepdive_.inference |= . + {}
| .deepdive_.inference.factors |= . + {}
| .deepdive_.pipeline |= . + {}
| .deepdive_.pipeline.pipelines |= . + {}

# make sure our intermediate representation for the execution plan is set up
| .deepdive_.execution |= . + {}
| .deepdive_.execution.processes |= . + {}

# some default values
# allowing the initial process to be named something else
| .deepdive_.execution.process_init_app |= (. // "process/init/app")
| .deepdive_.execution.process_init_relation_prefix |= (. // "process/init/relation/")
7 changes: 5 additions & 2 deletions compiler/compile-config/compile-config-0.51-add_init_app
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

# add a database initialization process
| .deepdive_.execution.processes +=
{ "process/init/app":
# NOTE when process_init_app is overridden, the compiler does not define the process and assumes it points to an already defined process
if $deepdive.execution.process_init_app == "process/init/app" then
([{ key: $deepdive.execution.process_init_app, value:
{ style: "cmd_extractor"
, cmd: (
# allow overriding app initialization steps
Expand All @@ -26,4 +28,5 @@
input/init.sh
fi
") }
}
}] | from_entries)
else {} end
4 changes: 2 additions & 2 deletions compiler/compile-config/compile-config-0.52-input_loader
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
) as $process_defining_the_relation |
[ $deepdive.schema.relations // {} | to_entries[]
| select($process_defining_the_relation[.key] | not)
| { key: "process/init/relation/\(.key)"
| { key: "\($deepdive.execution.process_init_relation_prefix)\(.key)"
, value: { style: "cmd_extractor"
, cmd: "deepdive create table \(.key | @sh) && deepdive load \(.key | @sh)"
, dependencies_: [ "process/init/app" ]
, dependencies_: [ $deepdive.execution.process_init_app ]
, output_relation: .key
, output_: ["data/\(.key)"]
} }
Expand Down
12 changes: 0 additions & 12 deletions compiler/compile-config/compile-config-1.00-qualified_names
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#
# - extractor names are prefixed with process/*
# - factor names are prefixed with factor/*
# - pipeline names are prefixed with pipeline/*
# - output_relation names are prefixed with data/* and kept under a new key output_
# - dependencies are rewritten with qualified names under a new key dependencies_
##
Expand Down Expand Up @@ -31,16 +30,5 @@
)
)

# qualify names in pipelines
| .deepdive_.pipeline.pipelines |= with_entries
( .key as $p
| .key |= "pipeline/\(.)"
| .value |= map( if $deepdive.extraction.extractors[.] then "process/\(.)"
elif $deepdive.inference.factors[.] then "factor/\(.)"
else error("\(.): Neither an extractor or inference rule in pipeline \($p)")
end
)
)

# turn all extractors into processes in the execution plan under compilation
| .deepdive_.execution.processes += .deepdive_.extraction.extractors
2 changes: 1 addition & 1 deletion compiler/compile-config/compile-config-2.01-grounding
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ def factorWeightDescriptionSqlExpr:

## from_grounding
# A nominal process to make it easy to redo the grounding
# TODO remove this once deepdive-do supports process groups or pipelines
# TODO remove this once deepdive-do supports process groups
| .deepdive_.execution.processes += {
"process/grounding/from_grounding": {
style: "cmd_extractor", cmd: ": no-op"
Expand Down
4 changes: 2 additions & 2 deletions compiler/compile-config/compile-config-9.98-ensure_init_app
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
.deepdive_ as $deepdive

| .deepdive_.execution.processes |= with_entries(
if .key == "process/init/app" then . else
if .key == $deepdive.execution.process_init_app then . else
.value |=
# simply add init/app to all processes whose dependencies are empty
# (This is a good approximation to making all processes depend on
Expand All @@ -13,7 +13,7 @@
# there's a cycle in the dependency.)
( if (.dependencies_ | length) > 0
or (.input_ | length) > 0 then .
else .dependencies_ += ["process/init/app"]
else .dependencies_ += [$deepdive.execution.process_init_app]
end)
end
)
2 changes: 1 addition & 1 deletion compiler/deepdive-compile
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ deepdive-check -a -c "$PWD"/config.json 'compiled_*' 2>&1 | sed 's/^/ /' >&2

###############################################################################
STEP "Compiling executable code into:"
# compile extractors and factors under process/ and factor/
# compile extractors under process/
pids=(--)
for cc in "$DEEPDIVE_HOME"/util/compile-code/compile-code-*; do
[[ -x "$cc" ]] || continue
Expand Down
1 change: 1 addition & 0 deletions database/deepdive-load
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ relationIsntInAppSchema=true
# internal dd_label column but nothing else)
if [[ -z "$Columns" && -e "$DEEPDIVE_APP" ]] && app-has-been-compiled; then
# make sure app has been initialized
# TODO process_init_app from run/compiled/config.json
deepdive-done process/init/app ||
DEEPDIVE_PLAN_EDIT=false deepdive-do process/init/app
# find the list of columns
Expand Down
2 changes: 1 addition & 1 deletion doc/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ deepdive {
}
```

In this template, the global section `deepdive` contains following major sections: `db`, `schema`, `extraction`, `inference`, `calibration`. Other optional sections are `sampler` and `pipeline`.
In this template, the global section `deepdive` contains the following major sections: `db`, `schema`, `extraction`, `inference`, `calibration`. Other optional sections are `sampler` and `execution`.

Links to these sections:

Expand Down
1 change: 1 addition & 0 deletions runner/deepdive-run
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ set -euo pipefail
# default target
[[ $# -gt 0 ]] || set -- model/calibration-plots

# TODO process_init_app from run/compiled/config.json
exec deepdive-redo process/init/app "$@"
10 changes: 8 additions & 2 deletions runner/format_timestamp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ set -eu
format_duration() {
local T=$1
if [[ $T -eq 0 ]]; then
echo "just now"
echo "0s"
else
[[ $T -gt 0 ]] || let T=$((0 - T))
local D=$((T/60/60/24))
Expand All @@ -34,7 +34,13 @@ for file; do
ts=$(date -r "$file" -Iseconds 2>/dev/null || date -r "$file" +%FT%T%:z)
nsecs=$(( $FORMAT_DURATION_SINCE - $(date -r "$file" +%s) ))
dur=$(format_duration $nsecs)
echo "$ts (${dur## } ago)"
# Print the timestamp followed by a parenthesized relative duration:
#   nsecs == 0  -> "just now"
#   nsecs <  0  -> "in<dur>"  (presumably $dur carries its own leading space -- confirm in format_duration)
#   otherwise   -> "<dur> ago" (with one leading space stripped from $dur)
# The case patterns use a leading "(" so parentheses stay balanced inside $( ).
# NOTE the first ")" on the last line ends the command substitution; the second
# one is the literal that closes the "(" printed right after $ts.
echo "$ts ($(
    case $nsecs in
        (0) echo "just now" ;;
        (-*) echo "in${dur}" ;;
        (*) echo "${dur## } ago" ;;
    esac
))"
exit
done
echo N/A
1 change: 1 addition & 0 deletions runner/resolve-args-to-do.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ unset MAKEFLAGS MFLAGS MAKEOVERRIDES MAKELEVEL
if [[ $# -gt 0 ]]; then
makeTargets=()
for target; do
# TODO recognize all
resolved=false
for fmt in %s %s.done {data,process,data/model,model,process/{model,grounding{,/{factor,variable}}}}/%s.done; do
makeTarget=$(printf "$fmt" "$target")
Expand Down

0 comments on commit 1617b8c

Please sign in to comment.