diff --git a/compiler/README.md b/compiler/README.md index 8e9dfddd2..2bd8be678 100644 --- a/compiler/README.md +++ b/compiler/README.md @@ -8,7 +8,7 @@ Here's a brief summary of how the compilation is done. The HOCON syntax used by `deepdive.conf` is interpreted by `hocon2json` and everything is converted into a single JSON config object that holds everything under the key "deepdive". 2. The config object is first extended with some implied extractors, such as initializing the database and loading input tables. - Then, the dependencies of extractors, factors, pipelines are normalized, and their names are qualified with corresponding prefixes (by `compile-config_normalized`) to make it easier and clearer to produce the final code for execution. + Then, the dependencies of extractors are normalized, and their names are qualified with corresponding prefixes (by `compile-config_normalized`) to make it easier and clearer to produce the final code for execution. DeepDive's built-in processes for variables and factors, such as grounding, learning, inference, and calibration, are added to the config object after the normalization. User's original config is kept intact under "deepdive" while the normalized one is created under a different key, "deepdive_". 
diff --git a/compiler/compile-check/compile-check-1-compiled_base_relations_have_input_data b/compiler/compile-check/compile-check-1-compiled_base_relations_have_input_data index 39d8cf799..0418e7082 100755 --- a/compiler/compile-check/compile-check-1-compiled_base_relations_have_input_data +++ b/compiler/compile-check/compile-check-1-compiled_base_relations_have_input_data @@ -5,7 +5,7 @@ set -euo pipefail jq -r ' .deepdive_ as $deepdive | $deepdive.execution.processes | to_entries[] -| select(.key | startswith("process/init/relation/")) +| select(.key | startswith($deepdive.execution.process_init_relation_prefix)) | select(.value.output_relation | in($deepdive.schema.variables_byName) | not) # do not treat variables as base relations | .value.output_relation ' "$@" | diff --git a/compiler/compile-check/compile-check-1-compiled_dependencies_correct b/compiler/compile-check/compile-check-1-compiled_dependencies_correct index 5fda7f825..cb3ddfb42 100755 --- a/compiler/compile-check/compile-check-1-compiled_dependencies_correct +++ b/compiler/compile-check/compile-check-1-compiled_dependencies_correct @@ -18,8 +18,6 @@ $deepdive.execution.processes | to_entries[] | [ .dependencies_[]? | select( (startswith("process/") and in($deepdive.execution.processes) - # TODO factor/ aren't really used, so remove? 
- or startswith("factor/") and (ltrimstr("factor/") | in($deepdive.inference.factors_byName)) or startswith("data/") and (ltrimstr("data/") | in($deepdive.schema.relations) # XXX assume user is doing the right thing if schema.json is empty or ($deepdive.schema.relations | length == 0)) diff --git a/compiler/compile-code/compile-code-Makefile b/compiler/compile-code/compile-code-Makefile index aa79d1ec3..630bf9ee5 100755 --- a/compiler/compile-code/compile-code-Makefile +++ b/compiler/compile-code/compile-code-Makefile @@ -15,8 +15,6 @@ def mktargets: mktargets("done"); # default commands define CMD_data endef -define CMD_pipeline -endef define CMD_process endef define CMD_factor @@ -57,11 +55,6 @@ reset: \(keys | mktargets("reset")) $(RESET) \(.key | mktarget("done")) " -# pipelines are special -, if .key | startswith("pipeline/") then " -.PHONY: \(.key | mktarget) -" else empty end - ) ] | join("") } diff --git a/compiler/compile-code/compile-code-dataflow_dot b/compiler/compile-code/compile-code-dataflow_dot index d19be8b15..81bc17a77 100755 --- a/compiler/compile-code/compile-code-dataflow_dot +++ b/compiler/compile-code/compile-code-dataflow_dot @@ -5,6 +5,9 @@ def nodeId: sub("/"; "/\n"); def nodeType: sub("/.*$"; ""); +# produce all prefixes (longest to shortest, being empty) +def nodeTypes: split("/") | range(length;-1;-1) as $i | .[:$i] | join("/"); + (.deepdive_.execution.dot // {}) as $deepdiveDotConfig | # Graphviz attrs (See: http://www.graphviz.org/doc/info/attrs.html#k:style) @@ -28,14 +31,13 @@ def nodeType: sub("/.*$"; ""); # edge attributes by [srcType][dstType] ( { "" : { "": "color=\"#999999\"" } - , pipeline : { "": "style=dotted arrowhead=odiamond" } } * ($deepdiveDotConfig.edge_attrs // {}) ) as $edge_attrs | .deepdive_.execution.dependencies | { path: "dataflow.dot", content: [ " -digraph \"\(env.DEEPDIVE_APP | sub(".*/"; "")) data flow\" { +digraph \"\(env.DEEPDIVE_APP // "" | sub(".*/"; "")) data flow\" { graph[\($graph_attrs) ]; node [\(try 
($node_attrs[""] // "") catch "") @@ -46,16 +48,17 @@ digraph \"\(env.DEEPDIVE_APP | sub(".*/"; "")) data flow\" { ", ( to_entries[] # process and data nodes | " -\"\(.key | nodeId)\" [\($node_attrs[.key | nodeType] // "")]; +\"\(.key | nodeId)\" [\([$node_attrs[.key | nodeTypes] | select(.)] | first // "")]; " # dependency edges , ( { from: .value[], to: .key } | " \"\(.from | nodeId)\" -> \"\(.to | nodeId)\" [ label=\"\" - \(try ($edge_attrs[.from | nodeType] // $edge_attrs[""]) as $edge_attrs_from - | $edge_attrs_from[.to | nodeType] // $edge_attrs_from[""] // "" # TODO don't repeat $edge_attrs[""][""] - catch "") + \([ ($edge_attrs [.from | nodeTypes] | select(.)) as $edge_attrs_from + | $edge_attrs_from[.to | nodeTypes] | select(.) + ] | first // "" # TODO don't repeat $edge_attrs[""][""] + ) ]; " ) diff --git a/compiler/compile-config/compile-config-0.00-init_objects b/compiler/compile-config/compile-config-0.00-init_objects index 0fe694072..1941dc331 100755 --- a/compiler/compile-config/compile-config-0.00-init_objects +++ b/compiler/compile-config/compile-config-0.00-init_objects @@ -13,9 +13,12 @@ | .deepdive_.extraction.extractors |= . + {} | .deepdive_.inference |= . + {} | .deepdive_.inference.factors |= . + {} -| .deepdive_.pipeline |= . + {} -| .deepdive_.pipeline.pipelines |= . + {} # make sure our intermediate representation for execution plan set up | .deepdive_.execution |= . + {} | .deepdive_.execution.processes |= . + {} + +# some default values +# allowing the initial process to be named something else +| .deepdive_.execution.process_init_app |= (. // "process/init/app") +| .deepdive_.execution.process_init_relation_prefix |= (. 
// "process/init/relation/") diff --git a/compiler/compile-config/compile-config-0.51-add_init_app b/compiler/compile-config/compile-config-0.51-add_init_app index e6577171a..9ebd4971d 100755 --- a/compiler/compile-config/compile-config-0.51-add_init_app +++ b/compiler/compile-config/compile-config-0.51-add_init_app @@ -5,7 +5,9 @@ # add a database initialization process | .deepdive_.execution.processes += - { "process/init/app": + # NOTE when process_init_app is overridden, compiler does not define the process and assume it points to an already defined process + if $deepdive.execution.process_init_app == "process/init/app" then + ([{ key: $deepdive.execution.process_init_app, value: { style: "cmd_extractor" , cmd: ( # allow overriding app initialization steps @@ -26,4 +28,5 @@ input/init.sh fi ") } - } + }] | from_entries) + else {} end diff --git a/compiler/compile-config/compile-config-0.52-input_loader b/compiler/compile-config/compile-config-0.52-input_loader index a8f66f59b..f117401e6 100755 --- a/compiler/compile-config/compile-config-0.52-input_loader +++ b/compiler/compile-config/compile-config-0.52-input_loader @@ -13,10 +13,10 @@ ) as $process_defining_the_relation | [ $deepdive.schema.relations // {} | to_entries[] | select($process_defining_the_relation[.key] | not) - | { key: "process/init/relation/\(.key)" + | { key: "\($deepdive.execution.process_init_relation_prefix)\(.key)" , value: { style: "cmd_extractor" , cmd: "deepdive create table \(.key | @sh) && deepdive load \(.key | @sh)" - , dependencies_: [ "process/init/app" ] + , dependencies_: [ $deepdive.execution.process_init_app ] , output_relation: .key , output_: ["data/\(.key)"] } } diff --git a/compiler/compile-config/compile-config-1.00-qualified_names b/compiler/compile-config/compile-config-1.00-qualified_names index ce78ad6d9..144bcd47f 100755 --- a/compiler/compile-config/compile-config-1.00-qualified_names +++ b/compiler/compile-config/compile-config-1.00-qualified_names @@ -3,7 +3,6 @@ 
# # - extractor names are prefixed with process/* # - factor names are prefixed with factor/* -# - pipeline names are prefixed with pipeline/* # - output_relation names are prefixed with data/* and kept under a new key output_ # - dependencies are rewritten with qualified names under a new key dependencies_ ## @@ -31,16 +30,5 @@ ) ) -# qualify names in pipelines -| .deepdive_.pipeline.pipelines |= with_entries - ( .key as $p - | .key |= "pipeline/\(.)" - | .value |= map( if $deepdive.extraction.extractors[.] then "process/\(.)" - elif $deepdive.inference.factors[.] then "factor/\(.)" - else error("\(.): Neither an extractor or inference rule in pipeline \($p)") - end - ) - ) - # turn all extractors into processes in the execution plan under compilation | .deepdive_.execution.processes += .deepdive_.extraction.extractors diff --git a/compiler/compile-config/compile-config-2.01-grounding b/compiler/compile-config/compile-config-2.01-grounding index 6cf62835e..75452a496 100755 --- a/compiler/compile-config/compile-config-2.01-grounding +++ b/compiler/compile-config/compile-config-2.01-grounding @@ -702,7 +702,7 @@ def factorWeightDescriptionSqlExpr: ## from_grounding # A nominal process to make it easy to redo the grounding -# TODO remove this once deepdive-do supports process groups or pipelines +# TODO remove this once deepdive-do supports process groups | .deepdive_.execution.processes += { "process/grounding/from_grounding": { style: "cmd_extractor", cmd: ": no-op" diff --git a/compiler/compile-config/compile-config-9.98-ensure_init_app b/compiler/compile-config/compile-config-9.98-ensure_init_app index cac0525fb..ab452cf37 100755 --- a/compiler/compile-config/compile-config-9.98-ensure_init_app +++ b/compiler/compile-config/compile-config-9.98-ensure_init_app @@ -4,7 +4,7 @@ .deepdive_ as $deepdive | .deepdive_.execution.processes |= with_entries( - if .key == "process/init/app" then . else + if .key == $deepdive.execution.process_init_app then . 
else .value |= # simply add init/app to all processes whose dependencies are empty # (This is a good approximation to making all processes depend on @@ -13,7 +13,7 @@ # there's a cycle in the dependency.) ( if (.dependencies_ | length) > 0 or (.input_ | length) > 0 then . - else .dependencies_ += ["process/init/app"] + else .dependencies_ += [$deepdive.execution.process_init_app] end) end ) diff --git a/compiler/deepdive-compile b/compiler/deepdive-compile index aa14868db..2f306dfb6 100755 --- a/compiler/deepdive-compile +++ b/compiler/deepdive-compile @@ -148,7 +148,7 @@ deepdive-check -a -c "$PWD"/config.json 'compiled_*' 2>&1 | sed 's/^/ /' >&2 ############################################################################### STEP "Compiling executable code into:" -# compile extractors and factors under process/ and factor/ +# compile extractors under process/ pids=(--) for cc in "$DEEPDIVE_HOME"/util/compile-code/compile-code-*; do [[ -x "$cc" ]] || continue diff --git a/database/deepdive-load b/database/deepdive-load index 46fd4ca61..f2f573faf 100755 --- a/database/deepdive-load +++ b/database/deepdive-load @@ -87,6 +87,7 @@ relationIsntInAppSchema=true # internal dd_label column but nothing else) if [[ -z "$Columns" && -e "$DEEPDIVE_APP" ]] && app-has-been-compiled; then # make sure app has been initialized + # TODO process_init_app from run/compiled/config.json deepdive-done process/init/app || DEEPDIVE_PLAN_EDIT=false deepdive-do process/init/app # find the list of columns diff --git a/doc/configuration.md b/doc/configuration.md index 810a9544a..4adf65bb0 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -45,7 +45,7 @@ deepdive { } ``` -In this template, the global section `deepdive` contains following major sections: `db`, `schema`, `extraction`, `inference`, `calibration`. Other optional sections are `sampler` and `pipeline`. 
+In this template, the global section `deepdive` contains following major sections: `db`, `schema`, `extraction`, `inference`, `calibration`. Other optional sections are `sampler` and `execution`. Links to these sections: diff --git a/runner/deepdive-run b/runner/deepdive-run index 81bfea584..570be8d3e 100755 --- a/runner/deepdive-run +++ b/runner/deepdive-run @@ -16,4 +16,5 @@ set -euo pipefail # default target [[ $# -gt 0 ]] || set -- model/calibration-plots +# TODO process_init_app from run/compiled/config.json exec deepdive-redo process/init/app "$@" diff --git a/runner/format_timestamp b/runner/format_timestamp index 72b2cd333..78c978f2a 100755 --- a/runner/format_timestamp +++ b/runner/format_timestamp @@ -13,7 +13,7 @@ set -eu format_duration() { local T=$1 if [[ $T -eq 0 ]]; then - echo "just now" + echo "0s" else [[ $T -gt 0 ]] || let T=$((0 - T)) local D=$((T/60/60/24)) @@ -34,7 +34,13 @@ for file; do ts=$(date -r "$file" -Iseconds 2>/dev/null || date -r "$file" +%FT%T%:z) nsecs=$(( $FORMAT_DURATION_SINCE - $(date -r "$file" +%s) )) dur=$(format_duration $nsecs) - echo "$ts (${dur## } ago)" + echo "$ts ($( + case $nsecs in + (0) echo "just now" ;; + (-*) echo "in${dur}" ;; + (*) echo "${dur## } ago" ;; + esac + ))" exit done echo N/A diff --git a/runner/resolve-args-to-do.sh b/runner/resolve-args-to-do.sh index 8aa7374e1..64ed85877 100644 --- a/runner/resolve-args-to-do.sh +++ b/runner/resolve-args-to-do.sh @@ -23,6 +23,7 @@ unset MAKEFLAGS MFLAGS MAKEOVERRIDES MAKELEVEL if [[ $# -gt 0 ]]; then makeTargets=() for target; do + # TODO recognize all resolved=false for fmt in %s %s.done {data,process,data/model,model,process/{model,grounding{,/{factor,variable}}}}/%s.done; do makeTarget=$(printf "$fmt" "$target")