From 8450ce7ccf489c771e57c47c63a18caca53a9ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Tr=C3=B3jczak?= Date: Mon, 14 Nov 2022 15:58:56 +0100 Subject: [PATCH] 28 suppressing named individuals (#37) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add documentation update and upgrade the project version Co-authored-by: Rafał Trójczak --- README.md | 1 + pom.xml | 19 +- .../edmcouncil/rdf_toolkit/RdfFormatter.java | 4 +- .../runner/CommandLineArgumentsHandler.java | 131 ++ .../rdf_toolkit/runner/OptionHandler.java | 89 +- .../rdf_toolkit/runner/RdfToolkitOptions.java | 12 + .../rdf_toolkit/runner/RdfToolkitRunner.java | 144 +- .../{ => constant}/CommandLineOption.java | 5 +- .../runner/{ => constant}/RunningMode.java | 2 +- .../RdfToolkitOptionHandlingException.java | 1 + .../rdf_toolkit/util/Constants.java | 1 + .../rdf_toolkit/util/TextUtils.java | 20 +- .../writer/SortedJsonLdWriter.java | 1205 +++++++++-------- .../rdf_toolkit/writer/SortedRdfWriter.java | 1143 ++++++++-------- .../writer/SortedRdfXmlWriter.java | 22 +- .../writer/SortedTurtleWriter.java | 2 +- .../CommandLineArgumentsHandlerTest.java | 108 ++ 17 files changed, 1619 insertions(+), 1290 deletions(-) create mode 100644 src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandler.java rename src/main/java/org/edmcouncil/rdf_toolkit/runner/{ => constant}/CommandLineOption.java (96%) rename src/main/java/org/edmcouncil/rdf_toolkit/runner/{ => constant}/RunningMode.java (96%) create mode 100644 src/test/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandlerTest.java diff --git a/README.md b/README.md index 7977dc0..155de2e 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ java -jar rdf-toolkit.jar --source C:/source_ontologies/example.rdf --target C:/ turtle (Turtle) -sip,--short-iri-priority set what takes priority when shortening IRIs: prefix [default], base-iri + -sni,--suppress-named-individuals suppresses all instances of owl:NamedIndividual -t,--target target (output) RDF file -tc,--trailing-comment sets the text of the trailing comment in the ontology. Can be repeated for a multi-line comment diff --git a/pom.xml b/pom.xml index 181eacd..9d02313 100644 --- a/pom.xml +++ b/pom.xml @@ -6,12 +6,16 @@ org.edmcouncil rdf-toolkit - 1.13.0 + 1.14.0 UTF-8 11 11 + + + 1.4.4 + 5.9.0 @@ -82,19 +86,25 @@ ch.qos.logback logback-classic - 1.2.11 + ${logback.version} org.junit.jupiter junit-jupiter-api - 5.9.0 + ${junit.jupiter.version} test org.junit.jupiter junit-jupiter-engine - 5.9.0 + ${junit.jupiter.version} + test + + + org.junit.jupiter + junit-jupiter-params + ${junit.jupiter.version} test @@ -252,7 +262,6 @@ - \ No newline at end of file diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/RdfFormatter.java b/src/main/java/org/edmcouncil/rdf_toolkit/RdfFormatter.java index 611f083..342646c 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/RdfFormatter.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/RdfFormatter.java @@ -24,11 +24,11 @@ package org.edmcouncil.rdf_toolkit; +import java.io.PrintWriter; +import java.io.StringWriter; import org.edmcouncil.rdf_toolkit.runner.RdfToolkitRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.PrintWriter; -import java.io.StringWriter; /** * RDF formatter that formats in a consistent order, friendly for version control systems. diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandler.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandler.java new file mode 100644 index 0000000..9ce3046 --- /dev/null +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandler.java @@ -0,0 +1,131 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Enterprise Data Management Council + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package org.edmcouncil.rdf_toolkit.runner; + +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.HELP; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.VERSION; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.EXIT; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.PRINT_AND_EXIT; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.PRINT_USAGE_AND_EXIT; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.RUN_ON_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.RUN_ON_FILE; + +import com.jcabi.manifests.Manifests; +import java.io.FileNotFoundException; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.memory.model.MemValueFactory; +import org.edmcouncil.rdf_toolkit.RdfFormatter; +import org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption; +import org.edmcouncil.rdf_toolkit.runner.exception.RdfToolkitOptionHandlingException; + +public class CommandLineArgumentsHandler { + + private final Options options; + private final ValueFactory valueFactory; + + public CommandLineArgumentsHandler() { + this.options = CommandLineOption.prepareOptions(); + this.valueFactory = new MemValueFactory(); + } + + public RdfToolkitOptions handleArguments(String[] args) + throws RdfToolkitOptionHandlingException, FileNotFoundException, ParseException { + var rdfToolkitOptions = new RdfToolkitOptions(args); + + // Parse the command line options. + CommandLineParser parser = new DefaultParser(); + CommandLine line = parser.parse(options, args); + rdfToolkitOptions.setCommandLine(line); + + var optionHandler = new OptionHandler(rdfToolkitOptions); + + // Print out version, if requested. + if (line.hasOption(VERSION.getShortOpt())) { + rdfToolkitOptions.setOutput(getVersion()); + rdfToolkitOptions.setRunningMode(PRINT_AND_EXIT); + return rdfToolkitOptions; + } + + // Print out help, if requested. + if (line.hasOption(HELP.getShortOpt())) { + usage(options); + rdfToolkitOptions.setRunningMode(EXIT); + return rdfToolkitOptions; + } + + optionHandler.handleRunningOnDirectory(line, rdfToolkitOptions); + if (rdfToolkitOptions.getRunningMode() == PRINT_USAGE_AND_EXIT) { + usage(options); + return rdfToolkitOptions; + } + if (rdfToolkitOptions.getRunningMode() == RUN_ON_DIRECTORY) { + return rdfToolkitOptions; + } + + var sourceFile = optionHandler.handleSourceFile(); + optionHandler.handleTargetFile(); + optionHandler.handleBaseIri(valueFactory); + optionHandler.handleIriReplacementOptions(); + optionHandler.handleUseDtdSubset(); + optionHandler.handleInlineBlankNodes(); + optionHandler.handleInferBaseIri(); + optionHandler.handleLeadingComments(); + optionHandler.handleTrailingComments(); + optionHandler.handleStringDataTyping(); + optionHandler.handleOverrideStringLanguage(); + optionHandler.handleIndent(); + optionHandler.handleSourceFormat(sourceFile); + optionHandler.handleTargetFormat(); + optionHandler.handleShortUriPref(); + optionHandler.handleLineEnd(); + optionHandler.handleOmitXmlnsNamespace(); + optionHandler.handleSuppressNamedIndividuals(); + + rdfToolkitOptions.setRunningMode(RUN_ON_FILE); + + return rdfToolkitOptions; + } + + private void usage(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.setWidth(100); + formatter.printHelp(getVersion(), options); + } + + private String getVersion() { + String implementationTitle = Manifests.read("Implementation-Title"); + String implementationVersion = Manifests.read("Implementation-Version"); + return String.format( + "%s (%s version %s)", + RdfFormatter.class.getSimpleName(), + implementationTitle, + implementationVersion); + } +} diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/OptionHandler.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/OptionHandler.java index 384b5af..f3053c0 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/OptionHandler.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/OptionHandler.java @@ -21,30 +21,32 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + package org.edmcouncil.rdf_toolkit.runner; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.BASE_IRI; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.INDENT; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.INFER_BASE_IRI; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.INLINE_BLANK_NODES; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.LEADING_COMMENT; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.LINE_END; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.OMIT_XMLNS_NAMESPACE; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.OVERRIDE_STRING_LANGUAGE; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SHORT_IRI_PRIORITY; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE_DIRECTORY; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE_DIRECTORY_PATTERN; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE_FORMAT; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.STRING_DATA_TYPING; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET_DIRECTORY; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET_DIRECTORY_PATTERN; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET_FORMAT; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TRAILING_COMMENT; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.USE_DTD_SUBSET; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.PRINT_USAGE_AND_EXIT; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.RUN_ON_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.BASE_IRI; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.INDENT; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.INFER_BASE_IRI; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.INLINE_BLANK_NODES; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.LEADING_COMMENT; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.LINE_END; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.OMIT_XMLNS_NAMESPACE; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.OVERRIDE_STRING_LANGUAGE; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SHORT_IRI_PRIORITY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE_DIRECTORY_PATTERN; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE_FORMAT; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.STRING_DATA_TYPING; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SUPPRESS_NAMED_INDIVIDUALS; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_DIRECTORY_PATTERN; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_FORMAT; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TRAILING_COMMENT; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.USE_DTD_SUBSET; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.PRINT_USAGE_AND_EXIT; +import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.RUN_ON_DIRECTORY; import org.apache.commons.cli.CommandLine; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.ValueFactory; @@ -60,9 +62,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.InputStream; -import java.io.OutputStream; public class OptionHandler { @@ -159,29 +159,29 @@ public File handleSourceFile() throws RdfToolkitOptionHandlingException, FileNot public void handleRunningOnDirectory(CommandLine commandLine, RdfToolkitOptions rdfToolkitOptions) { // Check if the command-line options suggest that a directory of files is to be formatted - if (commandLine.hasOption(SOURCE_DIRECTORY.getShortOpt()) || - commandLine.hasOption(SOURCE_DIRECTORY_PATTERN.getShortOpt()) || - commandLine.hasOption(TARGET_DIRECTORY.getShortOpt()) || - commandLine.hasOption(TARGET_DIRECTORY_PATTERN.getShortOpt())) { + if (commandLine.hasOption(SOURCE_DIRECTORY.getShortOpt()) + || commandLine.hasOption(SOURCE_DIRECTORY_PATTERN.getShortOpt()) + || commandLine.hasOption(TARGET_DIRECTORY.getShortOpt()) + || commandLine.hasOption(TARGET_DIRECTORY_PATTERN.getShortOpt())) { // Assume user wants to process a directory of files. - if (!commandLine.hasOption(SOURCE_DIRECTORY.getShortOpt()) || - !commandLine.hasOption(SOURCE_DIRECTORY_PATTERN.getShortOpt()) || - !commandLine.hasOption(TARGET_DIRECTORY.getShortOpt()) || - !commandLine.hasOption(TARGET_DIRECTORY_PATTERN.getShortOpt())) { - LOGGER.error("Directory processing options must all be used together: -sd (--source-directory), " + - "-sdp (--source-directory-pattern), -td (--target-directory), -tdp (--target-directory-pattern)"); + if (!commandLine.hasOption(SOURCE_DIRECTORY.getShortOpt()) + || !commandLine.hasOption(SOURCE_DIRECTORY_PATTERN.getShortOpt()) + || !commandLine.hasOption(TARGET_DIRECTORY.getShortOpt()) + || !commandLine.hasOption(TARGET_DIRECTORY_PATTERN.getShortOpt())) { + LOGGER.error("Directory processing options must all be used together: -sd (--source-directory), " + + "-sdp (--source-directory-pattern), -td (--target-directory), -tdp (--target-directory-pattern)"); rdfToolkitOptions.setRunningMode(PRINT_USAGE_AND_EXIT); } - if (commandLine.hasOption(SOURCE.getShortOpt()) || - commandLine.hasOption(TARGET.getShortOpt())) { - LOGGER.error("Source (-s or --source) and target (-t or --target) options cannot be used together with " + - "directory processing options."); + if (commandLine.hasOption(SOURCE.getShortOpt()) + || commandLine.hasOption(TARGET.getShortOpt())) { + LOGGER.error("Source (-s or --source) and target (-t or --target) options cannot be used together with " + + "directory processing options."); rdfToolkitOptions.setRunningMode(PRINT_USAGE_AND_EXIT); } - if (!commandLine.hasOption(SOURCE_FORMAT.getShortOpt()) || - !commandLine.hasOption(TARGET_FORMAT.getShortOpt())) { - LOGGER.error("Source format (-sfmt or --source-format) and target format (-tfmt or --target-format) options " + - "must be provided when using directory processing options."); + if (!commandLine.hasOption(SOURCE_FORMAT.getShortOpt()) + || !commandLine.hasOption(TARGET_FORMAT.getShortOpt())) { + LOGGER.error("Source format (-sfmt or --source-format) and target format (-tfmt or --target-format) options " + + "must be provided when using directory processing options."); rdfToolkitOptions.setRunningMode(PRINT_USAGE_AND_EXIT); } @@ -386,4 +386,9 @@ public void handleOmitXmlnsNamespace() { boolean omitXmlnsNamespace = commandLine.hasOption(OMIT_XMLNS_NAMESPACE.getShortOpt()); rdfToolkitOptions.setOmitXmlnsNamespace(omitXmlnsNamespace); } + + public void handleSuppressNamedIndividuals() { + boolean suppressNamedIndividuals = commandLine.hasOption(SUPPRESS_NAMED_INDIVIDUALS.getShortOpt()); + rdfToolkitOptions.setSuppressNamedIndividuals(suppressNamedIndividuals); + } } diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitOptions.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitOptions.java index 11e1ee8..45040d7 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitOptions.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitOptions.java @@ -33,6 +33,7 @@ import static org.edmcouncil.rdf_toolkit.util.Constants.OVERRIDE_STRING_LANGUAGE; import static org.edmcouncil.rdf_toolkit.util.Constants.SHORT_URI_PREF; import static org.edmcouncil.rdf_toolkit.util.Constants.STRING_DATA_TYPE_OPTION; +import static org.edmcouncil.rdf_toolkit.util.Constants.SUPPRESS_NAMED_INDIVIDUALS; import static org.edmcouncil.rdf_toolkit.util.Constants.TRAILING_COMMENTS; import static org.edmcouncil.rdf_toolkit.util.Constants.USE_DTD_SUBSET; @@ -44,6 +45,7 @@ import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.rio.RDFFormat; import org.edmcouncil.rdf_toolkit.io.format.TargetFormats; +import org.edmcouncil.rdf_toolkit.runner.constant.RunningMode; import org.edmcouncil.rdf_toolkit.util.ShortIriPreferences; import org.edmcouncil.rdf_toolkit.util.StringDataTypeOptions; @@ -74,6 +76,7 @@ public class RdfToolkitOptions { private String overrideStringLanguage; private String lineEnd; private boolean omitXmlnsNamespace; + private boolean suppressNamedIndividuals; public RdfToolkitOptions(String[] args) { this.args = args; @@ -104,6 +107,7 @@ public Map getOptions() { options.put(OVERRIDE_STRING_LANGUAGE, getOverrideStringLanguage()); options.put(LINE_END, getLineEnd()); options.put(OMIT_XMLNS_NAMESPACE, getOmitXmlnsNamespace()); + options.put(SUPPRESS_NAMED_INDIVIDUALS, getSuppressNamedIndividuals()); return options; } @@ -290,4 +294,12 @@ public boolean getOmitXmlnsNamespace() { public void setOmitXmlnsNamespace(boolean omitXmlnsNamespace) { this.omitXmlnsNamespace = omitXmlnsNamespace; } + + public boolean getSuppressNamedIndividuals() { + return suppressNamedIndividuals; + } + + public void setSuppressNamedIndividuals(boolean suppressNamedIndividuals) { + this.suppressNamedIndividuals = suppressNamedIndividuals; + } } \ No newline at end of file diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitRunner.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitRunner.java index 1017cab..4463d10 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitRunner.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/RdfToolkitRunner.java @@ -24,27 +24,33 @@ package org.edmcouncil.rdf_toolkit.runner; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.HELP; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE_DIRECTORY; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.SOURCE_DIRECTORY_PATTERN; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET_DIRECTORY; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.TARGET_DIRECTORY_PATTERN; -import static org.edmcouncil.rdf_toolkit.runner.CommandLineOption.VERSION; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.EXIT; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.PRINT_AND_EXIT; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.PRINT_USAGE_AND_EXIT; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.RUN_ON_DIRECTORY; -import static org.edmcouncil.rdf_toolkit.runner.RunningMode.RUN_ON_FILE; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.SOURCE_DIRECTORY_PATTERN; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_DIRECTORY; +import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_DIRECTORY_PATTERN; import com.jcabi.manifests.Manifests; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; import java.util.Optional; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.Model; @@ -59,28 +65,12 @@ import org.eclipse.rdf4j.sail.memory.model.MemValueFactory; import org.edmcouncil.rdf_toolkit.RdfFormatter; import org.edmcouncil.rdf_toolkit.io.DirectoryWalker; +import org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption; import org.edmcouncil.rdf_toolkit.runner.exception.RdfToolkitOptionHandlingException; import org.edmcouncil.rdf_toolkit.util.Constants; import org.edmcouncil.rdf_toolkit.writer.SortedRdfWriterFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; public class RdfToolkitRunner { @@ -95,7 +85,8 @@ public RdfToolkitRunner() { } public void run(String[] args) throws Exception { - var rdfToolkitOptions = handleArguments(args); + var commandLineArgumentsHandler = new CommandLineArgumentsHandler(); + var rdfToolkitOptions = commandLineArgumentsHandler.handleArguments(args); switch (rdfToolkitOptions.getRunningMode()) { case PRINT_USAGE_AND_EXIT: @@ -118,77 +109,10 @@ public void run(String[] args) throws Exception { } } - private RdfToolkitOptions handleArguments(String[] args) - throws ParseException, FileNotFoundException, RdfToolkitOptionHandlingException { - var rdfToolkitOptions = new RdfToolkitOptions(args); - - // Parse the command line options. - CommandLineParser parser = new DefaultParser(); - CommandLine line = parser.parse(options, args); - rdfToolkitOptions.setCommandLine(line); - - var optionHandler = new OptionHandler(rdfToolkitOptions); - - // Print out version, if requested. - if (line.hasOption(VERSION.getShortOpt())) { - rdfToolkitOptions.setOutput(getVersion()); - rdfToolkitOptions.setRunningMode(PRINT_AND_EXIT); - return rdfToolkitOptions; - } - - // Print out help, if requested. - if (line.hasOption(HELP.getShortOpt())) { - usage(options); - rdfToolkitOptions.setRunningMode(EXIT); - return rdfToolkitOptions; - } - - optionHandler.handleRunningOnDirectory(line, rdfToolkitOptions); - if (rdfToolkitOptions.getRunningMode() == PRINT_USAGE_AND_EXIT) { - usage(options); - return rdfToolkitOptions; - } - if (rdfToolkitOptions.getRunningMode() == RUN_ON_DIRECTORY) { - return rdfToolkitOptions; - } - - var sourceFile = optionHandler.handleSourceFile(); - optionHandler.handleTargetFile(); - optionHandler.handleBaseIri(valueFactory); - optionHandler.handleIriReplacementOptions(); - optionHandler.handleUseDtdSubset(); - optionHandler.handleInlineBlankNodes(); - optionHandler.handleInferBaseIri(); - optionHandler.handleLeadingComments(); - optionHandler.handleTrailingComments(); - optionHandler.handleStringDataTyping(); - optionHandler.handleOverrideStringLanguage(); - optionHandler.handleIndent(); - optionHandler.handleSourceFormat(sourceFile); - optionHandler.handleTargetFormat(); - optionHandler.handleShortUriPref(); - optionHandler.handleLineEnd(); - optionHandler.handleOmitXmlnsNamespace(); - - rdfToolkitOptions.setRunningMode(RUN_ON_FILE); - - return rdfToolkitOptions; - } - - private String getVersion() { - String implementationTitle = Manifests.read("Implementation-Title"); - String implementationVersion = Manifests.read("Implementation-Version"); - return String.format( - "%s (%s version %s)", - RdfFormatter.class.getSimpleName(), - implementationTitle, - implementationVersion); - } - private void runOnFile(RdfToolkitOptions rdfToolkitOptions) throws Exception { var sourceModel = readModel(rdfToolkitOptions); - boolean isIriPatternAndIriReplacementNotNull = (rdfToolkitOptions.getIriPattern() != null) && - (rdfToolkitOptions.getIriReplacement() != null); + boolean isIriPatternAndIriReplacementNotNull = (rdfToolkitOptions.getIriPattern() != null) + && (rdfToolkitOptions.getIriReplacement() != null); Model replaceModel = new TreeModel(); Set sourceNamespaces = sourceModel.getNamespaces(); @@ -264,9 +188,9 @@ private void runOnFile(RdfToolkitOptions rdfToolkitOptions) throws Exception { if (rdfToolkitOptions.getInferBaseIri()) { LinkedList owlOntologyIris = new LinkedList<>(); for (Statement st : sourceModel) { - if ((Constants.RDF_TYPE.equals(st.getPredicate())) && - (Constants.owlOntology.equals(st.getObject())) && - (st.getSubject() instanceof IRI)) { + if ((Constants.RDF_TYPE.equals(st.getPredicate())) + && (Constants.owlOntology.equals(st.getObject())) + && (st.getSubject() instanceof IRI)) { owlOntologyIris.add((IRI) st.getSubject()); } } @@ -287,7 +211,7 @@ private void runOnFile(RdfToolkitOptions rdfToolkitOptions) throws Exception { Writer targetWriter = new OutputStreamWriter( outputStream, - StandardCharsets.UTF_8.name()); + StandardCharsets.UTF_8); SortedRdfWriterFactory factory = new SortedRdfWriterFactory( rdfToolkitOptions.getTargetFormat()); RDFWriter rdfWriter = factory.getWriter(targetWriter, rdfToolkitOptions.getOptions()); @@ -391,4 +315,14 @@ private void usage(Options options) { formatter.setWidth(100); formatter.printHelp(getVersion(), options); } + + private String getVersion() { + String implementationTitle = Manifests.read("Implementation-Title"); + String implementationVersion = Manifests.read("Implementation-Version"); + return String.format( + "%s (%s version %s)", + RdfFormatter.class.getSimpleName(), + implementationTitle, + implementationVersion); + } } \ No newline at end of file diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineOption.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/CommandLineOption.java similarity index 96% rename from src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineOption.java rename to src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/CommandLineOption.java index 31def68..9718930 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/CommandLineOption.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/CommandLineOption.java @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package org.edmcouncil.rdf_toolkit.runner; +package org.edmcouncil.rdf_toolkit.runner.constant; import org.apache.commons.cli.Options; import org.edmcouncil.rdf_toolkit.io.format.SourceFormats; @@ -54,7 +54,8 @@ public enum CommandLineOption { OVERRIDE_STRING_LANGUAGE("osl", "override-string-language", true, "sets an override language that is applied to all strings"), INDENT("i", "indent", true, "sets the indent string. Default is a single tab character"), LINE_END("ln", "line-end", true, "sets the end-line character(s); supported characters: \\n (LF), \\r (CR). Default is the LF character"), - OMIT_XMLNS_NAMESPACE("oxn", "omit-xmlns-namespace", false, "omits xmlns namespace"); + OMIT_XMLNS_NAMESPACE("oxn", "omit-xmlns-namespace", false, "omits xmlns namespace"), + SUPPRESS_NAMED_INDIVIDUALS("sni", "suppress-named-individuals", false, "suppresses all instances of owl:NamedIndividual"); private final String shortOpt; private final String longOpt; diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RunningMode.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/RunningMode.java similarity index 96% rename from src/main/java/org/edmcouncil/rdf_toolkit/runner/RunningMode.java rename to src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/RunningMode.java index a09d388..46679e0 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/RunningMode.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/constant/RunningMode.java @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package org.edmcouncil.rdf_toolkit.runner; +package org.edmcouncil.rdf_toolkit.runner.constant; public enum RunningMode { EXIT, diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/runner/exception/RdfToolkitOptionHandlingException.java b/src/main/java/org/edmcouncil/rdf_toolkit/runner/exception/RdfToolkitOptionHandlingException.java index 0f7f94a..5fafece 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/runner/exception/RdfToolkitOptionHandlingException.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/runner/exception/RdfToolkitOptionHandlingException.java @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + package org.edmcouncil.rdf_toolkit.runner.exception; public class RdfToolkitOptionHandlingException extends Exception { diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/util/Constants.java b/src/main/java/org/edmcouncil/rdf_toolkit/util/Constants.java index 17de1f6..c1e6d66 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/util/Constants.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/util/Constants.java @@ -42,6 +42,7 @@ public class Constants { public static final String OVERRIDE_STRING_LANGUAGE = "overrideStringLanguage"; public static final String LINE_END = "lineEnd"; public static final String OMIT_XMLNS_NAMESPACE = "omitXmlnsNamespace"; + public static final String SUPPRESS_NAMED_INDIVIDUALS = "suppressNamedIndividuals"; /** * Factory for generating literal values. diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/util/TextUtils.java b/src/main/java/org/edmcouncil/rdf_toolkit/util/TextUtils.java index e447022..1fbc2de 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/util/TextUtils.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/util/TextUtils.java @@ -47,13 +47,13 @@ public static boolean isNameChar(char ch) { if (':' == ch) { return true; } - if (('0' <= ch) && (ch <= '9')) { + if (isDigit(ch)) { return true; } - if (('A' <= ch) && (ch <= 'Z')) { + if (isUpperCaseLetter(ch)) { return true; } - if (('a' <= ch) && (ch <= 'z')) { + if (isLowerCaseLetter(ch)) { return true; } if (('\u00C0' <= ch) && (ch <= '\u00D6')) { @@ -113,6 +113,8 @@ public static boolean isMultilineString(String str) { case 0xC: case 0xD: return true; + default: + // Do nothing } } return false; @@ -139,4 +141,16 @@ public static boolean isPrefixedNameLocalPart(String str) { } return true; } + + private static boolean isDigit(char ch) { + return '0' <= ch && ch <= '9'; + } + + private static boolean isUpperCaseLetter(char ch) { + return 'A' <= ch && ch <= 'Z'; + } + + private static boolean isLowerCaseLetter(char ch) { + return 'a' <= ch && ch <= 'z'; + } } diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedJsonLdWriter.java b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedJsonLdWriter.java index f055b65..163550d 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedJsonLdWriter.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedJsonLdWriter.java @@ -28,6 +28,7 @@ import static org.edmcouncil.rdf_toolkit.comparator.ComparisonUtils.isCollection; import static org.edmcouncil.rdf_toolkit.util.Constants.INDENT; import static org.edmcouncil.rdf_toolkit.util.Constants.LINE_END; + import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; @@ -52,677 +53,705 @@ import javax.xml.namespace.QName; /** - * Equivalent to Sesame's built-in JSON-LD writer, but the triples are sorted into a consistent order. - * In order to do the sorting, it must be possible to load all of the RDF statements into memory. - * NOTE: comments are suppressed, as there isn't a clear way to sort them along with triples. + * Equivalent to Sesame's built-in JSON-LD writer, but the triples are sorted into a consistent order. In order to do + * the sorting, it must be possible to load all of the RDF statements into memory. NOTE: comments are suppressed, as + * there isn't a clear way to sort them along with triples. */ public class SortedJsonLdWriter extends SortedRdfWriter { - // TODO: check generated files for unnecessary blank lines, and find ways to remove them - - // no need to use namespace prefixes generated by the serializer for JSON-LD. - private static final boolean USE_GENERATED_PREFIXES = false; - - // Turtle allows "values" in RDF collections - private static final Class collectionClass = Value.class; - - /** Output stream for this JSON-LD writer. */ - private final IndentingWriter output; - - /** - * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied output stream. - * - * @param out The OutputStream to write the JSON-LD to. - */ - public SortedJsonLdWriter(OutputStream out) { - super(out); - this.output = new IndentingWriter(new OutputStreamWriter(out)); - this.output.setLineEnd(DEFAULT_LINE_END); - this.out = this.output; + // TODO: check generated files for unnecessary blank lines, and find ways to remove them + + // no need to use namespace prefixes generated by the serializer for JSON-LD. + private static final boolean USE_GENERATED_PREFIXES = false; + + // Turtle allows "values" in RDF collections + private static final Class collectionClass = Value.class; + + /** + * Output stream for this JSON-LD writer. + */ + private final IndentingWriter output; + + /** + * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied output stream. + * + * @param out The OutputStream to write the JSON-LD to. + */ + public SortedJsonLdWriter(OutputStream out) { + super(out); + this.output = new IndentingWriter(new OutputStreamWriter(out)); + this.output.setLineEnd(DEFAULT_LINE_END); + this.out = this.output; + } + + /** + * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied writer. + * + * @param writer The Writer to write the JSON-LD to. + */ + public SortedJsonLdWriter(Writer writer) { + super(writer); + this.output = new IndentingWriter(writer); + this.output.setLineEnd(DEFAULT_LINE_END); + this.out = this.output; + } + + /** + * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied output stream. + * + * @param out The OutputStream to write the JSON-LD to. + * @param options options for the JSON-LD writer. + */ + public SortedJsonLdWriter(OutputStream out, Map options) { + super(out, options); + this.output = new IndentingWriter(new OutputStreamWriter(out)); + this.out = this.output; + if (options.containsKey(INDENT)) { + this.output.setIndentationString((String) options.get(INDENT)); } - - /** - * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied writer. - * - * @param writer The Writer to write the JSON-LD to. - */ - public SortedJsonLdWriter(Writer writer) { - super(writer); - this.output = new IndentingWriter(writer); - this.output.setLineEnd(DEFAULT_LINE_END); - this.out = this.output; + String lineEnd = options.containsKey(LINE_END) ? options.get(LINE_END).toString() : DEFAULT_LINE_END; + this.output.setLineEnd(lineEnd); + } + + /** + * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied writer. + * + * @param writer The Writer to write the JSON-LD to. + * @param options options for the JSON-LD writer. + */ + public SortedJsonLdWriter(Writer writer, Map options) { + super(writer, options); + this.output = new IndentingWriter(writer); + this.out = this.output; + if (options.containsKey(INDENT)) { + this.output.setIndentationString((String) options.get(INDENT)); } - - /** - * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied output stream. - * - * @param out The OutputStream to write the JSON-LD to. - * @param options options for the JSON-LD writer. - */ - public SortedJsonLdWriter(OutputStream out, Map options) { - super(out, options); - this.output = new IndentingWriter(new OutputStreamWriter(out)); - this.out = this.output; - if (options.containsKey(INDENT)) { - this.output.setIndentationString((String) options.get(INDENT)); - } - String lineEnd = options.containsKey(LINE_END) ? options.get(LINE_END).toString() : DEFAULT_LINE_END; - this.output.setLineEnd(lineEnd); + String lineEnd = options.containsKey(LINE_END) ? options.get(LINE_END).toString() : DEFAULT_LINE_END; + this.output.setLineEnd(lineEnd); + } + + /** + * Signals the start of the RDF data. This method is called before any data is reported. + * + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void startRDF() throws RDFHandlerException { + super.startRDF(); + output.setIndentationLevel(0); + } + + /** + * Signals the end of the RDF data. This method is called when all data has been reported. + * + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void endRDF() throws RDFHandlerException { + try { + // Sort triples, etc. + sortedOntologies = unsortedOntologies.toSorted(collectionClass, comparisonContext); + if (sortedOntologies.size() != unsortedOntologies.size()) { + System.err.printf("**** ontologies unexpectedly lost or gained during sorting: %d != %d%n", + sortedOntologies.size(), + unsortedOntologies.size()); + System.err.flush(); + } + + sortedTripleMap = unsortedTripleMap.toSorted(collectionClass, comparisonContext); + compareSortedToUnsortedTripleMap(sortedTripleMap, unsortedTripleMap, "JSON-LD"); // TODO + + sortedBlankNodes = unsortedBlankNodes.toSorted(collectionClass, comparisonContext); + if (sortedBlankNodes.size() != unsortedBlankNodes.size()) { + System.err.printf("**** blank nodes unexpectedly lost or gained during sorting: %d != %d%n", + sortedBlankNodes.size(), + unsortedBlankNodes.size()); + System.err.flush(); + } + + super.endRDF(); + } catch (Exception ex) { + throw new RDFHandlerException("unable to generate/write RDF output", ex); + } + } + + protected void writeHeader(Writer out, SortedTurtleObjectList importList, String[] leadingComments) + throws Exception { + // Process leading comments, if any. + if ((leadingComments != null) && (leadingComments.length >= 1)) { + System.err.println("#### leading comments ignored - JSON-LD does not support comments"); + System.err.flush(); } - /** - * Creates an RDFWriter instance that will write sorted JSON-LD to the supplied writer. - * - * @param writer The Writer to write the JSON-LD to. - * @param options options for the JSON-LD writer. - */ - public SortedJsonLdWriter(Writer writer, Map options) { - super(writer, options); - this.output = new IndentingWriter(writer); - this.out = this.output; - if (options.containsKey(INDENT)) { - this.output.setIndentationString((String) options.get(INDENT)); - } - String lineEnd = options.containsKey(LINE_END) ? options.get(LINE_END).toString() : DEFAULT_LINE_END; - this.output.setLineEnd(lineEnd); + // Open list of subject triples + output.write("["); + output.writeEOL(); + output.increaseIndentation(); + } + + protected void writeSubjectSeparator(Writer out) throws Exception { + out.write(","); + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + } else { + out.write("\n"); } + } - /** - * Signals the start of the RDF data. This method is called before any data - * is reported. - * - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void startRDF() throws RDFHandlerException { - super.startRDF(); - output.setIndentationLevel(0); + private boolean isOntology(Resource subject) { + UnsortedTurtlePredicateObjectMap poMap = unsortedTripleMap.get(subject); + if (poMap == null) { + return false; + } + UnsortedTurtleObjectList types = poMap.get(Constants.RDF_TYPE); + if (types == null) { + return false; + } + for (Value type : types) { + if (Constants.owlOntology.equals(type)) { + return true; + } + } + return false; + } + + private Set getSubjectPrefixes(Resource subject) { + final Set prefixes = new HashSet<>(); + + // Get subject prefix + if (subject instanceof IRI) { + QName subjectQName = convertIriToQName((IRI) subject, USE_GENERATED_PREFIXES); + if (subjectQName != null) { + prefixes.add(subjectQName.getPrefix()); + } } - /** - * Signals the end of the RDF data. This method is called when all data has - * been reported. - * - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void endRDF() throws RDFHandlerException { - try { - // Sort triples, etc. - sortedOntologies = unsortedOntologies.toSorted(collectionClass, comparisonContext); - if (sortedOntologies.size() != unsortedOntologies.size()) { - System.err.printf("**** ontologies unexpectedly lost or gained during sorting: %d != %d%n", - sortedOntologies.size(), - unsortedOntologies.size()); - System.err.flush(); + // Get predicate & value prefixes + SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); + if (poMap != null) { + for (IRI predicate : poMap.sortedKeys()) { + QName predicateQName = convertIriToQName(predicate, USE_GENERATED_PREFIXES); + if (predicateQName != null) { + prefixes.add(predicateQName.getPrefix()); + } + + SortedTurtleObjectList values = poMap.get(predicate); + if (values != null) { + for (Value value : values) { + if (value instanceof IRI) { + QName valueQName = convertIriToQName((IRI) value, USE_GENERATED_PREFIXES); + if (valueQName != null) { + prefixes.add(valueQName.getPrefix()); + } } + if (inlineBlankNodes && (value instanceof BNode)) { + prefixes.addAll(getSubjectPrefixes((BNode) value)); + } + } + } + } + } - sortedTripleMap = unsortedTripleMap.toSorted(collectionClass, comparisonContext); - compareSortedToUnsortedTripleMap(sortedTripleMap, unsortedTripleMap, "JSON-LD"); // TODO + return prefixes; + } - sortedBlankNodes = unsortedBlankNodes.toSorted(collectionClass, comparisonContext); - if (sortedBlankNodes.size() != unsortedBlankNodes.size()) { - System.err.printf("**** blank nodes unexpectedly lost or gained during sorting: %d != %d%n", - sortedBlankNodes.size(), - unsortedBlankNodes.size()); - System.err.flush(); - } + protected void writeSubjectTriples(Writer out, Resource subject) throws Exception { + SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); + if (poMap == null) { + poMap = new SortedTurtlePredicateObjectMap(); + } - super.endRDF(); - } catch (Exception ex) { - throw new RDFHandlerException("unable to generate/write RDF output", ex); - } + out.write("{"); + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + intendedOutput.increaseIndentation(); + } else { + out.write("\n"); + } + out.write("\"@id\" : \""); + if (subject instanceof BNode) { + out.write("_:" + blankNodeNameMap.get(subject)); + } else { + writeIri(out, (IRI) subject); + } + out.write("\""); + if (poMap.size() > 0) { + out.write(","); + } + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + } else { + out.write("\n"); } - protected void writeHeader(Writer out, SortedTurtleObjectList importList, String[] leadingComments) - throws Exception { - // Process leading comments, if any. - if ((leadingComments != null) && (leadingComments.length >= 1)) { - System.err.println("#### leading comments ignored - JSON-LD does not support comments"); - System.err.flush(); + // Write predicate/object pairs rendered first. + for (IRI predicate : firstPredicates) { + if (poMap.containsKey(predicate)) { + SortedTurtleObjectList values = poMap.get(predicate); + // make a copy so we don't delete anything from the original + if (values != null) { + values = (SortedTurtleObjectList) values.clone(); } + List valuesList = new ArrayList<>(); + if (!values.isEmpty()) { + if (predicate == Constants.RDF_TYPE) { + for (IRI preferredType : PREFERRED_RDF_TYPES) { + if (values.contains(preferredType)) { + valuesList.add(preferredType); + values.remove(preferredType); + } + } + } - // Open list of subject triples - output.write("["); - output.writeEOL(); - output.increaseIndentation(); + valuesList.addAll(values); + } + if (!valuesList.isEmpty()) { + writePredicateAndObjectValues(out, predicate, valuesList); + out.write(","); + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + } else { + out.write("\n"); + } + } + } } - protected void writeSubjectSeparator(Writer out) throws Exception { + // Write other predicate/object pairs. + for (IRI predicate : poMap.sortedKeys()) { + if (!firstPredicates.contains(predicate)) { + SortedTurtleObjectList values = poMap.get(predicate); + writePredicateAndObjectValues(out, predicate, values); out.write(","); if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); } else { - out.write("\n"); + out.write("\n"); } + } } - private boolean isOntology(Resource subject) { - UnsortedTurtlePredicateObjectMap poMap = unsortedTripleMap.get(subject); - if (poMap == null) { return false; } - UnsortedTurtleObjectList types = poMap.get(Constants.RDF_TYPE); - if (types == null) { return false; } - for (Value type : types) { - if (Constants.owlOntology.equals(type)) { - return true; - } - } - return false; + // Write context + Set prefixes = getSubjectPrefixes(subject); + out.write("\"@context\" : {"); + if (out instanceof IndentingWriter) { + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); + output.increaseIndentation(); + } else { + out.write("\n"); } - private Set getSubjectPrefixes(Resource subject) { - final Set prefixes = new HashSet<>(); - - // Get subject prefix - if (subject instanceof IRI) { - QName subjectQName = convertIriToQName((IRI)subject, USE_GENERATED_PREFIXES); - if (subjectQName != null) { - prefixes.add(subjectQName.getPrefix()); - } - } - - // Get predicate & value prefixes - SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); - if (poMap != null) { - for (IRI predicate : poMap.sortedKeys()) { - QName predicateQName = convertIriToQName(predicate, USE_GENERATED_PREFIXES); - if (predicateQName != null) { - prefixes.add(predicateQName.getPrefix()); - } - - SortedTurtleObjectList values = poMap.get(predicate); - if (values != null) { - for (Value value : values) { - if (value instanceof IRI) { - QName valueQName = convertIriToQName((IRI)value, USE_GENERATED_PREFIXES); - if (valueQName != null) { - prefixes.add(valueQName.getPrefix()); - } - } - if (inlineBlankNodes && (value instanceof BNode)) { - prefixes.addAll(getSubjectPrefixes((BNode)value)); - } - } - } - } + // For an ontology, add the base URI to the context. + if (isOntology(subject)) { + if (baseIri != null) { + out.write("\"@base\" : \"" + baseIri + "\""); + if (!prefixes.isEmpty()) { + out.write(","); } + } + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + } else { + out.write("\n"); + } + } - return prefixes; + int prefixCount = 0; + for (String prefix : prefixes) { + prefixCount++; + out.write("\"" + prefix + "\" : \"" + namespaceTable.get(prefix) + "\""); + if (prefixCount < prefixes.size()) { + out.write(","); + } + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + } else { + out.write("\n"); + } } - protected void writeSubjectTriples(Writer out, Resource subject) throws Exception { - SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); - if (poMap == null) { - poMap = new SortedTurtlePredicateObjectMap(); - } + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.decreaseIndentation(); + out.write("}"); + intendedOutput.writeEOL(); + } else { + out.write("}\n"); + } - out.write("{"); - if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); - intendedOutput.increaseIndentation(); - } else { - out.write("\n"); - } - out.write("\"@id\" : \""); - if (subject instanceof BNode) { - out.write("_:" + blankNodeNameMap.get(subject)); + // Close statement + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.decreaseIndentation(); + out.write("}"); + } else { + out.write("}"); + } + } + + private String convertIriToString(IRI iri) throws Exception { + return convertIriToString(iri, USE_GENERATED_PREFIXES, false, true); + } + + protected void writePredicateAndObjectValues(Writer out, IRI predicate, Collection values) throws Exception { + final boolean isRdfTypePredicate = Constants.RDF_TYPE.equals(predicate); + out.write("\""); + writePredicate(out, predicate); + out.write("\" : "); + if (values.size() == 1) { + Object value = values.toArray()[0]; + if (isRdfTypePredicate) { + writeObject(out, (IRI) value, true); + } else { + writeObject(out, (Value) value); + } + } else if (values.size() > 1) { + out.write("["); + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + intendedOutput.increaseIndentation(); + } else { + out.write("\n"); + } + int numValues = values.size(); + int valueIndex = 0; + for (Value value : values) { + valueIndex += 1; + if (isRdfTypePredicate) { + writeObject(out, (IRI) value, true); } else { - writeIri(out, (IRI) subject); + writeObject(out, value); } - out.write("\""); - if (poMap.size() > 0) { - out.write(","); + if (valueIndex < numValues) { + out.write(","); } if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); } else { - out.write("\n"); - } - - // Write predicate/object pairs rendered first. - for (IRI predicate : firstPredicates) { - if (poMap.containsKey(predicate)) { - SortedTurtleObjectList values = poMap.get(predicate); - // make a copy so we don't delete anything from the original - if (values != null) { - values = (SortedTurtleObjectList) values.clone(); - } - List valuesList = new ArrayList<>(); - if (! values.isEmpty()) { - if (predicate == Constants.RDF_TYPE) { - for (IRI preferredType : preferredRdfTypes) { - if (values.contains(preferredType)) { - valuesList.add(preferredType); - values.remove(preferredType); - } - } - } - - valuesList.addAll(values); - } - if (! valuesList.isEmpty()) { - writePredicateAndObjectValues(out, predicate, valuesList); - out.write(","); - if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter) out; - intendedOutput.writeEOL(); - } else { - out.write("\n"); - } - } - } + out.write("\n"); } + } + if (out instanceof IndentingWriter) { + IndentingWriter intendedOutput = (IndentingWriter) out; + intendedOutput.writeEOL(); + intendedOutput.decreaseIndentation(); + } else { + out.write("\n"); + } + out.write("]"); + } + } + + protected void writePredicate(Writer out, IRI predicate) throws Exception { + out.write(convertVerbIriToString(predicate, USE_GENERATED_PREFIXES, + false, true)); + } + + protected void writeIri(Writer out, IRI iri) throws Exception { + out.write(convertIriToString(iri)); + } + + protected void writeObject(Writer out, Value value) throws Exception { + if (value instanceof BNode) { + writeObject(out, (BNode) value); + } else if (value instanceof IRI) { + writeObject(out, (IRI) value); + } else if (value instanceof Literal) { + writeObject(out, (Literal) value); + } else { + out.write("\"" + value.stringValue() + "\""); + } + } - // Write other predicate/object pairs. - for (IRI predicate : poMap.sortedKeys()) { - if (!firstPredicates.contains(predicate)) { - SortedTurtleObjectList values = poMap.get(predicate); - writePredicateAndObjectValues(out, predicate, values); - out.write(","); - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - } else { - out.write("\n"); - } - } + protected void writeObject(Writer out, BNode bnode) throws Exception { + if (inlineBlankNodes) { + if (isCollection(comparisonContext, bnode, collectionClass)) { + // Open braces + out.write("{"); + if (out instanceof IndentingWriter) { + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); + output.increaseIndentation(); + } else { + out.write("\n"); } - // Write context - Set prefixes = getSubjectPrefixes(subject); - out.write("\"@context\" : {"); + // Write collection members + out.write("\"@list\" : ["); if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter)out; - output.writeEOL(); - output.increaseIndentation(); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); + output.increaseIndentation(); } else { - out.write("\n"); + out.write("\n"); } - - // For an ontology, add the base URI to the context. - if (isOntology(subject)) { - if (baseIri != null) { - out.write("\"@base\" : \"" + baseIri + "\""); - if (! prefixes.isEmpty()) { - out.write(","); - } - } + List members = getCollectionMembers(unsortedTripleMap, bnode, collectionClass, comparisonContext); + int memberIndex = 0; + for (Value member : members) { + memberIndex++; + writeObject(out, member); + if (memberIndex < members.size()) { + out.write(","); if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); } else { - out.write("\n"); + out.write("\n"); } + } } - - int prefixCount = 0; - for (String prefix : prefixes) { - prefixCount++; - out.write("\"" + prefix + "\" : \"" + namespaceTable.get(prefix) + "\""); - if (prefixCount < prefixes.size()) { out.write(","); } - if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); - } else { - out.write("\n"); - } + if (out instanceof IndentingWriter) { + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); + output.decreaseIndentation(); + } else { + out.write("\n"); } + out.write("]"); + // Close braces if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.decreaseIndentation(); - out.write("}"); - intendedOutput.writeEOL(); + IndentingWriter output = (IndentingWriter) out; + output.decreaseIndentation(); + out.write("}"); } else { - out.write("}\n"); + out.write("}"); + } + } else { // not a collection + SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(bnode); + if (poMap == null) { + poMap = new SortedTurtlePredicateObjectMap(); } - // Close statement + // Open braces + out.write("{"); if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.decreaseIndentation(); - out.write("}"); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); + output.increaseIndentation(); } else { - out.write("}"); + out.write("\n"); } - } - - private String convertIriToString(IRI iri) throws Exception { - return convertIriToString(iri, USE_GENERATED_PREFIXES, false, true); - } - protected void writePredicateAndObjectValues(Writer out, IRI predicate, Collection values) throws Exception { - final boolean isRdfTypePredicate = Constants.RDF_TYPE.equals(predicate); - out.write("\""); - writePredicate(out, predicate); - out.write("\" : "); - if (values.size() == 1) { - Object value = values.toArray()[0]; - if (isRdfTypePredicate) { - writeObject(out, (IRI) value, true); - } else { - writeObject(out, (Value) value); + // Write predicate/object pairs rendered first. + int predicateIndex = 0; + for (IRI predicate : firstPredicates) { + if (poMap.containsKey(predicate)) { + predicateIndex++; + SortedTurtleObjectList values = poMap.get(predicate); + writePredicateAndObjectValues(out, predicate, values); + if (predicateIndex < poMap.size()) { + out.write(","); } - } else if (values.size() > 1) { - out.write("["); if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); - intendedOutput.increaseIndentation(); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); } else { - out.write("\n"); + out.write("\n"); } - int numValues = values.size(); - int valueIndex = 0; - for (Value value : values) { - valueIndex += 1; - if (isRdfTypePredicate) { - writeObject(out, (IRI)value, true); - } else { - writeObject(out, value); - } - if (valueIndex < numValues) { out.write(","); } - if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); - } else { - out.write("\n"); - } + } + } + + // Write other predicate/object pairs. + for (IRI predicate : poMap.sortedKeys()) { + if (!firstPredicates.contains(predicate)) { + predicateIndex++; + SortedTurtleObjectList values = poMap.get(predicate); + writePredicateAndObjectValues(out, predicate, values); + if (predicateIndex < poMap.size()) { + out.write(","); } if (out instanceof IndentingWriter) { - IndentingWriter intendedOutput = (IndentingWriter)out; - intendedOutput.writeEOL(); - intendedOutput.decreaseIndentation(); + IndentingWriter output = (IndentingWriter) out; + output.writeEOL(); } else { - out.write("\n"); + out.write("\n"); } - out.write("]"); + } } - } - protected void writePredicate(Writer out, IRI predicate) throws Exception { - out.write(convertVerbIriToString(predicate, USE_GENERATED_PREFIXES, - false, true)); - } - - protected void writeIri(Writer out, IRI iri) throws Exception { - out.write(convertIriToString(iri)); - } - - protected void writeObject(Writer out, Value value) throws Exception { - if (value instanceof BNode) { - writeObject(out, (BNode) value); - } else if (value instanceof IRI) { - writeObject(out, (IRI)value); - } else if (value instanceof Literal) { - writeObject(out, (Literal)value); + // Close braces + if (out instanceof IndentingWriter) { + IndentingWriter output = (IndentingWriter) out; + output.decreaseIndentation(); + out.write("}"); } else { - out.write("\"" + value.stringValue() + "\""); + out.write("}"); } + } + } else { // no inlining of blank nodes + if (unsortedTripleMap.containsKey(bnode)) { + out.write("{ \"@id\" : \"_:" + blankNodeNameMap.get(bnode) + "\" }"); + } else { + System.out.println("**** blank node not a subject: " + bnode.stringValue()); + System.out.flush(); + out.write("{ }"); // last resort - this should never happen + } } - - protected void writeObject(Writer out, BNode bnode) throws Exception { - if (inlineBlankNodes) { - if (isCollection(comparisonContext, bnode, collectionClass)) { - // Open braces - out.write("{"); - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - output.increaseIndentation(); - } else { - out.write("\n"); - } - - // Write collection members - out.write("\"@list\" : ["); - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - output.increaseIndentation(); - } else { - out.write("\n"); - } - List members = getCollectionMembers(unsortedTripleMap, bnode, collectionClass, comparisonContext); - int memberIndex = 0; - for (Value member : members) { - memberIndex++; - writeObject(out, member); - if (memberIndex < members.size()) { - out.write(","); - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - } else { - out.write("\n"); - } - } - } - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - output.decreaseIndentation(); - } else { - out.write("\n"); - } - out.write("]"); - - // Close braces - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.decreaseIndentation(); - out.write("}"); - } else { - out.write("}"); - } - } else { // not a collection - SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(bnode); - if (poMap == null) { poMap = new SortedTurtlePredicateObjectMap(); } - - // Open braces - out.write("{"); - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - output.increaseIndentation(); - } else { - out.write("\n"); - } - - // Write predicate/object pairs rendered first. - int predicateIndex = 0; - for (IRI predicate : firstPredicates) { - if (poMap.containsKey(predicate)) { - predicateIndex++; - SortedTurtleObjectList values = poMap.get(predicate); - writePredicateAndObjectValues(out, predicate, values); - if (predicateIndex < poMap.size()) { - out.write(","); - } - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - } else { - out.write("\n"); - } - } - } - - // Write other predicate/object pairs. - for (IRI predicate : poMap.sortedKeys()) { - if (!firstPredicates.contains(predicate)) { - predicateIndex++; - SortedTurtleObjectList values = poMap.get(predicate); - writePredicateAndObjectValues(out, predicate, values); - if (predicateIndex < poMap.size()) { - out.write(","); - } - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.writeEOL(); - } else { - out.write("\n"); - } - } - } - - // Close braces - if (out instanceof IndentingWriter) { - IndentingWriter output = (IndentingWriter) out; - output.decreaseIndentation(); - out.write("}"); - } else { - out.write("}"); - } - } - } else { // no inlining of blank nodes - if (unsortedTripleMap.containsKey(bnode)) { - out.write("{ \"@id\" : \"_:" + blankNodeNameMap.get(bnode) + "\" }"); - } else { - System.out.println("**** blank node not a subject: " + bnode.stringValue()); System.out.flush(); - out.write("{ }"); // last resort - this should never happen - } + } + + protected void writeObject(Writer out, IRI iri) throws Exception { + writeObject(out, iri, false); + } + + protected void writeObject(Writer out, IRI iri, boolean isRdfType) throws Exception { + out.write(isRdfType ? "\"" : "{ \"@id\" : \""); + writeIri(out, iri); + out.write(isRdfType ? "\"" : "\" }"); + } + + protected void writeObject(Writer out, Literal literal) throws Exception { + if (literal == null) { + out.write("null"); + } else if (literal.getLanguage().isPresent() || ((overrideStringLanguage != null) + && (literal.getDatatype().stringValue().equals(Constants.xsString.stringValue())))) { + out.write("{"); + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.writeEOL(); + indentingWriter.increaseIndentation(); + } else { + out.write("\n"); + } + + String lang = overrideStringLanguage == null ? + literal.getLanguage().orElse(overrideStringLanguage) : + overrideStringLanguage; + + out.write("\"@language\" : \"" + lang + "\","); + if (out instanceof IndentingWriter) { + var output = (IndentingWriter) out; + output.writeEOL(); + } else { + out.write("\n"); + } + + out.write("\"@value\" : \"" + escapeString(literal.stringValue()) + "\""); + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.writeEOL(); + } else { + out.write("\n"); + } + + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.decreaseIndentation(); + out.write("}"); + } else { + out.write("}"); + } + } else if (literal.getDatatype() != null) { + boolean useExplicit = (stringDataTypeOption == StringDataTypeOptions.EXPLICIT) + || !(Constants.xsString.equals(literal.getDatatype()) + || Constants.rdfLangString.equals(literal.getDatatype())); + if (useExplicit) { + out.write("{"); + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.writeEOL(); + indentingWriter.increaseIndentation(); + } else { + out.write("\n"); } - } - - protected void writeObject(Writer out, IRI iri) throws Exception { - writeObject(out, iri, false); - } - - protected void writeObject(Writer out, IRI iri, boolean isRdfType) throws Exception { - out.write(isRdfType ? "\"" : "{ \"@id\" : \""); - writeIri(out, iri); - out.write(isRdfType ? "\"" : "\" }"); - } - - protected void writeObject(Writer out, Literal literal) throws Exception { - if (literal == null) { - out.write("null"); - } else if (literal.getLanguage().isPresent() || ((overrideStringLanguage != null) - && (literal.getDatatype().stringValue().equals(Constants.xsString.stringValue())))) { - out.write("{"); - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.writeEOL(); - indentingWriter.increaseIndentation(); - } else { - out.write("\n"); - } - String lang = overrideStringLanguage == null ? - literal.getLanguage().orElse(overrideStringLanguage) : - overrideStringLanguage; + out.write("\"@type\" : \""); + writeIri(out, literal.getDatatype()); + out.write("\","); + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.writeEOL(); + } else { + out.write("\n"); + } - out.write("\"@language\" : \"" + lang + "\","); - if (out instanceof IndentingWriter) { - var output = (IndentingWriter) out; - output.writeEOL(); - } else { - out.write("\n"); - } + out.write("\"@value\" : "); + writeString(out, literal.stringValue()); - out.write("\"@value\" : \"" + escapeString(literal.stringValue()) + "\""); - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.writeEOL(); - } else { - out.write("\n"); - } - - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.decreaseIndentation(); - out.write("}"); - } else { - out.write("}"); - } - } else if (literal.getDatatype() != null) { - boolean useExplicit = (stringDataTypeOption == StringDataTypeOptions.EXPLICIT) - || !(Constants.xsString.equals(literal.getDatatype()) - || Constants.rdfLangString.equals(literal.getDatatype())); - if (useExplicit) { - out.write("{"); - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.writeEOL(); - indentingWriter.increaseIndentation(); - } else { - out.write("\n"); - } - - out.write("\"@type\" : \""); - writeIri(out, literal.getDatatype()); - out.write("\","); - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.writeEOL(); - } else { - out.write("\n"); - } - - out.write("\"@value\" : "); - writeString(out, literal.stringValue()); - - if (out instanceof IndentingWriter) { - var indentingWriter = (IndentingWriter) out; - indentingWriter.decreaseIndentation(); - indentingWriter.writeEOL(); - out.write("}"); - } else { - out.write("\n}"); - } - } else { - writeString(out, literal.stringValue()); - } + if (out instanceof IndentingWriter) { + var indentingWriter = (IndentingWriter) out; + indentingWriter.decreaseIndentation(); + indentingWriter.writeEOL(); + out.write("}"); } else { - writeString(out, literal.stringValue()); + out.write("\n}"); } + } else { + writeString(out, literal.stringValue()); + } + } else { + writeString(out, literal.stringValue()); } + } - protected void writeString(Writer out, String str) throws Exception { - // Note that JSON does not support multi-line strings, unlike Turtle - if (str == null) { return; } - out.write("\""); - out.write(escapeString(str)); - out.write("\""); + protected void writeString(Writer out, String str) throws Exception { + // Note that JSON does not support multi-line strings, unlike Turtle + if (str == null) { + return; } - - protected void writeFooter(Writer out, String[] trailingComments) throws Exception { - // Write closing bracket for subject list. - output.writeEOL(); - output.decreaseIndentation(); - output.write("]"); - output.writeEOL(); - - // Process trailing comments, if any. - if ((trailingComments != null) && (trailingComments.length >= 1)) { - System.err.println("#### trailing comments ignored - JSON-LD does not support comments"); - System.err.flush(); - } + out.write("\""); + out.write(escapeString(str)); + out.write("\""); + } + + protected void writeFooter(Writer out, String[] trailingComments) throws Exception { + // Write closing bracket for subject list. + output.writeEOL(); + output.decreaseIndentation(); + output.write("]"); + output.writeEOL(); + + // Process trailing comments, if any. + if ((trailingComments != null) && (trailingComments.length >= 1)) { + System.err.println("#### trailing comments ignored - JSON-LD does not support comments"); + System.err.flush(); } + } - private String escapeString(String str) { // JSON does not support multi-line strings, different to Turtle - final char SPACE = ' '; - final char UNESCAPED_BACKSLASH = '\\'; - if (str == null) { return null; } - StringBuilder sb = new StringBuilder(); - for (char ch : str.toCharArray()) { - if (ch < SPACE) { - sb.append(UNESCAPED_BACKSLASH); - sb.append('u'); - sb.append(String.format("%04x", (short) ch)); - } else { - switch (ch) { - case '\n': sb.append(UNESCAPED_BACKSLASH); sb.append('n'); break; - case '\r': sb.append(UNESCAPED_BACKSLASH); sb.append('r'); break; - case '"': sb.append(UNESCAPED_BACKSLASH); sb.append('"'); break; - case '\\': sb.append(UNESCAPED_BACKSLASH); sb.append(UNESCAPED_BACKSLASH); break; - default: sb.append(ch); - } - } + private String escapeString(String str) { // JSON does not support multi-line strings, different to Turtle + final char SPACE = ' '; + final char UNESCAPED_BACKSLASH = '\\'; + if (str == null) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (char ch : str.toCharArray()) { + if (ch < SPACE) { + sb.append(UNESCAPED_BACKSLASH); + sb.append('u'); + sb.append(String.format("%04x", (short) ch)); + } else { + switch (ch) { + case '\n': + sb.append(UNESCAPED_BACKSLASH); + sb.append('n'); + break; + case '\r': + sb.append(UNESCAPED_BACKSLASH); + sb.append('r'); + break; + case '"': + sb.append(UNESCAPED_BACKSLASH); + sb.append('"'); + break; + case '\\': + sb.append(UNESCAPED_BACKSLASH); + sb.append(UNESCAPED_BACKSLASH); + break; + default: + sb.append(ch); } - return sb.toString(); + } } + return sb.toString(); + } } diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfWriter.java b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfWriter.java index e60d60d..08f24ee 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfWriter.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfWriter.java @@ -31,13 +31,16 @@ import static org.edmcouncil.rdf_toolkit.util.Constants.OVERRIDE_STRING_LANGUAGE; import static org.edmcouncil.rdf_toolkit.util.Constants.SHORT_URI_PREF; import static org.edmcouncil.rdf_toolkit.util.Constants.STRING_DATA_TYPE_OPTION; +import static org.edmcouncil.rdf_toolkit.util.Constants.SUPPRESS_NAMED_INDIVIDUALS; import static org.edmcouncil.rdf_toolkit.util.Constants.TRAILING_COMMENTS; import static org.edmcouncil.rdf_toolkit.util.Constants.USE_DTD_SUBSET; + import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.algebra.If; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFHandlerException; import org.eclipse.rdf4j.rio.helpers.AbstractRDFWriter; @@ -71,586 +74,650 @@ import javax.xml.namespace.QName; /** - * Equivalent to Sesame's built-in RDF writer, but the triples are sorted into a consistent order. - * In order to do the sorting, it must be possible to load all of the RDF statements into memory. - * NOTE: comments are suppressed, as there isn't a clear way to sort them along with triples. + * Equivalent to Sesame's built-in RDF writer, but the triples are sorted into a consistent order. In order to do the + * sorting, it must be possible to load all of the RDF statements into memory. NOTE: comments are suppressed, as there + * isn't a clear way to sort them along with triples. */ public abstract class SortedRdfWriter extends AbstractRDFWriter { - // TODO: add common methods for "eol and increase indent", "eol and decrease indent" and "eol with same indent" and - // refactor using these - - public static final String DEFAULT_LINE_END = "\n"; - - /** Preferred rdf:type values when rendering RDF. */ - protected static final List preferredRdfTypes = Arrays.asList( - Constants.owlNamedIndividual, - Constants.owlDatatypeProperty, - Constants.owlObjectProperty); - - /** - * Contains data that is relevant in comparisons across different comparators. - */ - protected ComparisonContext comparisonContext; - - /** Base IRI for the RDF output document. */ - protected IRI baseIri = null; - - /** Preference for prefix or base-IRI based IRI shortening. */ - protected ShortIriPreferences shortIriPreference = ShortIriPreferences.PREFIX; - - /** Whether to use a DTD subset to allow IRI shortening in RDF/XML */ - protected boolean useDtdSubset = false; - - /** Whether to inline blank nodes */ - protected boolean inlineBlankNodes = false; - - /** Leading comment lines */ - protected String[] leadingComments = null; - - /** Trailing comment lines */ - protected String[] trailingComments = null; - - /** String data type option */ - protected StringDataTypeOptions stringDataTypeOption = StringDataTypeOptions.IMPLICIT; - - /** Override string language */ - protected String overrideStringLanguage = null; - - /** Unsorted list of subjects which are OWL ontologies, as they are rendered before other subjects. */ - protected UnsortedTurtleResourceList unsortedOntologies = null; - - /** Sorted list of subjects which are OWL ontologies, as they are rendered before other subjects. */ - protected SortedTurtleResourceList sortedOntologies = null; - - /** Unsorted list of blank nodes, as they are rendered separately from other nodes. */ - protected UnsortedTurtleResourceList unsortedBlankNodes = null; - - /** Sorted list of blank nodes, as they are rendered separately from other nodes. */ - protected SortedTurtleResourceList sortedBlankNodes = null; - - /** Unsorted list of blank nodes that are objects of statements. */ - protected UnsortedTurtleBNodeList objectBlankNodes = null; - - /** Map of serialisation names for blank nodes. */ - protected HashMap blankNodeNameMap = null; - - /** Unsorted hash map containing triple data. */ - protected UnsortedTurtleSubjectPredicateObjectMap unsortedTripleMap = null; - - /** Sorted hash map containing triple data. */ - protected SortedTurtleSubjectPredicateObjectMap sortedTripleMap = null; - - /** All predicates from the input ontology. */ - protected HashSet allPredicates = null; - - /** Predicates that are specially rendered before all others. */ - protected List firstPredicates = null; - - /** - * Namespace mappings created by the serializer. - */ - protected Map generatedNamespaceTable = null; - - /** Reverse namespace table used to map IRIs to prefixes. Key is IRI string, value is prefix string. */ - protected ReverseNamespaceTable reverseNamespaceTable = null; - - /** If set to true, xmlns namespace is ignored */ - protected boolean omitXmlnsNamespace = false; - - /** Output stream for this RDF writer. */ - protected Writer out; - - /** - * Creates an RDFWriter instance that will write sorted RDF to the supplied output stream. - * - * @param out The OutputStream to write the RDF to. - */ - protected SortedRdfWriter(OutputStream out) { - this(new OutputStreamWriter(out)); + // TODO: add common methods for "eol and increase indent", "eol and decrease indent" and "eol with same indent" and + // refactor using these + + public static final String DEFAULT_LINE_END = "\n"; + + /** + * Preferred rdf:type values when rendering RDF. + */ + protected static final List PREFERRED_RDF_TYPES = Arrays.asList( + Constants.owlNamedIndividual, + Constants.owlDatatypeProperty, + Constants.owlObjectProperty); + + /** + * Contains data that is relevant in comparisons across different comparators. + */ + protected ComparisonContext comparisonContext; + + /** + * Base IRI for the RDF output document. + */ + protected IRI baseIri = null; + + /** + * Preference for prefix or base-IRI based IRI shortening. + */ + protected ShortIriPreferences shortIriPreference = ShortIriPreferences.PREFIX; + + /** + * Whether to use a DTD subset to allow IRI shortening in RDF/XML + */ + protected boolean useDtdSubset = false; + + /** + * Whether to inline blank nodes + */ + protected boolean inlineBlankNodes = false; + + /** + * Leading comment lines + */ + protected String[] leadingComments = null; + + /** + * Trailing comment lines + */ + protected String[] trailingComments = null; + + /** + * String data type option + */ + protected StringDataTypeOptions stringDataTypeOption = StringDataTypeOptions.IMPLICIT; + + /** + * Override string language + */ + protected String overrideStringLanguage = null; + + /** + * Unsorted list of subjects which are OWL ontologies, as they are rendered before other subjects. + */ + protected UnsortedTurtleResourceList unsortedOntologies = null; + + /** + * Sorted list of subjects which are OWL ontologies, as they are rendered before other subjects. + */ + protected SortedTurtleResourceList sortedOntologies = null; + + /** + * Unsorted list of blank nodes, as they are rendered separately from other nodes. + */ + protected UnsortedTurtleResourceList unsortedBlankNodes = null; + + /** + * Sorted list of blank nodes, as they are rendered separately from other nodes. + */ + protected SortedTurtleResourceList sortedBlankNodes = null; + + /** + * Unsorted list of blank nodes that are objects of statements. + */ + protected UnsortedTurtleBNodeList objectBlankNodes = null; + + /** + * Map of serialisation names for blank nodes. + */ + protected HashMap blankNodeNameMap = null; + + /** + * Unsorted hash map containing triple data. + */ + protected UnsortedTurtleSubjectPredicateObjectMap unsortedTripleMap = null; + + /** + * Sorted hash map containing triple data. + */ + protected SortedTurtleSubjectPredicateObjectMap sortedTripleMap = null; + + /** + * All predicates from the input ontology. + */ + protected HashSet allPredicates = null; + + /** + * Predicates that are specially rendered before all others. + */ + protected List firstPredicates = null; + + /** + * Namespace mappings created by the serializer. + */ + protected Map generatedNamespaceTable = null; + + /** + * Reverse namespace table used to map IRIs to prefixes. Key is IRI string, value is prefix string. + */ + protected ReverseNamespaceTable reverseNamespaceTable = null; + + /** + * If set to true, xmlns namespace is ignored + */ + protected boolean omitXmlnsNamespace = false; + + /** + * If set to true, owl:NamedIndividual will not be used when serializing + */ + protected boolean suppressNamedIndividuals = false; + + /** + * Output stream for this RDF writer. + */ + protected Writer out; + + /** + * Creates an RDFWriter instance that will write sorted RDF to the supplied output stream. + * + * @param out The OutputStream to write the RDF to. + */ + protected SortedRdfWriter(OutputStream out) { + this(new OutputStreamWriter(out)); + } + + /** + * Creates an RDFWriter instance that will write sorted RDF to the supplied writer. + * + * @param writer The Writer to write the RDF to. + */ + protected SortedRdfWriter(Writer writer) { + this(writer, new HashMap<>()); + } + + /** + * Creates an RDFWriter instance that will write sorted RDF to the supplied output stream. + * + * @param out The OutputStream to write the RDF to. + * @param options options for the RDF writer. + */ + protected SortedRdfWriter(OutputStream out, Map options) { + this(new OutputStreamWriter(out), options); + } + + /** + * Creates an RDFWriter instance that will write sorted RDF to the supplied writer. + * + * @param writer The Writer to write the RDF to. + * @param options options for the RDF writer. + */ + protected SortedRdfWriter(Writer writer, Map options) { + if (writer == null) { + throw new IllegalStateException("Writer object can't be null!"); } + this.out = writer; + processOptions(options); + } - /** - * Creates an RDFWriter instance that will write sorted RDF to the supplied writer. - * - * @param writer The Writer to write the RDF to. - */ - protected SortedRdfWriter(Writer writer) { - this(writer, new HashMap<>()); + private void processOptions(Map options) { + if (options.containsKey(BASE_IRI)) { + this.baseIri = (IRI) options.get(BASE_IRI); } - - /** - * Creates an RDFWriter instance that will write sorted RDF to the supplied output stream. - * - * @param out The OutputStream to write the RDF to. - * @param options options for the RDF writer. - */ - protected SortedRdfWriter(OutputStream out, Map options) { - this(new OutputStreamWriter(out), options); + if (options.containsKey(SHORT_URI_PREF)) { + this.shortIriPreference = (ShortIriPreferences) options.get(SHORT_URI_PREF); } - - /** - * Creates an RDFWriter instance that will write sorted RDF to the supplied writer. - * - * @param writer The Writer to write the RDF to. - * @param options options for the RDF writer. - */ - protected SortedRdfWriter(Writer writer, Map options) { - if (writer == null) { - throw new IllegalStateException("Writer object can't be null!"); + if (options.containsKey(USE_DTD_SUBSET)) { + this.useDtdSubset = (Boolean) options.get(USE_DTD_SUBSET); + } + if (options.containsKey(INLINE_BLANK_NODES)) { + this.inlineBlankNodes = (Boolean) options.get(INLINE_BLANK_NODES); + } + if (options.containsKey(LEADING_COMMENTS)) { + this.leadingComments = (String[]) options.get(LEADING_COMMENTS); + } + if (options.containsKey(TRAILING_COMMENTS)) { + this.trailingComments = (String[]) options.get(TRAILING_COMMENTS); + } + if (options.containsKey(STRING_DATA_TYPE_OPTION)) { + this.stringDataTypeOption = (StringDataTypeOptions) options.get(STRING_DATA_TYPE_OPTION); + } + if (options.containsKey(OVERRIDE_STRING_LANGUAGE)) { + this.overrideStringLanguage = (String) options.get(OVERRIDE_STRING_LANGUAGE); + } + if (options.containsKey(OMIT_XMLNS_NAMESPACE)) { + this.omitXmlnsNamespace = options.containsKey(OMIT_XMLNS_NAMESPACE) + && Boolean.parseBoolean(options.get(OMIT_XMLNS_NAMESPACE).toString()); + } + if (options.containsKey(SUPPRESS_NAMED_INDIVIDUALS)) { + this.suppressNamedIndividuals = Boolean.parseBoolean(options.get(SUPPRESS_NAMED_INDIVIDUALS).toString()); + } + } + + /** + * Gets the RDF format that this RDFWriter uses. + */ + @Override + public RDFFormat getRDFFormat() { + return RDFFormat.TURTLE; + } + + /** + * Converts a IRI to a QName, if possible, given the available namespace prefixes. Returns null if there is no match + * to a prefix. + * + * @param iri The IRI to convert to a QName, if possible. + * @param useGeneratedPrefixes Whether to use namespace prefixes generated by the serializer. + * @return The equivalent QName for the IRI, or null if no equivalent. + */ + protected QName convertIriToQName(IRI iri, boolean useGeneratedPrefixes) { + String iriString = iri.stringValue(); + for (String iriStem : reverseNamespaceTable.keySet()) { + String prefix = reverseNamespaceTable.get(iriStem); + if ((iriString.length() > iriStem.length()) && iriString.startsWith(iriStem)) { + String localPart = iriString.substring(iriStem.length()); + if (TextUtils.isPrefixedNameLocalPart(localPart)) { // to be a value QName, the 'local part' has to be valid + if (useGeneratedPrefixes || !generatedNamespaceTable.containsKey(prefix)) { + return new QName(iriStem, localPart, prefix); + } else { + return null; + } + } else { + return null; } - this.out = writer; - processOptions(options); + } else if (iriString.startsWith(String.format("%s:", prefix))) { + return new QName(iriStem, iriString.substring(iriString.indexOf(':') + 1), prefix); + } } - - private void processOptions(Map options) { - if (options.containsKey(BASE_IRI)) { - this.baseIri = (IRI) options.get(BASE_IRI); + // Failed to find a match, return null. + return null; + } + + protected String convertIriToRelativeIri(IRI iri, boolean useTurtleQuoting) throws Exception { + // Note: does not check that the baseIri doesn't terminate in the middle of some IRI of which it really isn't the base. + if (baseIri != null) { + String iriString = iri.stringValue(); + String baseIriString = baseIri.stringValue(); + String relativeIriString = (new URI(baseIriString)).relativize(new URI(iriString)).toString(); + return String.format("%s%s%s", + useTurtleQuoting ? "<" : "", + relativeIriString.length() >= 1 ? relativeIriString : iriString, // avoid zero-length relative IRIs + useTurtleQuoting ? ">" : "" + ); + } + // Failed to find a match, return null. + return String.format("%s%s%s", + useTurtleQuoting ? "<" : "", + iri.stringValue(), + useTurtleQuoting ? ">" : "" + ); + } + + /** + * Signals the start of the RDF data. This method is called before any data is reported. + * + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void startRDF() throws RDFHandlerException { + allPredicates = new HashSet<>(); + namespaceTable = new TreeMap<>(); + generatedNamespaceTable = new TreeMap<>(); + unsortedOntologies = new UnsortedTurtleResourceList(); + unsortedBlankNodes = new UnsortedTurtleResourceList(); + blankNodeNameMap = new HashMap<>(); + unsortedTripleMap = new UnsortedTurtleSubjectPredicateObjectMap(); + objectBlankNodes = new UnsortedTurtleBNodeList(); + comparisonContext = new ComparisonContext(inlineBlankNodes, unsortedTripleMap); + } + + /** + * Adds a default namespace prefix to the namespace table, if no prefix has been defined. + * + * @param namespaceIri The namespace IRI. Cannot be null. + * @param defaultPrefix The default prefix to use, if no prefix is yet assigned. Cannot be null. + */ + protected void addDefaultNamespacePrefixIfMissing(String namespaceIri, String defaultPrefix) { + if ((namespaceIri != null) && (defaultPrefix != null)) { + if (!namespaceTable.containsValue(namespaceIri)) { + namespaceTable.put(defaultPrefix, namespaceIri); + } + } + } + + /** + * Checks if all predicate IRIs have a matching namespace prefix, i.e. that they can be converted into QNames. This is + * needed for RDF/XML. If there is a predicate without a matching namespace prefix, a namespace prefix is created for + * it. + */ + protected void addNamespacePrefixesForPredicates() { + int namespaceIndex = 1; + for (IRI predicate : allPredicates) { + String predicateString = predicate.stringValue(); + int namespaceIriEndPos = Math.max( + predicateString.lastIndexOf("/"), + predicateString.lastIndexOf("#") + ); + String namespaceIri = predicateString.substring(0, namespaceIriEndPos + 1); + if (namespaceIri.length() >= 1) { + if (!namespaceTable.containsValue(namespaceIri)) { + String newPrefix = "zzzns" + String.format("%04d", namespaceIndex); // TODO zzzns? + namespaceTable.put(newPrefix, namespaceIri); + generatedNamespaceTable.put(newPrefix, + namespaceIri); // track the namespace mappings created by the serializer + namespaceIndex += 1; } - if (options.containsKey(SHORT_URI_PREF)) { - this.shortIriPreference = (ShortIriPreferences) options.get(SHORT_URI_PREF); + } + } + } + + /** + * Signals the end of the RDF data. This method is called when all data has been reported. + * + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void endRDF() throws RDFHandlerException { + try { + // !!!! Override method must set values for 'sortedOntologies', 'sortedTripleMap' & 'sortedBlankNodes' before calling this method + + // Create serialisation names for blank nodes. + String blankNodeNamePadding = prepareBlankNodeNamePadding(unsortedBlankNodes.size()); + + int blankNodeIndex = 0; + for (Value value : sortedBlankNodes) { + if (value instanceof BNode) { + BNode bnode = (BNode) value; + blankNodeIndex++; + String blankNodeName = Integer.toString(blankNodeIndex); + if (blankNodeName.length() < blankNodeNamePadding.length()) { + blankNodeName = blankNodeNamePadding.substring(0, blankNodeNamePadding.length() - blankNodeName.length()) + + blankNodeName; + } + blankNodeName = "blank" + blankNodeName; + blankNodeNameMap.put(bnode, blankNodeName); } - if (options.containsKey(USE_DTD_SUBSET)) { - this.useDtdSubset = (Boolean) options.get(USE_DTD_SUBSET); + } + + populateListOfFirstPredicates(); + + // Add default namespace prefixes, if they haven't yet been defined. May fail if these prefixes have + // already been defined for different namespace IRIs. + addDefaultNamespacePrefixIfMissing(Constants.RDF_NS_URI, "rdf"); + addDefaultNamespacePrefixIfMissing(Constants.RDFS_NS_URI, "rdfs"); + addDefaultNamespacePrefixIfMissing(Constants.OWL_NS_URI, "owl"); + addDefaultNamespacePrefixIfMissing(Constants.XML_SCHEMA_NS_URI, "xs"); + + // Add any extra namespaces needed to make all predicates writeable as a QName. This is especially needed for RDF/XML. + addNamespacePrefixesForPredicates(); + + // Create reverse namespace table. + reverseNamespaceTable = new ReverseNamespaceTable(); + for (String prefix : namespaceTable.keySet()) { + String iri = namespaceTable.get(prefix); + reverseNamespaceTable.put(iri, prefix); + } + + // Create list of imports + SortedTurtleObjectList importList = new SortedTurtleObjectList(comparisonContext); + for (Resource subject : sortedOntologies) { + if (sortedTripleMap.containsKey(subject)) { + SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); + if (poMap.containsKey(Constants.OWL_IMPORTS)) { + SortedTurtleObjectList importsOList = poMap.get(Constants.OWL_IMPORTS); + importList.addAll(importsOList); + } } - if (options.containsKey(INLINE_BLANK_NODES)) { - this.inlineBlankNodes = (Boolean) options.get(INLINE_BLANK_NODES); + } + + // Write header information, including leading comments. + writeHeader(out, importList, leadingComments); + + // Track how many of the subjects have been written + int allSubjectCount = 0; + for (Resource subject : sortedOntologies) { + if (!(subject instanceof BNode)) { + allSubjectCount++; } - if (options.containsKey(LEADING_COMMENTS)) { - this.leadingComments = (String[]) options.get(LEADING_COMMENTS); + } + for (Resource subject : sortedTripleMap.sortedKeys()) { + if (!sortedOntologies.contains(subject) && !(subject instanceof BNode)) { + allSubjectCount++; } - if (options.containsKey(TRAILING_COMMENTS)) { - this.trailingComments = (String[]) options.get(TRAILING_COMMENTS); + } + for (Resource resource : sortedBlankNodes) { + if (!inlineBlankNodes || !objectBlankNodes.contains(resource)) { + BNode bnode = (BNode) resource; + if (unsortedTripleMap.containsKey(bnode)) { + allSubjectCount++; + } } - if (options.containsKey(STRING_DATA_TYPE_OPTION)) { - this.stringDataTypeOption = (StringDataTypeOptions) options.get(STRING_DATA_TYPE_OPTION); + } + + // Write out subjects which are unsortedOntologies. + // TODO Above comment is probably misleading. Here we write down triples by a subject + int subjectCount = 0; + for (Resource subject : sortedOntologies) { + if (!(subject instanceof BNode)) { + subjectCount++; + writeSubjectTriples(out, subject); + if (subjectCount < allSubjectCount) { + writeSubjectSeparator(out); + } } - if (options.containsKey(OVERRIDE_STRING_LANGUAGE)) { - this.overrideStringLanguage = (String) options.get(OVERRIDE_STRING_LANGUAGE); + } + + // Write out all other subjects (not unsortedOntologies; also not blank nodes). + for (Resource subject : sortedTripleMap.sortedKeys()) { + if (!sortedOntologies.contains(subject) && !(subject instanceof BNode)) { + subjectCount++; + writeSubjectTriples(out, subject); + if (subjectCount < allSubjectCount) { + writeSubjectSeparator(out); + } } - if (options.containsKey(OMIT_XMLNS_NAMESPACE)) { - this.omitXmlnsNamespace = options.containsKey(OMIT_XMLNS_NAMESPACE) - && Boolean.parseBoolean(options.get(OMIT_XMLNS_NAMESPACE).toString()); + } + + // Write out blank nodes that are subjects, if blank nodes are not being inlined or if the blank node is not an object. + for (Resource resource : sortedBlankNodes) { + if (!inlineBlankNodes || !objectBlankNodes.contains(resource)) { + BNode bnode = (BNode) resource; + if (unsortedTripleMap.containsKey(bnode)) { + subjectCount++; + writeSubjectTriples(out, bnode); + if (subjectCount < allSubjectCount) { + writeSubjectSeparator(out); + } + } } - } + } - /** - * Gets the RDF format that this RDFWriter uses. - */ - @Override - public RDFFormat getRDFFormat() { - return RDFFormat.TURTLE; - } + // Write footer information, including any trailing comments. + writeFooter(out, trailingComments); - /** - * Converts a IRI to a QName, if possible, given the available namespace prefixes. Returns null if there is no match to a prefix. - * @param iri The IRI to convert to a QName, if possible. - * @param useGeneratedPrefixes Whether to use namespace prefixes generated by the serializer. - * @return The equivalent QName for the IRI, or null if no equivalent. - */ - protected QName convertIriToQName(IRI iri, boolean useGeneratedPrefixes) { - String iriString = iri.stringValue(); - for (String iriStem : reverseNamespaceTable.keySet()) { - String prefix = reverseNamespaceTable.get(iriStem); - if ((iriString.length() > iriStem.length()) && iriString.startsWith(iriStem)) { - String localPart = iriString.substring(iriStem.length()); - if (TextUtils.isPrefixedNameLocalPart(localPart)) { // to be a value QName, the 'local part' has to be valid - if (useGeneratedPrefixes || !generatedNamespaceTable.containsKey(prefix)) { - return new QName(iriStem, localPart, prefix); - } else { - return null; - } - } else { - return null; - } - } else if (iriString.startsWith(String.format("%s:", prefix))) { - return new QName(iriStem, iriString.substring(iriString.indexOf(':')+1), prefix); - } - } - // Failed to find a match, return null. - return null; + out.flush(); + } catch (Throwable t) { + throw new RDFHandlerException("unable to generate/write RDF output", t); } - - protected String convertIriToRelativeIri(IRI iri, boolean useTurtleQuoting) throws Exception { - // Note: does not check that the baseIri doesn't terminate in the middle of some IRI of which it really isn't the base. - if (baseIri != null) { - String iriString = iri.stringValue(); - String baseIriString = baseIri.stringValue(); - String relativeIriString = (new URI(baseIriString)).relativize(new URI(iriString)).toString(); - return String.format("%s%s%s", - useTurtleQuoting ? "<" : "", - relativeIriString.length() >= 1 ? relativeIriString : iriString, // avoid zero-length relative IRIs - useTurtleQuoting ? ">" : "" - ); - } - // Failed to find a match, return null. - return String.format("%s%s%s", - useTurtleQuoting ? "<" : "", - iri.stringValue(), - useTurtleQuoting ? ">" : "" - ); + } + + private String prepareBlankNodeNamePadding(int numberOfBlankNodes) { + StringBuilder blankNodeNamePaddingBuilder = new StringBuilder(); + blankNodeNamePaddingBuilder.append("0"); + int blankNodeCount = numberOfBlankNodes; + while (blankNodeCount > 9) { + blankNodeCount /= 10; + blankNodeNamePaddingBuilder.append("0"); } - - /** - * Signals the start of the RDF data. This method is called before any data - * is reported. - * - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void startRDF() throws RDFHandlerException { - allPredicates = new HashSet<>(); - namespaceTable = new TreeMap<>(); - generatedNamespaceTable = new TreeMap<>(); - unsortedOntologies = new UnsortedTurtleResourceList(); - unsortedBlankNodes = new UnsortedTurtleResourceList(); - blankNodeNameMap = new HashMap<>(); - unsortedTripleMap = new UnsortedTurtleSubjectPredicateObjectMap(); - objectBlankNodes = new UnsortedTurtleBNodeList(); - comparisonContext = new ComparisonContext(inlineBlankNodes, unsortedTripleMap); + return blankNodeNamePaddingBuilder.toString(); + } + + private void populateListOfFirstPredicates() { + // Set up list of predicates that appear first under their subjects. + firstPredicates = new ArrayList<>(); // predicates that are specially rendered first + firstPredicates.add(Constants.RDF_TYPE); + firstPredicates.add(Constants.RDFS_SUB_CLASS_OF); + firstPredicates.add(Constants.RDFS_SUB_PROPERTY_OF); + firstPredicates.add(Constants.OWL_SAME_AS); + firstPredicates.add(Constants.RDFS_LABEL); + firstPredicates.add(Constants.RDFS_COMMENT); + firstPredicates.add(Constants.OWL_ON_PROPERTY); + firstPredicates.add(Constants.OWL_ON_CLASS); + } + + /** + * Handles a statement. + * + * @param st The statement. + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void handleStatement(Statement st) throws RDFHandlerException { + // Store the predicate. + allPredicates.add(st.getPredicate()); + + // Store the object if it is a blank node. + if (st.getObject() instanceof BNode) { + objectBlankNodes.add((BNode) st.getObject()); } - /** - * Adds a default namespace prefix to the namespace table, if no prefix has been defined. - * @param namespaceIri The namespace IRI. Cannot be null. - * @param defaultPrefix The default prefix to use, if no prefix is yet assigned. Cannot be null. - */ - protected void addDefaultNamespacePrefixIfMissing(String namespaceIri, String defaultPrefix) { - if ((namespaceIri != null) && (defaultPrefix != null)) { - if (!namespaceTable.containsValue(namespaceIri)) { - namespaceTable.put(defaultPrefix, namespaceIri); - } - } + // Store the statement in the main 'triple map'. + UnsortedTurtlePredicateObjectMap poMap = null; + if (unsortedTripleMap.containsKey(st.getSubject())) { + poMap = unsortedTripleMap.get(st.getSubject()); + } else { + poMap = new UnsortedTurtlePredicateObjectMap(); + unsortedTripleMap.put(st.getSubject(), poMap); } - /** - * Checks if all predicate IRIs have a matching namespace prefix, i.e. that they can be converted into QNames. - * This is needed for RDF/XML. - * If there is a predicate without a matching namespace prefix, a namespace prefix is created for it. - */ - protected void addNamespacePrefixesForPredicates() { - int namespaceIndex = 1; - for (IRI predicate : allPredicates) { - String predicateString = predicate.stringValue(); - int namespaceIriEndPos = Math.max( - predicateString.lastIndexOf("/"), - predicateString.lastIndexOf("#") - ); - String namespaceIri = predicateString.substring(0, namespaceIriEndPos + 1); - if (namespaceIri.length() >= 1) { - if (!namespaceTable.containsValue(namespaceIri)) { - String newPrefix = "zzzns" + String.format("%04d", namespaceIndex); // TODO zzzns? - namespaceTable.put(newPrefix, namespaceIri); - generatedNamespaceTable.put(newPrefix, namespaceIri); // track the namespace mappings created by the serializer - namespaceIndex += 1; - } - } - } + UnsortedTurtleObjectList oList = null; + if (poMap.containsKey(st.getPredicate())) { + oList = poMap.get(st.getPredicate()); + } else { + oList = new UnsortedTurtleObjectList(); + poMap.put(st.getPredicate(), oList); } - /** - * Signals the end of the RDF data. This method is called when all data has - * been reported. - * - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void endRDF() throws RDFHandlerException { - try { - // !!!! Override method must set values for 'sortedOntologies', 'sortedTripleMap' & 'sortedBlankNodes' before calling this method - - // Create serialisation names for blank nodes. - String blankNodeNamePadding = prepareBlankNodeNamePadding(unsortedBlankNodes.size()); - - int blankNodeIndex = 0; - for (Value value : sortedBlankNodes) { - if (value instanceof BNode) { - BNode bnode = (BNode) value; - blankNodeIndex++; - String blankNodeName = Integer.toString(blankNodeIndex); - if (blankNodeName.length() < blankNodeNamePadding.length()) { - blankNodeName = blankNodeNamePadding.substring(0, blankNodeNamePadding.length() - blankNodeName.length()) + blankNodeName; - } - blankNodeName = "blank" + blankNodeName; - blankNodeNameMap.put(bnode, blankNodeName); - } - } - - populateListOfFirstPredicates(); - - // Add default namespace prefixes, if they haven't yet been defined. May fail if these prefixes have - // already been defined for different namespace IRIs. - addDefaultNamespacePrefixIfMissing(Constants.RDF_NS_URI, "rdf"); - addDefaultNamespacePrefixIfMissing(Constants.RDFS_NS_URI, "rdfs"); - addDefaultNamespacePrefixIfMissing(Constants.OWL_NS_URI, "owl"); - addDefaultNamespacePrefixIfMissing(Constants.XML_SCHEMA_NS_URI, "xs"); - - // Add any extra namespaces needed to make all predicates writeable as a QName. This is especially needed for RDF/XML. - addNamespacePrefixesForPredicates(); - - // Create reverse namespace table. - reverseNamespaceTable = new ReverseNamespaceTable(); - for (String prefix : namespaceTable.keySet()) { - String iri = namespaceTable.get(prefix); - reverseNamespaceTable.put(iri, prefix); - } - - // Create list of imports - SortedTurtleObjectList importList = new SortedTurtleObjectList(comparisonContext); - for (Resource subject : sortedOntologies) { - if (sortedTripleMap.containsKey(subject)) { - SortedTurtlePredicateObjectMap poMap = sortedTripleMap.get(subject); - if (poMap.containsKey(Constants.OWL_IMPORTS)) { - SortedTurtleObjectList importsOList = poMap.get(Constants.OWL_IMPORTS); - importList.addAll(importsOList); - } - } - } - - // Write header information, including leading comments. - writeHeader(out, importList, leadingComments); - - // Track how many of the subjects have been written - int allSubjectCount = 0; - for (Resource subject : sortedOntologies) { - if (!(subject instanceof BNode)) { - allSubjectCount++; - } - } - for (Resource subject : sortedTripleMap.sortedKeys()) { - if (!sortedOntologies.contains(subject) && !(subject instanceof BNode)) { - allSubjectCount++; - } - } - for (Resource resource : sortedBlankNodes) { - if (!inlineBlankNodes || !objectBlankNodes.contains(resource)) { - BNode bnode = (BNode) resource; - if (unsortedTripleMap.containsKey(bnode)) { - allSubjectCount++; - } - } - } - - // Write out subjects which are unsortedOntologies. - // TODO Above comment is probably misleading. Here we write down triples by a subject - int subjectCount = 0; - for (Resource subject : sortedOntologies) { - if (!(subject instanceof BNode)) { - subjectCount++; - writeSubjectTriples(out, subject); - if (subjectCount < allSubjectCount) { - writeSubjectSeparator(out); - } - } - } - - // Write out all other subjects (not unsortedOntologies; also not blank nodes). - for (Resource subject : sortedTripleMap.sortedKeys()) { - if (!sortedOntologies.contains(subject) && !(subject instanceof BNode)) { - subjectCount++; - writeSubjectTriples(out, subject); - if (subjectCount < allSubjectCount) { writeSubjectSeparator(out); } - } - } - - // Write out blank nodes that are subjects, if blank nodes are not being inlined or if the blank node is not an object. - for (Resource resource : sortedBlankNodes) { - if (!inlineBlankNodes || !objectBlankNodes.contains(resource)) { - BNode bnode = (BNode) resource; - if (unsortedTripleMap.containsKey(bnode)) { - subjectCount++; - writeSubjectTriples(out, bnode); - if (subjectCount < allSubjectCount) { - writeSubjectSeparator(out); - } - } - } - } - - // Write footer information, including any trailing comments. - writeFooter(out, trailingComments); - - out.flush(); - } catch (Throwable t) { - throw new RDFHandlerException("unable to generate/write RDF output", t); - } + if (!oList.contains(st.getObject())) { + oList.add(st.getObject()); } - private String prepareBlankNodeNamePadding(int numberOfBlankNodes) { - StringBuilder blankNodeNamePaddingBuilder = new StringBuilder(); - blankNodeNamePaddingBuilder.append("0"); - int blankNodeCount = numberOfBlankNodes; - while (blankNodeCount > 9) { - blankNodeCount /= 10; - blankNodeNamePaddingBuilder.append("0"); - } - return blankNodeNamePaddingBuilder.toString(); + // Note subjects which are OWL ontologies, as the are handled before other subjects. + if (st.getPredicate().equals(Constants.RDF_TYPE) && + st.getObject().equals(Constants.owlOntology) && + !unsortedOntologies.contains((st.getSubject()))) { + unsortedOntologies.add(st.getSubject()); } - private void populateListOfFirstPredicates() { - // Set up list of predicates that appear first under their subjects. - firstPredicates = new ArrayList<>(); // predicates that are specially rendered first - firstPredicates.add(Constants.RDF_TYPE); - firstPredicates.add(Constants.RDFS_SUB_CLASS_OF); - firstPredicates.add(Constants.RDFS_SUB_PROPERTY_OF); - firstPredicates.add(Constants.OWL_SAME_AS); - firstPredicates.add(Constants.RDFS_LABEL); - firstPredicates.add(Constants.RDFS_COMMENT); - firstPredicates.add(Constants.OWL_ON_PROPERTY); - firstPredicates.add(Constants.OWL_ON_CLASS); + // Note subjects & objects which are blank nodes. + if (st.getSubject() instanceof BNode) { + unsortedBlankNodes.add(st.getSubject()); } - - /** - * Handles a statement. - * - * @param st The statement. - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void handleStatement(Statement st) throws RDFHandlerException { - // Store the predicate. - allPredicates.add(st.getPredicate()); - - // Store the object if it is a blank node. - if (st.getObject() instanceof BNode) { - objectBlankNodes.add((BNode) st.getObject()); - } - - // Store the statement in the main 'triple map'. - UnsortedTurtlePredicateObjectMap poMap = null; - if (unsortedTripleMap.containsKey(st.getSubject())) { - poMap = unsortedTripleMap.get(st.getSubject()); - } else { - poMap = new UnsortedTurtlePredicateObjectMap(); - unsortedTripleMap.put(st.getSubject(), poMap); - } - - UnsortedTurtleObjectList oList = null; - if (poMap.containsKey(st.getPredicate())) { - oList = poMap.get(st.getPredicate()); - } else { - oList = new UnsortedTurtleObjectList(); - poMap.put(st.getPredicate(), oList); - } - - if (!oList.contains(st.getObject())) { - oList.add(st.getObject()); - } - - // Note subjects which are OWL ontologies, as the are handled before other subjects. - if (st.getPredicate().equals(Constants.RDF_TYPE) && - st.getObject().equals(Constants.owlOntology) && - !unsortedOntologies.contains((st.getSubject()))) { - unsortedOntologies.add(st.getSubject()); - } - - // Note subjects & objects which are blank nodes. - if (st.getSubject() instanceof BNode) { - unsortedBlankNodes.add(st.getSubject()); - } - if ((st.getObject() instanceof BNode) && !unsortedBlankNodes.contains(((BNode) st.getObject()))) { - unsortedBlankNodes.add((BNode)st.getObject()); - } + if ((st.getObject() instanceof BNode) && !unsortedBlankNodes.contains(((BNode) st.getObject()))) { + unsortedBlankNodes.add((BNode) st.getObject()); } - - /** - * Handles a comment. - * - * @param comment The comment. - * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. - */ - @Override - public void handleComment(String comment) throws RDFHandlerException { - // NOTE: comments are suppressed, as it isn't clear how to sort them sensibly with triples. + } + + /** + * Handles a comment. + * + * @param comment The comment. + * @throws org.eclipse.rdf4j.rio.RDFHandlerException If the RDF handler has encountered an unrecoverable error. + */ + @Override + public void handleComment(String comment) throws RDFHandlerException { + // NOTE: comments are suppressed, as it isn't clear how to sort them sensibly with triples. + } + + protected String convertQNameToString(QName qname, boolean useTurtleQuoting) { + if (qname == null) { + return "null"; + } else if (qname.getPrefix() != null) { + return qname.getPrefix() + ":" + qname.getLocalPart(); + } else { + return (useTurtleQuoting ? "<" : "") + + qname.getNamespaceURI() + qname.getLocalPart() + + (useTurtleQuoting ? ">" : ""); } - - protected String convertQNameToString(QName qname, boolean useTurtleQuoting) { - if (qname == null) { - return "null"; - } else if (qname.getPrefix() != null) { - return qname.getPrefix() + ":" + qname.getLocalPart(); - } else { - return (useTurtleQuoting ? "<" : "") + - qname.getNamespaceURI() + qname.getLocalPart() + - (useTurtleQuoting ? ">" : ""); - } + } + + protected String convertVerbIriToString(IRI iri, + boolean useGeneratedPrefixes, + boolean useTurtleQuoting, + boolean useJsonLdQuoting) throws Exception { + if (Constants.RDF_TYPE.equals(iri)) { + if (useTurtleQuoting) { + return "a"; + } + if (useJsonLdQuoting) { + return "@type"; + } } - - protected String convertVerbIriToString(IRI iri, - boolean useGeneratedPrefixes, - boolean useTurtleQuoting, - boolean useJsonLdQuoting) throws Exception { - if (Constants.RDF_TYPE.equals(iri)) { - if (useTurtleQuoting) { return "a"; } - if (useJsonLdQuoting) { return "@type"; } - } - return convertIriToString(iri, useGeneratedPrefixes, - useTurtleQuoting, useJsonLdQuoting); + return convertIriToString(iri, useGeneratedPrefixes, + useTurtleQuoting, useJsonLdQuoting); + } + + protected String convertIriToString(IRI iri, + boolean useGeneratedPrefixes, + boolean useTurtleQuoting, + boolean useJsonLdQuoting) throws Exception { + if (ShortIriPreferences.PREFIX.equals(shortIriPreference)) { + // return the IRI out as a QName if possible. + QName qname = convertIriToQName(iri, useGeneratedPrefixes); + if (qname != null) { + return convertQNameToString(qname, useTurtleQuoting); + } else { // return the IRI relative to the base IRI, if possible. + return convertIriToRelativeIri(iri, useTurtleQuoting); + } } - - protected String convertIriToString(IRI iri, - boolean useGeneratedPrefixes, - boolean useTurtleQuoting, - boolean useJsonLdQuoting) throws Exception { - if (ShortIriPreferences.PREFIX.equals(shortIriPreference)) { - // return the IRI out as a QName if possible. - QName qname = convertIriToQName(iri, useGeneratedPrefixes); - if (qname != null) { - return convertQNameToString(qname, useTurtleQuoting); - } else { // return the IRI relative to the base IRI, if possible. - return convertIriToRelativeIri(iri, useTurtleQuoting); - } - } - if (ShortIriPreferences.BASE_IRI.equals(shortIriPreference)) { - // return the IRI relative to the base URI, if possible. - String relativeIri = convertIriToRelativeIri(iri, useTurtleQuoting); - - // check if the relative URI is shortened, or not - if (!relativeIri.contains(iri.stringValue())) { - return relativeIri; - } else { - // return the IRI out as a QName if possible. - QName qname = convertIriToQName(iri, useGeneratedPrefixes); - if (qname != null) { - return convertQNameToString(qname, useTurtleQuoting); - } else { // return the absolute IRI - return String.format("%s%s%s", - useTurtleQuoting ? "<" : "", - iri.stringValue(), - useTurtleQuoting ? ">" : "" - ); - } - } + if (ShortIriPreferences.BASE_IRI.equals(shortIriPreference)) { + // return the IRI relative to the base URI, if possible. + String relativeIri = convertIriToRelativeIri(iri, useTurtleQuoting); + + // check if the relative URI is shortened, or not + if (!relativeIri.contains(iri.stringValue())) { + return relativeIri; + } else { + // return the IRI out as a QName if possible. + QName qname = convertIriToQName(iri, useGeneratedPrefixes); + if (qname != null) { + return convertQNameToString(qname, useTurtleQuoting); + } else { // return the absolute IRI + return String.format("%s%s%s", + useTurtleQuoting ? "<" : "", + iri.stringValue(), + useTurtleQuoting ? ">" : "" + ); } - return String.format("%s%s%s", - useTurtleQuoting ? "<" : "", - iri.stringValue(), - useTurtleQuoting ? ">" : "" - ); // if nothing else, do this + } } - - /** Compares a sorted triple map to the unsorted triple map from which it was created, - * to check that no triples were lost. Prints detailed information is a triple loss is detected. - */ - protected void compareSortedToUnsortedTripleMap(SortedTurtleSubjectPredicateObjectMap sortedTripleMap, - UnsortedTurtleSubjectPredicateObjectMap unsortedTripleMap, - String label) { - if (sortedTripleMap.fullSize() != unsortedTripleMap.fullSize()) { - unsortedTripleMap.toSorted(Value.class, comparisonContext); // generate BN-to-BN debugging - if (sortedTripleMap.size() != unsortedTripleMap.size()) { - System.err.println("**** " + label + ": subjects unexpectedly lost or gained during sorting: " + sortedTripleMap.fullSize() + " != " + unsortedTripleMap.fullSize()); - } - } + return String.format("%s%s%s", + useTurtleQuoting ? "<" : "", + iri.stringValue(), + useTurtleQuoting ? ">" : "" + ); // if nothing else, do this + } + + /** + * Compares a sorted triple map to the unsorted triple map from which it was created, to check that no triples were + * lost. Prints detailed information is a triple loss is detected. + */ + protected void compareSortedToUnsortedTripleMap(SortedTurtleSubjectPredicateObjectMap sortedTripleMap, + UnsortedTurtleSubjectPredicateObjectMap unsortedTripleMap, + String label) { + if (sortedTripleMap.fullSize() != unsortedTripleMap.fullSize()) { + unsortedTripleMap.toSorted(Value.class, comparisonContext); // generate BN-to-BN debugging + if (sortedTripleMap.size() != unsortedTripleMap.size()) { + System.err.println( + "**** " + label + ": subjects unexpectedly lost or gained during sorting: " + sortedTripleMap.fullSize() + + " != " + unsortedTripleMap.fullSize()); + } } + } - protected abstract void writeHeader(Writer out, SortedTurtleObjectList importList, String[] leadingComments) - throws Exception; + protected abstract void writeHeader(Writer out, SortedTurtleObjectList importList, String[] leadingComments) + throws Exception; - protected abstract void writeSubjectTriples(Writer out, Resource subject) throws Exception; + protected abstract void writeSubjectTriples(Writer out, Resource subject) throws Exception; - protected abstract void writeSubjectSeparator(Writer out) throws Exception; + protected abstract void writeSubjectSeparator(Writer out) throws Exception; - protected abstract void writePredicateAndObjectValues(Writer out, IRI predicate, Collection values) - throws Exception; + protected abstract void writePredicateAndObjectValues(Writer out, IRI predicate, Collection values) + throws Exception; - protected abstract void writeFooter(Writer out, String[] trailingComments) throws Exception; + protected abstract void writeFooter(Writer out, String[] trailingComments) throws Exception; } \ No newline at end of file diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfXmlWriter.java b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfXmlWriter.java index b66f255..480d74f 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfXmlWriter.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedRdfXmlWriter.java @@ -37,6 +37,7 @@ import static org.edmcouncil.rdf_toolkit.util.Constants.rdfLangString; import static org.edmcouncil.rdf_toolkit.util.Constants.rdfParseType; import static org.edmcouncil.rdf_toolkit.util.Constants.xsString; + import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; @@ -45,6 +46,7 @@ import org.eclipse.rdf4j.rio.RDFHandlerException; import org.edmcouncil.rdf_toolkit.model.SortedTurtleObjectList; import org.edmcouncil.rdf_toolkit.model.SortedTurtlePredicateObjectMap; +import org.edmcouncil.rdf_toolkit.util.Constants; import org.edmcouncil.rdf_toolkit.util.StringDataTypeOptions; import java.io.OutputStream; import java.io.Writer; @@ -72,6 +74,11 @@ public class SortedRdfXmlWriter extends SortedRdfWriter { // RDF/XML only allows "resources" in RDF collections private static final Class COLLECTION_CLASS = Resource.class; + /** + * RDF Types that are preferred to be used first. + */ + private final List preferredRdfTypes = new ArrayList<>(PREFERRED_RDF_TYPES); + /** Output stream for this RDF/XML writer. */ // Note: this is an internal Java class, not part of the published API. But easier than writing our own indenter // here. @@ -80,6 +87,7 @@ public class SortedRdfXmlWriter extends SortedRdfWriter { /** Namespace prefix for the RDF namespace. */ private String rdfPrefix = "rdf"; + /** * Creates an RDFWriter instance that will write sorted RDF/XML to the supplied output stream. * @@ -134,6 +142,10 @@ public SortedRdfXmlWriter(Writer writer, Map options) { */ @Override public void endRDF() throws RDFHandlerException { + if (suppressNamedIndividuals) { + preferredRdfTypes.remove(Constants.owlNamedIndividual); + } + try { // Sort triples, etc. sortedOntologies = unsortedOntologies.toSorted(COLLECTION_CLASS, comparisonContext); @@ -255,11 +267,13 @@ protected void writeSubjectTriples(Writer out, Resource subject) throws Exceptio } // Try to determine whether to use or an element based on rdf:type value. - SortedTurtleObjectList subjectRdfTypes = poMap.get(RDF_TYPE); // needed to determine if a type can be used as the XML element name + // Needed to determine if a type can be used as the XML element name: + SortedTurtleObjectList subjectRdfTypes = poMap.get(RDF_TYPE); if (subjectRdfTypes != null) { // make a copy so we can remove values safely subjectRdfTypes = (SortedTurtleObjectList) subjectRdfTypes.clone(); } - if ((subjectRdfTypes != null) && (subjectRdfTypes.size() >= 2) && subjectRdfTypes.contains(owlThing)) { // ignore owl:Thing for the purposes of determining what type to use an an element name in RDF/XML + // ignore owl:Thing for the purposes of determining what type to use an element name in RDF/XML + if ((subjectRdfTypes != null) && (subjectRdfTypes.size() >= 2)) { subjectRdfTypes.remove(owlThing); } IRI enclosingElementIRI = rdfDescription; // default value @@ -274,7 +288,9 @@ protected void writeSubjectTriples(Writer out, Resource subject) throws Exceptio } } } - if ((rdfDescription.equals(enclosingElementIRI)) && (subjectRdfTypes != null) && (subjectRdfTypes.size() == 1)) { // if no preferred type, use the type for the XML element tag, if there is only a single rdf:type + + // If no preferred type, use the type for the XML element tag, if there is only a single rdf:type + if ((rdfDescription.equals(enclosingElementIRI)) && (subjectRdfTypes != null) && (subjectRdfTypes.size() == 1)) { Value subjectRdfTypeValue = subjectRdfTypes.first(); if (subjectRdfTypeValue instanceof IRI) { QName subjectRdfTypeQName = convertIriToQName((IRI) subjectRdfTypeValue, USE_GENERATED_PREFIXES); diff --git a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedTurtleWriter.java b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedTurtleWriter.java index 6e21061..23c753d 100644 --- a/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedTurtleWriter.java +++ b/src/main/java/org/edmcouncil/rdf_toolkit/writer/SortedTurtleWriter.java @@ -254,7 +254,7 @@ protected void writeSubjectTriples(Writer out, Resource subject) throws Exceptio List valuesList = new ArrayList<>(); if (values != null && !values.isEmpty()) { if (predicate == Constants.RDF_TYPE) { - for (IRI preferredType : preferredRdfTypes) { + for (IRI preferredType : PREFERRED_RDF_TYPES) { if (values.contains(preferredType)) { valuesList.add(preferredType); values.remove(preferredType); diff --git a/src/test/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandlerTest.java b/src/test/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandlerTest.java new file mode 100644 index 0000000..0f1324d --- /dev/null +++ b/src/test/java/org/edmcouncil/rdf_toolkit/runner/CommandLineArgumentsHandlerTest.java @@ -0,0 +1,108 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Enterprise Data Management Council + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package org.edmcouncil.rdf_toolkit.runner; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.FileNotFoundException; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.commons.cli.ParseException; +import org.edmcouncil.rdf_toolkit.runner.exception.RdfToolkitOptionHandlingException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +class CommandLineArgumentsHandlerTest { + + private static final String[] REQUIRED_ARGS = new String[]{"--source-format", "rdf-xml"}; + + @ParameterizedTest + @MethodSource("getTestParams") + void runTests(TestParam testParam) throws RdfToolkitOptionHandlingException, FileNotFoundException, ParseException { + var commandLineArgumentsHandler = new CommandLineArgumentsHandler(); + var allArgs = getArgsWithRequired(testParam.getArgs()); + var rdfToolkitOptions = commandLineArgumentsHandler.handleArguments(allArgs); + + var fieldValue = getFieldValue(rdfToolkitOptions, testParam.getFieldName()); + + assertEquals(testParam.getExpectedValue(), fieldValue); + } + + private String[] getArgsWithRequired(String[] args) { + int finalLength = REQUIRED_ARGS.length + args.length; + List argsList = new ArrayList<>(finalLength); + Collections.addAll(argsList, REQUIRED_ARGS); + Collections.addAll(argsList, args); + return argsList.toArray(new String[finalLength]); + } + + static List getTestParams() { + return List.of( + new TestParam(new String[]{"--line-end", "\r\n"}, "lineEnd", "\r\n"), + new TestParam(new String[]{""}, "lineEnd", "\n"), + new TestParam(new String[]{"--omit-xmlns-namespace"}, "omitXmlnsNamespace", true), + new TestParam(new String[]{""}, "omitXmlnsNamespace", false), + new TestParam(new String[]{"--suppress-named-individuals"}, "suppressNamedIndividuals", true), + new TestParam(new String[]{""}, "suppressNamedIndividuals", false) + ); + } + + static Object getFieldValue(RdfToolkitOptions rdfToolkitOptions, String fieldName) { + try { + Field field = rdfToolkitOptions.getClass().getDeclaredField(fieldName); + field.setAccessible(true); + + return field.get(rdfToolkitOptions); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new IllegalArgumentException("Unable to get field value for field with name " + fieldName, e); + } + } + + private static class TestParam { + + private final String[] args; + private final String fieldName; + private final Object expectedValue; + + public TestParam(String[] args, String fieldName, Object expectedValue) { + this.args = args; + this.fieldName = fieldName; + this.expectedValue = expectedValue; + } + + public String[] getArgs() { + return args; + } + + public String getFieldName() { + return fieldName; + } + + public Object getExpectedValue() { + return expectedValue; + } + } +}