diff --git a/services/linkchecker/src/main/java/net/geocat/database/linkchecker/entities/helper/DocumentLink.java b/services/linkchecker/src/main/java/net/geocat/database/linkchecker/entities/helper/DocumentLink.java index efa442a..418baa4 100644 --- a/services/linkchecker/src/main/java/net/geocat/database/linkchecker/entities/helper/DocumentLink.java +++ b/services/linkchecker/src/main/java/net/geocat/database/linkchecker/entities/helper/DocumentLink.java @@ -38,8 +38,11 @@ import javax.persistence.Column; import javax.persistence.MappedSuperclass; import javax.persistence.Transient; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; //Represents a link in a document @MappedSuperclass @@ -83,25 +86,48 @@ public DocumentLink() { //-- - public static List validProtocols = Arrays.asList(new String[]{ + public static List validViewProtocols = Arrays.asList(new String[]{ + "wms", "http://www.opengis.net/def/serviceType/ogc/wms".toLowerCase(), - "http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(), - "http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(), - "https://tools.ietf.org/html/rfc4287".toLowerCase(), - "ATOM Syndication Format".toLowerCase(), - "OGC Web Feature Service".toLowerCase(), "OGC Web Map Service".toLowerCase(), - "OGC Web Map Tile Service".toLowerCase(), - "wms", + "Web Map Service (WMS)".toLowerCase(), + "OGC:WMS".toLowerCase(), + "http://www.opengeospatial.org/standards/wms", "wmts", + "http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(), + "OGC Web Map Tile Service".toLowerCase(), + "OGC:WMTS".toLowerCase(), + "http://www.opengeospatial.org/standards/wmts" + }); + + public static List validDownloadProtocols = Arrays.asList(new String[]{ "wfs", - "atom", - "http://www.opengeospatial.org/standards/wms", - "http://www.opengeospatial.org/standards/wmts", + "http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(), + "OGC 
Web Feature Service".toLowerCase(), + "Web Feature Service (WFS)".toLowerCase(), + "OGC:WFS".toLowerCase(), "http://www.opengeospatial.org/standards/wfs", - "INSPIRE Atom".toLowerCase() + "atom", + "https://tools.ietf.org/html/rfc4287".toLowerCase(), + "ATOM Syndication Format".toLowerCase(), + "INSPIRE Atom".toLowerCase(), + "wcs", + "OGC:WCS".toLowerCase(), + "http://www.opengis.net/def/serviceType/ogc/wcs".toLowerCase(), + "api features", + "OGC - API Features".toLowerCase(), + "OGC:OGC-API-Features-items".toLowerCase(), + "HTTP:OGC:API-Features".toLowerCase(), + "http://www.opengis.net/def/interface/ogcapi-features".toLowerCase(), + "SensorThings".toLowerCase(), + "sos", + "OGC:SOS".toLowerCase(), + "http://www.opengis.net/def/serviceType/ogc/sos".toLowerCase() }); + public static List validProtocols = Stream.concat(validViewProtocols.stream(), + validDownloadProtocols.stream()).collect(Collectors.toList()); + public static List validAtomProtocols = Arrays.asList(new String[]{ "https://tools.ietf.org/html/rfc4287".toLowerCase(), "ATOM Syndication Format".toLowerCase(), @@ -116,12 +142,39 @@ public DocumentLink() { "http://inspire.ec.europa.eu/metadata-codelist/SpatialDataServiceType/view".toLowerCase() }); + public static final String VALID_PROTOCOLS_VIEW_REGEX = "(.*wms.*|.*wmts.*|.*web map service.*)"; + + public static final String VALID_PROTOCOLS_DOWNLOAD_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*web feature service.*)"; + + public static final String VALID_PROTOCOLS_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*wms.*|.*wmts.*|.*web map service.*|.*web feature service.*)"; + public boolean isInspireSimplifiedLink() { - if ((rawURL == null) || (protocol == null) || (applicationProfile == null)) + // Relax the check to process links without the applicationProfile information + if ((rawURL == null) || (protocol == null)) return false; - if (rawURL.isEmpty() || protocol.isEmpty() || 
applicationProfile.isEmpty()) + if (rawURL.isEmpty() || protocol.isEmpty()) return false; + if (!validProtocols.contains(protocol.toLowerCase())) { + // Check protocol match "simple" values instead of exact match + if (!protocol.toLowerCase().matches(VALID_PROTOCOLS_REGEX)) { + return false; + } + } + + + return true; + } + + + /*public boolean isInspireSimplifiedLink() { + if ((rawURL == null) || (protocol == null) || (applicationProfile == null)) + if ((rawURL == null) || (protocol == null)) + return false; + if (rawURL.isEmpty() || protocol.isEmpty() || applicationProfile.isEmpty()) + if (rawURL.isEmpty() || protocol.isEmpty()) + return false; + if (!validProtocols.contains(protocol.toLowerCase())) return false; @@ -129,7 +182,8 @@ public boolean isInspireSimplifiedLink() { return false; return true; - } + }*/ + //-- diff --git a/services/linkchecker/src/main/java/net/geocat/database/linkchecker/service/ServiceDocumentLinkService.java b/services/linkchecker/src/main/java/net/geocat/database/linkchecker/service/ServiceDocumentLinkService.java index 430e00e..baffacc 100644 --- a/services/linkchecker/src/main/java/net/geocat/database/linkchecker/service/ServiceDocumentLinkService.java +++ b/services/linkchecker/src/main/java/net/geocat/database/linkchecker/service/ServiceDocumentLinkService.java @@ -72,11 +72,68 @@ public DatasetDocumentLink create(DatasetMetadataRecord datasetMetadataRecord, O result.setFunction(onlineResource.getFunction()); result.setOperationName(onlineResource.getOperationName()); result.setRawURL(onlineResource.getRawURL()); - result.setProtocol(onlineResource.getProtocol()); + + String protocolFromUrl = inferProtocolFromUrl(onlineResource.getRawURL()); + + if ((onlineResource.getProtocol() == null) && (protocolFromUrl != null)) { + // If no protocol defined, try to infer the protocol from the URL + result.setProtocol(protocolFromUrl); + } else { + result.setProtocol(onlineResource.getProtocol()); + + // if the XML document's protocol isn't 
compatible with the actual URL + // then use the inferred URL protocol. + // Example: + // xml protocol is WMS (view) + // but, url is "...?service=WFS" (inferred url protocol is download and not view) + // then, set the protocol to the one inferred from the URL (ignore the XML) + if (protocolFromUrl != null) { + boolean isDownloadProtocol = ServiceDocumentLink.validDownloadProtocols.contains(onlineResource.getProtocol().toLowerCase()); + boolean isDownloadUrlProtocol = ServiceDocumentLink.validDownloadProtocols.contains(protocolFromUrl.toLowerCase()); + boolean isViewProtocol = ServiceDocumentLink.validViewProtocols.contains(onlineResource.getProtocol().toLowerCase()); + boolean isViewUrlProtocol = ServiceDocumentLink.validViewProtocols.contains(protocolFromUrl); + + if (isDownloadProtocol) { + if (!isDownloadUrlProtocol && isViewUrlProtocol) { + result.setProtocol(protocolFromUrl); + } + } else if (isViewProtocol) { + if (!isViewUrlProtocol && isDownloadUrlProtocol) { + result.setProtocol(protocolFromUrl); + } + } + } + } + result.setApplicationProfile(onlineResource.getApplicationProfile()); result.setLinkCheckJobId(datasetMetadataRecord.getLinkCheckJobId()); return result; } + + + private String inferProtocolFromUrl(String url) { + String normalizedUrl = url.toLowerCase(); + + if (normalizedUrl.indexOf("wms") > -1) { + return "wms"; + } else if (normalizedUrl.indexOf("wmts") > -1) { + return "wmts"; + } else if (normalizedUrl.indexOf("wfs") > -1) { + return "wfs"; + } else if (normalizedUrl.indexOf("atom") > -1) { + return "atom"; + } else if (normalizedUrl.indexOf("wcs") > -1) { + return "wcs"; + } else if (normalizedUrl.indexOf("sos") > -1) { + return "sos"; + } else if (normalizedUrl.indexOf("api features") > -1) { + return "api features"; + } else if (normalizedUrl.indexOf("sensorthings") > -1) { + return "sensorthings"; + } + + return null; + } } diff --git 
a/services/linkchecker/src/main/java/net/geocat/eventprocessor/processors/postprocess/EventProcessor_PostProcessDatasetDocumentEvent.java b/services/linkchecker/src/main/java/net/geocat/eventprocessor/processors/postprocess/EventProcessor_PostProcessDatasetDocumentEvent.java index 2ff8816..beb4df8 100644 --- a/services/linkchecker/src/main/java/net/geocat/eventprocessor/processors/postprocess/EventProcessor_PostProcessDatasetDocumentEvent.java +++ b/services/linkchecker/src/main/java/net/geocat/eventprocessor/processors/postprocess/EventProcessor_PostProcessDatasetDocumentEvent.java @@ -323,13 +323,40 @@ private void process() { .filter(x -> (x.getCapabilitiesDocumentType() == CapabilitiesType.WFS) || (x.getCapabilitiesDocumentType() == CapabilitiesType.Atom)) .collect(Collectors.toList()); - if (!viewLinks.isEmpty()) + + if (!viewLinks.isEmpty()) { localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS); - if (!downloadLinks.isEmpty()) - localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS); + localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size()); + } else { + // Dataset link simplification + if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) { + List viewLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream() + .filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validViewProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_VIEW_REGEX))) + .collect(Collectors.toList()); + + if (!viewLinksMetadataOnlineResources.isEmpty()) { + localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS); + localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinksMetadataOnlineResources.size()); + } + } + } - localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size()); - 
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size()); + if (!downloadLinks.isEmpty()) { + localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS); + localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size()); + } else { + // Dataset link simplification + if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) { + List downloadLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream() + .filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validDownloadProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_DOWNLOAD_REGEX))) + .collect(Collectors.toList()); + + if (!downloadLinksMetadataOnlineResources.isEmpty()) { + localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS); + localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinksMetadataOnlineResources.size()); + } + } + } // List serviceLinks = new ArrayList<>(); // List capLinks = new ArrayList<>();