Skip to content

Commit

Permalink
Merge branch 'microsoft:main' into abyss638/create-action-to-test
Browse files Browse the repository at this point in the history
  • Loading branch information
abyss638 authored Oct 23, 2023
2 parents ff74082 + b1fc271 commit 489b3d3
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 54 deletions.
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ optional arguments:
--trim PREFIX Prefix to strip from issue paths, e.g. the checkout directory on the build agent
```

Write out a JSON file of Code Climate tool format from [a set of] SARIF files.
Write out a JSON file of Code Climate tool format from [a set of] SARIF files.
This can then be published as a Code Quality report artefact in a GitLab pipeline and shown in GitLab UI for merge requests.

The JSON output can also be filtered using the blame information; see
Expand Down Expand Up @@ -629,18 +629,18 @@ configuration:
# This option controls whether to include results where a property to check is missing, default value is true.
default-include: false

# Items in `include` list are interpreted as inclusion filtering rules.
# Items in `include` list are interpreted as inclusion filtering rules.
# Items are treated with OR operator, the filtered results includes objects matching any rule.
# Each item can be one rule or a list of rules, in the latter case rules in the list are treated with AND operator - all rules must match.
include:
# The following line includes issues whose author-mail property contains "@microsoft.com" AND found in Java files.
# The following line includes issues whose author-mail property contains "@microsoft.com" AND found in Java files.
# Values with special characters `\:;_()$%^@,` must be enclosed in quotes (single or double):
- author-mail: "@microsoft.com"
locations[*].physicalLocation.artifactLocation.uri: "*.java"
# Instead of a substring, a regular expression can be used, enclosed in "/" characters.
# Issues whose committer-mail property includes a string matching the regular expression are included.
# Instead of a substring, a regular expression can be used, enclosed in "/" characters.
# Issues whose committer-mail property includes a string matching the regular expression are included.
# Use ^ and $ to match the whole committer-mail property.
- committer-mail:
- committer-mail:
value: "/^<myname.*\\.com>$/"
      # Configuration options can be overridden for any rule.
default-include: true
Expand All @@ -649,7 +649,7 @@ include:
exclude:
# The following line excludes issues whose location is in test Java files with names starting with the "Test" prefix.
- location: "Test*.java"
# The value for the property can be empty, in this case only existence of the property is checked.
# The value for the property can be empty, in this case only existence of the property is checked.
- suppression:
```
Expand Down Expand Up @@ -737,6 +737,7 @@ attributes defined in `sarif_file.RECORD_ATTRIBUTES`.
- `"Severity"` - the SARIF severity for the record. One of `error`, `warning` (the default if the
record doesn't specify) or `note`.
- `"Code"` - the issue code from the result.
- `"Description"` - the issue name from the result - corresponding to the Code.
- `"Location"` - the location of the issue, typically the file containing the issue. Format varies
by tool.
- `"Line"` - the line number in the file where the issue occurs. Value is a string. This defaults
Expand Down
6 changes: 5 additions & 1 deletion sarif/filter/filter_stats.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""
Statistics that record the outcome of a filter.
"""

import datetime


class FilterStats:
"""
Statistics that record the outcome of a a filter.
Statistics that record the outcome of a filter.
"""

def __init__(self, filter_description):
Expand Down
22 changes: 14 additions & 8 deletions sarif/filter/general_filter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
SARIF file filtering functionality.
"""

import os
import re
from typing import Optional, List
Expand Down Expand Up @@ -31,16 +35,16 @@


def get_filter_function(filter_spec):
"""Return a filter function for the given specification."""
if filter_spec:
filter_len = len(filter_spec)
if filter_len > 2 and filter_spec.startswith("/") and filter_spec.endswith("/"):
regex = filter_spec[1:-1]
return lambda value: re.search(regex, value, re.IGNORECASE)
else:
substring = filter_spec
# substring can be empty, in this case "in" returns true
# and only existence of the property checked.
return lambda value: substring in value
substring = filter_spec
# substring can be empty, in this case "in" returns true
        # and only existence of the property is checked.
return lambda value: substring in value
return lambda value: True


Expand All @@ -53,8 +57,8 @@ def _convert_glob_to_regex(property_name, property_value_spec):
last_component = property_name.split(".")[-1]
if last_component in FIELDS_REGEX_SHORTCUTS:
shortcuts = FIELDS_REGEX_SHORTCUTS[last_component]
rx = re.compile("|".join(map(re.escape, shortcuts.keys())))
property_value_spec = rx.sub(
regex = re.compile("|".join(map(re.escape, shortcuts.keys())))
property_value_spec = regex.sub(
lambda match: shortcuts[match.group(0)], property_value_spec
)

Expand Down Expand Up @@ -107,11 +111,13 @@ def _filter_append(self, filtered_results: List[dict], result: dict):
# Remove any existing filter log on the result
result.setdefault("properties", {}).pop("filtered", None)

included_stats = None
if self.apply_inclusion_filter:
included_stats = self._filter_result(result, self.include_filters)
if not included_stats["matchedFilter"]:
return
else:
# no inclusion filters, mark the result as included so far
included_stats = {"state": "included", "matchedFilter": []}

if self.apply_exclusion_filter:
excluded_stats = self._filter_result(result, self.exclude_filters)
Expand Down
4 changes: 2 additions & 2 deletions sarif/operations/codeclimate_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def _write_to_json(list_of_errors, output_file):
severity = _SEVERITIES.get(record.get("Severity", "warning"), "minor")

# split Code value to extract error ID and description
rule = record["Code"].split(" ", 1)[0]
description = record["Code"][len(rule) + 1 :]
rule = record["Code"]
description = record["Description"]

path = record["Location"]
line = record["Line"]
Expand Down
2 changes: 1 addition & 1 deletion sarif/operations/csv_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,6 @@ def _write_to_csv(file_or_files, output_file):
e for e in list_of_errors if e["Severity"] == severity
]
sorted_errors_by_severity = sorted(
errors_of_severity, key=lambda x: x["Code"]
errors_of_severity, key=sarif_file.combine_code_and_description
)
writer.writerows(error_dict for error_dict in sorted_errors_by_severity)
27 changes: 18 additions & 9 deletions sarif/operations/diff_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
from typing import Dict

from sarif.sarif_file import SarifFileSet, SARIF_SEVERITIES
from sarif import sarif_file


def _occurrences(occurrence_count):
Expand All @@ -24,7 +24,10 @@ def _record_to_location_tuple(record) -> str:


def print_diff(
original_sarif: SarifFileSet, new_sarif: SarifFileSet, output, check_level=None
original_sarif: sarif_file.SarifFileSet,
new_sarif: sarif_file.SarifFileSet,
output,
check_level=None,
) -> str:
"""
Generate a diff of the issues from the SARIF files and write it to stdout
Expand All @@ -38,7 +41,7 @@ def print_diff(
with open(output, "w", encoding="utf-8") as output_file:
json.dump(diff, output_file, indent=4)
else:
for severity in SARIF_SEVERITIES:
for severity in sarif_file.SARIF_SEVERITIES:
if diff[severity]["codes"]:
print(
severity,
Expand Down Expand Up @@ -83,7 +86,7 @@ def print_diff(
print(f" 'After' results were filtered by {filter_stats}")
ret = 0
if check_level:
for severity in SARIF_SEVERITIES:
for severity in sarif_file.SARIF_SEVERITIES:
ret += diff.get(severity, {}).get("+", 0)
if severity == check_level:
break
Expand All @@ -94,12 +97,16 @@ def print_diff(
return ret


def _find_new_occurrences(new_records, old_records, issue_code):
old_occurrences = [r for r in old_records if r["Code"] == issue_code]
def _find_new_occurrences(new_records, old_records, issue_code_and_desc):
old_occurrences = [
r
for r in old_records
if sarif_file.combine_code_and_description(r) == issue_code_and_desc
]
new_occurrences_new_locations = []
new_occurrences_new_lines = []
for r in new_records:
if r["Code"] == issue_code:
if sarif_file.combine_code_and_description(r) == issue_code_and_desc:
(new_location, new_line) = (True, True)
for old_r in old_occurrences:
if old_r["Location"] == r["Location"]:
Expand All @@ -119,15 +126,17 @@ def _find_new_occurrences(new_records, old_records, issue_code):
) + sorted(new_occurrences_new_lines, key=_record_to_location_tuple)


def calc_diff(original_sarif: SarifFileSet, new_sarif: SarifFileSet) -> Dict:
def calc_diff(
original_sarif: sarif_file.SarifFileSet, new_sarif: sarif_file.SarifFileSet
) -> Dict:
"""
Generate a diff of the issues from the SARIF files.
original_sarif corresponds to the old files.
new_sarif corresponds to the new files.
Return dict has keys "error", "warning", "note" and "all".
"""
ret = {"all": {"+": 0, "-": 0}}
for severity in SARIF_SEVERITIES:
for severity in sarif_file.SARIF_SEVERITIES:
original_histogram = dict(original_sarif.get_issue_code_histogram(severity))
new_histogram = new_sarif.get_issue_code_histogram(severity)
new_histogram_dict = dict(new_histogram)
Expand Down
16 changes: 11 additions & 5 deletions sarif/operations/emacs_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from jinja2 import Environment, FileSystemLoader, select_autoescape

from sarif.sarif_file import SarifFileSet
from sarif import sarif_file

_THIS_MODULE_PATH = os.path.dirname(__file__)

Expand All @@ -20,7 +20,7 @@


def generate_compile(
input_files: SarifFileSet, output: str, output_multiple_files: bool
input_files: sarif_file.SarifFileSet, output: str, output_multiple_files: bool
):
"""
Generate txt file from the input files.
Expand Down Expand Up @@ -96,10 +96,16 @@ def _generate_single_txt(input_file, output_file, date_val):
def _enrich_details(histogram, records_of_severity):
enriched_details = []

for error_code, count in histogram:
error_lines = [e for e in records_of_severity if e["Code"] == error_code]
for error_code_and_desc, count in histogram:
error_lines = [
e
for e in records_of_severity
if sarif_file.combine_code_and_description(e) == error_code_and_desc
]
lines = sorted(
error_lines, key=lambda x: x["Location"] + str(x["Line"]).zfill(6)
)
enriched_details.append({"code": error_code, "count": count, "details": lines})
enriched_details.append(
{"code": error_code_and_desc, "count": count, "details": lines}
)
return enriched_details
21 changes: 15 additions & 6 deletions sarif/operations/html_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from jinja2 import Environment, FileSystemLoader, select_autoescape

from sarif import charts
from sarif.sarif_file import SarifFileSet
from sarif import sarif_file

_THIS_MODULE_PATH = os.path.dirname(__file__)

Expand All @@ -22,7 +22,10 @@


def generate_html(
input_files: SarifFileSet, image_file: str, output: str, output_multiple_files: bool
input_files: sarif_file.SarifFileSet,
image_file: str,
output: str,
output_multiple_files: bool,
):
"""
Generate HTML file from the input files.
Expand Down Expand Up @@ -127,10 +130,16 @@ def _generate_single_html(
def _enrich_details(histogram, records_of_severity):
enriched_details = []

for error_code, count in histogram:
error_lines = (e for e in records_of_severity if e["Code"] == error_code)
for error_code_and_desc, count in histogram:
error_lines_generator = (
e
for e in records_of_severity
if sarif_file.combine_code_and_description(e) == error_code_and_desc
)
lines = sorted(
error_lines, key=lambda x: x["Location"] + str(x["Line"]).zfill(6)
error_lines_generator, key=lambda x: x["Location"] + str(x["Line"]).zfill(6)
)
enriched_details.append(
{"code": error_code_and_desc, "count": count, "details": lines}
)
enriched_details.append({"code": error_code, "count": count, "details": lines})
return enriched_details
1 change: 0 additions & 1 deletion sarif/operations/trend_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from sarif import sarif_file
from sarif.sarif_file import SarifFileSet

ATTRIBUTES = ["Severity", "Code", "Location", "Line"]
TIMESTAMP_COLUMNS = ["Date", "Tool", *sarif_file.SARIF_SEVERITIES]


Expand Down
14 changes: 9 additions & 5 deletions sarif/operations/word_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
from docx.oxml import ns

from sarif import charts, sarif_file
from sarif.sarif_file import SarifFileSet


def generate_word_docs_from_sarif_inputs(
input_files: SarifFileSet, image_file: str, output: str, output_multiple_files: bool
input_files: sarif_file.SarifFileSet,
image_file: str,
output: str,
output_multiple_files: bool,
):
"""
Convert SARIF input to Word file output.
Expand Down Expand Up @@ -115,7 +117,7 @@ def _dump_errors_summary_by_sev(document, sarif_data):
# out in descending order.
dict_of_error_codes = {}
for error in errors_of_severity:
issue_code = error["Code"]
issue_code = sarif_file.combine_code_and_description(error)
dict_of_error_codes[issue_code] = dict_of_error_codes.get(issue_code, 0) + 1
sorted_dict = sorted(
dict_of_error_codes.items(), key=lambda x: x[1], reverse=True
Expand All @@ -137,7 +139,9 @@ def _dump_each_error_in_detail(document, sarif_data):
sev_to_records = sarif_data.get_records_grouped_by_severity()
for severity in severities:
errors_of_severity = sev_to_records.get(severity, [])
sorted_errors_by_severity = sorted(errors_of_severity, key=lambda x: x["Code"])
sorted_errors_by_severity = sorted(
errors_of_severity, key=sarif_file.combine_code_and_description
)
# Sample:
# [{'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_coverage.py', 'Line': 119,
# 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'},
Expand Down Expand Up @@ -177,7 +181,7 @@ def _dump_each_error_in_detail(document, sarif_data):

for eachrow in sorted_errors_by_severity:
cells_text += [
eachrow["Code"],
sarif_file.combine_code_and_description(eachrow),
eachrow["Location"],
str(eachrow["Line"]),
]
Expand Down
Loading

0 comments on commit 489b3d3

Please sign in to comment.