Merge pull request #1024 from GitGuardian/mmillet/spi-515-add-all_sec…

…rets-option-to-ggshield-secret-scans_bis Mmillet/spi 515 add all secrets option to ggshield secret scans
GitGuardian · Dec 9, 2024 · 6d0d8b8 · 6d0d8b8
2 parents 4479d68 + cb39b68
commit 6d0d8b8
Show file tree

Hide file tree

Showing 48 changed files with 2,805 additions and 287 deletions.
diff --git a/...0_mathias.millet_spi_515_add_all_secrets_option_to_ggshield_secret_scans_bis.md b/...0_mathias.millet_spi_515_add_all_secrets_option_to_ggshield_secret_scans_bis.md
@@ -0,0 +1,41 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Removed
+
+- A bullet item for the Removed category.
+
+-->
+
+### Added
+
+- The `--all-secrets` option to secret scans, allowing to display all found secrets, and their possible ignore reason.
+
+<!--
+### Changed
+
+- A bullet item for the Changed category.
+
+-->
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category.
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category.
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category.
+
+-->
diff --git a/ggshield/cmd/secret/scan/secret_scan_common_options.py b/ggshield/cmd/secret/scan/secret_scan_common_options.py
@@ -140,6 +140,15 @@ def _banlist_detectors_callback(
 )
 
 
+_all_secrets = click.option(
+    "--all-secrets",
+    is_flag=True,
+    help=("Do not ignore any secret. Possible ignore-reason is shown as well."),
+    callback=create_config_callback("secret", "all_secrets"),
+    default=None,
+)
+
+
 def add_secret_scan_common_options() -> Callable[[AnyFunction], AnyFunction]:
     def decorator(cmd: AnyFunction) -> AnyFunction:
         add_common_options()(cmd)
@@ -153,6 +162,7 @@ def decorator(cmd: AnyFunction) -> AnyFunction:
         _banlist_detectors_option(cmd)
         _with_incident_details_option(cmd)
         instance_option(cmd)
+        _all_secrets(cmd)
         return cmd
 
     return decorator

diff --git a/ggshield/core/config/user_config.py b/ggshield/core/config/user_config.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import re
 from dataclasses import field
@@ -47,6 +48,7 @@ class SecretConfig(FilteredConfig):
     ignore_known_secrets: bool = False
     with_incident_details: bool = False
     # if configuration key is left unset the dashboard's remediation message is used.
+    all_secrets: bool = False
     prereceive_remediation_message: str = ""
 
     def add_ignored_match(self, secret: IgnoredMatch) -> None:
@@ -61,6 +63,22 @@ def add_ignored_match(self, secret: IgnoredMatch) -> None:
                 return
         self.ignored_matches.append(secret)
 
+    def dump_for_monitoring(self) -> str:
+        return json.dumps(
+            {
+                "show_secrets": self.show_secrets,
+                "ignored_detectors_count": len(self.ignored_detectors),
+                "ignored_matches_count": len(self.ignored_matches),
+                "ignored_paths_count": len(self.ignored_paths),
+                "ignore_known_secrets": self.ignore_known_secrets,
+                "with_incident_details": self.with_incident_details,
+                "has_prereceive_remediation_message": bool(
+                    self.prereceive_remediation_message
+                ),
+                "all_secrets": self.all_secrets,
+            }
+        )
+
 
 def validate_policy_id(policy_id: str) -> bool:
     return bool(POLICY_ID_PATTERN.fullmatch(policy_id))

diff --git a/ggshield/verticals/secret/output/secret_text_output_handler.py b/ggshield/verticals/secret/output/secret_text_output_handler.py
@@ -5,20 +5,19 @@
 from pygitguardian.client import VERSIONS
 from pygitguardian.models import PolicyBreak
 
+from ggshield.core.constants import IncidentStatus
 from ggshield.core.filter import group_policy_breaks_by_ignore_sha
 from ggshield.core.lines import Line, get_offset, get_padding
 from ggshield.core.text_utils import (
     STYLE,
     clip_long_line,
-    file_info,
     format_text,
     pluralize,
     translate_validity,
 )
-from ggshield.verticals.secret.secret_scanner import IgnoreReason
 
 from ..extended_match import ExtendedMatch
-from ..secret_scan_collection import Result, SecretScanCollection
+from ..secret_scan_collection import IgnoreReason, Result, SecretScanCollection
 from .secret_output_handler import SecretOutputHandler
 
 
@@ -29,6 +28,31 @@
 NB_CONTEXT_LINES = 3
 
 
+def secret_file_info(
+    filename: str,
+    incident_count: int,
+    incident_status: IncidentStatus = IncidentStatus.DETECTED,
+    hidden_secrets_count: int = 0,
+) -> str:
+    """Return the formatted file info (number of secrets, filename, optional number of hidden secrets)."""
+    result = "\n{} {}: {} {} {}\n".format(
+        format_text(">", STYLE["detector_line_start"]),
+        format_text(filename, STYLE["filename"]),
+        incident_count,
+        pluralize("secret", incident_count, "secrets"),
+        incident_status.value,
+    )
+    if hidden_secrets_count > 0:
+        result += "{} {}  {} {} ignored - use the `--all-secrets` option to display {}\n".format(
+            format_text(">", STYLE["detector_line_start"]),
+            " " * len(filename),
+            hidden_secrets_count,
+            pluralize("secret", hidden_secrets_count, "secrets"),
+            pluralize("it", hidden_secrets_count, "them"),
+        )
+    return result
+
+
 class SecretTextOutputHandler(SecretOutputHandler):
     def _process_scan_impl(self, scan: SecretScanCollection) -> str:
         """Output Secret Scan Collection in text format"""
@@ -100,6 +124,9 @@ def process_result(self, result: Result) -> str:
             result.censor()
 
         number_of_displayed_secrets = 0
+        number_of_hidden_secrets = sum(
+            result.ignored_policy_breaks_count_by_reason.values()
+        )
         for ignore_sha, policy_breaks in sha_dict.items():
             number_of_displayed_secrets += 1
 
@@ -118,8 +145,12 @@ def process_result(self, result: Result) -> str:
             )
 
         file_info_line = ""
-        if number_of_displayed_secrets > 0:
-            file_info_line = file_info(result.filename, number_of_displayed_secrets)
+        if number_of_displayed_secrets > 0 or number_of_hidden_secrets > 0:
+            file_info_line = secret_file_info(
+                result.filename,
+                incident_count=number_of_displayed_secrets,
+                hidden_secrets_count=number_of_hidden_secrets,
+            )
 
         return file_info_line + result_buf.getvalue()
 
@@ -259,28 +290,30 @@ def policy_break_header(
     """
     Build a header for the policy break.
     """
+    policy_break = policy_breaks[0]
     indent = "   "
     validity_msg = (
-        f"\n{indent}Validity: {format_text(translate_validity(policy_breaks[0].validity), STYLE['incident_validity'])}"
-        if policy_breaks[0].validity
+        f"\n{indent}Validity: {format_text(translate_validity(policy_break.validity), STYLE['incident_validity'])}"
+        if policy_break.validity
         else ""
     )
 
     start_line = format_text(">>", STYLE["detector_line_start"])
-    policy_break_type = format_text(
-        policy_breaks[0].break_type, STYLE["policy_break_type"]
-    )
+    policy_break_type = format_text(policy_break.break_type, STYLE["policy_break_type"])
     number_occurrences = format_text(str(len(policy_breaks)), STYLE["occurrence_count"])
     ignore_sha = format_text(ignore_sha, STYLE["ignore_sha"])
 
-    return f"""
+    message = f"""
 {start_line} Secret detected: {policy_break_type}{validity_msg}
 {indent}Occurrences: {number_occurrences}
 {indent}Known by GitGuardian dashboard: {"YES" if known_secret else "NO"}
-{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_breaks[0].incident_url else "N/A"}
+{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_break.incident_url else "N/A"}
 {indent}Secret SHA: {ignore_sha}
-
 """
+    if policy_break.is_excluded:
+        message += f"{indent}Ignored: {policy_break.exclude_reason}\n"
+
+    return message + "\n"
 
 
 def no_leak_message() -> str:

diff --git a/ggshield/verticals/secret/secret_scan_collection.py b/ggshield/verticals/secret/secret_scan_collection.py
@@ -2,8 +2,7 @@
 from enum import Enum
 from pathlib import Path
 from typing import (
-    Any,
-    Callable,
+    Counter,
     Dict,
     Iterable,
     List,
@@ -15,22 +14,52 @@
 )
 
 from pygitguardian import GGClient
-from pygitguardian.models import Detail, Match, PolicyBreak, ScanResult, SecretIncident
+from pygitguardian.models import (
+    Detail,
+    DiffKind,
+    Match,
+    PolicyBreak,
+    ScanResult,
+    SecretIncident,
+)
 
+from ggshield.core.config.user_config import SecretConfig
 from ggshield.core.errors import UnexpectedError, handle_api_error
+from ggshield.core.filter import is_in_ignored_matches
 from ggshield.core.lines import Line, get_lines_from_content
-from ggshield.core.scan.scannable import Scannable
+from ggshield.core.scan import Scannable
 from ggshield.utils.git_shell import Filemode
 from ggshield.verticals.secret.extended_match import ExtendedMatch
 
 
-class IgnoreReason(Enum):
+class IgnoreReason(str, Enum):
     IGNORED_MATCH = "ignored_match"
     IGNORED_DETECTOR = "ignored_detector"
     KNOWN_SECRET = "known_secret"
     NOT_INTRODUCED = "not_introduced"
+    BACKEND_EXCLUDED = "backend_excluded"
+
+
+def compute_ignore_reason(
+    policy_break: PolicyBreak, secret_config: SecretConfig
+) -> Optional[str]:
+    """Computes the possible ignore reason associated with a PolicyBreak"""
+    ignore_reason = None
+    if policy_break.diff_kind in {DiffKind.DELETION, DiffKind.CONTEXT}:
+        ignore_reason = IgnoreReason.NOT_INTRODUCED
+    elif policy_break.is_excluded:
+        ignore_reason = f"Excluded from backend ({policy_break.exclude_reason})"
+    elif is_in_ignored_matches(policy_break, secret_config.ignored_matches or []):
+        ignore_reason = IgnoreReason.IGNORED_MATCH
+    elif policy_break.break_type in secret_config.ignored_detectors:
+        ignore_reason = IgnoreReason.IGNORED_DETECTOR
+    elif secret_config.ignore_known_secrets and policy_break.known_secret:
+        ignore_reason = IgnoreReason.KNOWN_SECRET
 
+    return ignore_reason
 
+
+@dataclass
 class Result:
     """
     Return model for a scan which zips the information
@@ -42,28 +71,7 @@ class Result:
     path: Path
     url: str
     policy_breaks: List[PolicyBreak]
-    ignored_policy_breaks_count_by_reason: Dict[IgnoreReason, int]
-
-    def __init__(self, file: Scannable, scan: ScanResult):
-        self.filename = file.filename
-        self.filemode = file.filemode
-        self.path = file.path
-        self.url = file.url
-        self.policy_breaks = scan.policy_breaks
-        lines = get_lines_from_content(file.content, self.filemode)
-        self.enrich_matches(lines)
-        self.ignored_policy_breaks_count_by_reason = {}
-
-    def __eq__(self, other: Any) -> bool:
-        if not isinstance(other, Result):
-            return False
-        return (
-            self.filename == other.filename
-            and self.filemode == other.filemode
-            and self.path == other.path
-            and self.url == other.url
-            and self.policy_breaks == other.policy_breaks
-        )
+    ignored_policy_breaks_count_by_reason: Counter[str]
 
     @property
     def is_on_patch(self) -> bool:
@@ -90,21 +98,41 @@ def censor(self) -> None:
     def has_policy_breaks(self) -> bool:
         return len(self.policy_breaks) > 0
 
-    def apply_ignore_function(
-        self, reason: IgnoreReason, ignore_function: Callable[[PolicyBreak], bool]
-    ):
-        assert (
-            reason not in self.ignored_policy_breaks_count_by_reason
-        ), f"Ignore was already computed for {IgnoreReason}"
+    @classmethod
+    def from_scan_result(
+        cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
+    ) -> "Result":
+        """Creates a Result from a Scannable and a ScanResult.
+        - Removes ignored policy breaks
+        - replace matches by ExtendedMatches
+        """
+
         to_keep = []
-        ignored_count = 0
-        for policy_break in self.policy_breaks:
-            if ignore_function(policy_break):
-                ignored_count += 1
+        ignored_policy_breaks_count_by_reason = Counter()
+        for policy_break in scan_result.policy_breaks:
+            ignore_reason = compute_ignore_reason(policy_break, secret_config)
+            if ignore_reason is not None:
+                if secret_config.all_secrets:
+                    policy_break.exclude_reason = ignore_reason
+                    policy_break.is_excluded = True
+                    to_keep.append(policy_break)
+                else:
+                    ignored_policy_breaks_count_by_reason[ignore_reason] += 1
             else:
                 to_keep.append(policy_break)
-        self.policy_breaks = to_keep
-        self.ignored_policy_breaks_count_by_reason[reason] = ignored_count
+
+        result = Result(
+            filename=file.filename,
+            filemode=file.filemode,
+            path=file.path,
+            url=file.url,
+            policy_breaks=to_keep,
+            ignored_policy_breaks_count_by_reason=ignored_policy_breaks_count_by_reason,
+        )
+
+        lines = get_lines_from_content(file.content, file.filemode)
+        result.enrich_matches(lines)
+        return result
 
 
 class Error(NamedTuple):