Skip to content

Commit

Permalink
Merge pull request #1024 from GitGuardian/mmillet/spi-515-add-all_sec…
Browse files Browse the repository at this point in the history
…rets-option-to-ggshield-secret-scans_bis

Mmillet/spi 515 add all secrets option to ggshield secret scans
  • Loading branch information
gg-mmill authored Dec 9, 2024
2 parents 4479d68 + cb39b68 commit 6d0d8b8
Show file tree
Hide file tree
Showing 48 changed files with 2,805 additions and 287 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!--
A new scriv changelog fragment.
Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Removed
- A bullet item for the Removed category.
-->

### Added

- The `--all-secrets` option to secret scans, allowing to display all found secrets, and their possible ignore reason.

<!--
### Changed
- A bullet item for the Changed category.
-->
<!--
### Deprecated
- A bullet item for the Deprecated category.
-->
<!--
### Fixed
- A bullet item for the Fixed category.
-->
<!--
### Security
- A bullet item for the Security category.
-->
10 changes: 10 additions & 0 deletions ggshield/cmd/secret/scan/secret_scan_common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ def _banlist_detectors_callback(
)


_all_secrets = click.option(
"--all-secrets",
is_flag=True,
help=("Do not ignore any secret. Possible ignore-reason is shown as well."),
callback=create_config_callback("secret", "all_secrets"),
default=None,
)


def add_secret_scan_common_options() -> Callable[[AnyFunction], AnyFunction]:
def decorator(cmd: AnyFunction) -> AnyFunction:
add_common_options()(cmd)
Expand All @@ -153,6 +162,7 @@ def decorator(cmd: AnyFunction) -> AnyFunction:
_banlist_detectors_option(cmd)
_with_incident_details_option(cmd)
instance_option(cmd)
_all_secrets(cmd)
return cmd

return decorator
Expand Down
18 changes: 18 additions & 0 deletions ggshield/core/config/user_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import re
from dataclasses import field
Expand Down Expand Up @@ -47,6 +48,7 @@ class SecretConfig(FilteredConfig):
ignore_known_secrets: bool = False
with_incident_details: bool = False
# if configuration key is left unset the dashboard's remediation message is used.
all_secrets: bool = False
prereceive_remediation_message: str = ""

def add_ignored_match(self, secret: IgnoredMatch) -> None:
Expand All @@ -61,6 +63,22 @@ def add_ignored_match(self, secret: IgnoredMatch) -> None:
return
self.ignored_matches.append(secret)

def dump_for_monitoring(self) -> str:
return json.dumps(
{
"show_secrets": self.show_secrets,
"ignored_detectors_count": len(self.ignored_detectors),
"ignored_matches_count": len(self.ignored_matches),
"ignored_paths_count": len(self.ignored_paths),
"ignore_known_secrets": self.ignore_known_secrets,
"with_incident_details": self.with_incident_details,
"has_prereceive_remediation_message": bool(
self.prereceive_remediation_message
),
"all_secrets": self.all_secrets,
}
)


def validate_policy_id(policy_id: str) -> bool:
return bool(POLICY_ID_PATTERN.fullmatch(policy_id))
Expand Down
59 changes: 46 additions & 13 deletions ggshield/verticals/secret/output/secret_text_output_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,19 @@
from pygitguardian.client import VERSIONS
from pygitguardian.models import PolicyBreak

from ggshield.core.constants import IncidentStatus
from ggshield.core.filter import group_policy_breaks_by_ignore_sha
from ggshield.core.lines import Line, get_offset, get_padding
from ggshield.core.text_utils import (
STYLE,
clip_long_line,
file_info,
format_text,
pluralize,
translate_validity,
)
from ggshield.verticals.secret.secret_scanner import IgnoreReason

from ..extended_match import ExtendedMatch
from ..secret_scan_collection import Result, SecretScanCollection
from ..secret_scan_collection import IgnoreReason, Result, SecretScanCollection
from .secret_output_handler import SecretOutputHandler


Expand All @@ -29,6 +28,31 @@
NB_CONTEXT_LINES = 3


def secret_file_info(
filename: str,
incident_count: int,
incident_status: IncidentStatus = IncidentStatus.DETECTED,
hidden_secrets_count: int = 0,
) -> str:
"""Return the formatted file info (number of secrets, filename, optional number of hidden secrets)."""
result = "\n{} {}: {} {} {}\n".format(
format_text(">", STYLE["detector_line_start"]),
format_text(filename, STYLE["filename"]),
incident_count,
pluralize("secret", incident_count, "secrets"),
incident_status.value,
)
if hidden_secrets_count > 0:
result += "{} {} {} {} ignored - use the `--all-secrets` option to display {}\n".format(
format_text(">", STYLE["detector_line_start"]),
" " * len(filename),
hidden_secrets_count,
pluralize("secret", hidden_secrets_count, "secrets"),
pluralize("it", hidden_secrets_count, "them"),
)
return result


class SecretTextOutputHandler(SecretOutputHandler):
def _process_scan_impl(self, scan: SecretScanCollection) -> str:
"""Output Secret Scan Collection in text format"""
Expand Down Expand Up @@ -100,6 +124,9 @@ def process_result(self, result: Result) -> str:
result.censor()

number_of_displayed_secrets = 0
number_of_hidden_secrets = sum(
result.ignored_policy_breaks_count_by_reason.values()
)
for ignore_sha, policy_breaks in sha_dict.items():
number_of_displayed_secrets += 1

Expand All @@ -118,8 +145,12 @@ def process_result(self, result: Result) -> str:
)

file_info_line = ""
if number_of_displayed_secrets > 0:
file_info_line = file_info(result.filename, number_of_displayed_secrets)
if number_of_displayed_secrets > 0 or number_of_hidden_secrets > 0:
file_info_line = secret_file_info(
result.filename,
incident_count=number_of_displayed_secrets,
hidden_secrets_count=number_of_hidden_secrets,
)

return file_info_line + result_buf.getvalue()

Expand Down Expand Up @@ -259,28 +290,30 @@ def policy_break_header(
"""
Build a header for the policy break.
"""
policy_break = policy_breaks[0]
indent = " "
validity_msg = (
f"\n{indent}Validity: {format_text(translate_validity(policy_breaks[0].validity), STYLE['incident_validity'])}"
if policy_breaks[0].validity
f"\n{indent}Validity: {format_text(translate_validity(policy_break.validity), STYLE['incident_validity'])}"
if policy_break.validity
else ""
)

start_line = format_text(">>", STYLE["detector_line_start"])
policy_break_type = format_text(
policy_breaks[0].break_type, STYLE["policy_break_type"]
)
policy_break_type = format_text(policy_break.break_type, STYLE["policy_break_type"])
number_occurrences = format_text(str(len(policy_breaks)), STYLE["occurrence_count"])
ignore_sha = format_text(ignore_sha, STYLE["ignore_sha"])

return f"""
message = f"""
{start_line} Secret detected: {policy_break_type}{validity_msg}
{indent}Occurrences: {number_occurrences}
{indent}Known by GitGuardian dashboard: {"YES" if known_secret else "NO"}
{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_breaks[0].incident_url else "N/A"}
{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_break.incident_url else "N/A"}
{indent}Secret SHA: {ignore_sha}
"""
if policy_break.is_excluded:
message += f"{indent}Ignored: {policy_break.exclude_reason}\n"

return message + "\n"


def no_leak_message() -> str:
Expand Down
106 changes: 67 additions & 39 deletions ggshield/verticals/secret/secret_scan_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from enum import Enum
from pathlib import Path
from typing import (
Any,
Callable,
Counter,
Dict,
Iterable,
List,
Expand All @@ -15,22 +14,52 @@
)

from pygitguardian import GGClient
from pygitguardian.models import Detail, Match, PolicyBreak, ScanResult, SecretIncident
from pygitguardian.models import (
Detail,
DiffKind,
Match,
PolicyBreak,
ScanResult,
SecretIncident,
)

from ggshield.core.config.user_config import SecretConfig
from ggshield.core.errors import UnexpectedError, handle_api_error
from ggshield.core.filter import is_in_ignored_matches
from ggshield.core.lines import Line, get_lines_from_content
from ggshield.core.scan.scannable import Scannable
from ggshield.core.scan import Scannable
from ggshield.utils.git_shell import Filemode
from ggshield.verticals.secret.extended_match import ExtendedMatch


class IgnoreReason(Enum):
class IgnoreReason(str, Enum):
IGNORED_MATCH = "ignored_match"
IGNORED_DETECTOR = "ignored_detector"
KNOWN_SECRET = "known_secret"
NOT_INTRODUCED = "not_introduced"
BACKEND_EXCLUDED = "backend_excluded"


def compute_ignore_reason(
policy_break: PolicyBreak, secret_config: SecretConfig
) -> Optional[str]:
"""Computes the possible ignore reason associated with a PolicyBreak"""
ignore_reason = None
if policy_break.diff_kind in {DiffKind.DELETION, DiffKind.CONTEXT}:
ignore_reason = IgnoreReason.NOT_INTRODUCED
elif policy_break.is_excluded:
ignore_reason = f"Excluded from backend ({policy_break.exclude_reason})"
elif is_in_ignored_matches(policy_break, secret_config.ignored_matches or []):
ignore_reason = IgnoreReason.IGNORED_MATCH
elif policy_break.break_type in secret_config.ignored_detectors:
ignore_reason = IgnoreReason.IGNORED_DETECTOR
elif secret_config.ignore_known_secrets and policy_break.known_secret:
ignore_reason = IgnoreReason.KNOWN_SECRET

return ignore_reason


@dataclass
class Result:
"""
Return model for a scan which zips the information
Expand All @@ -42,28 +71,7 @@ class Result:
path: Path
url: str
policy_breaks: List[PolicyBreak]
ignored_policy_breaks_count_by_reason: Dict[IgnoreReason, int]

def __init__(self, file: Scannable, scan: ScanResult):
self.filename = file.filename
self.filemode = file.filemode
self.path = file.path
self.url = file.url
self.policy_breaks = scan.policy_breaks
lines = get_lines_from_content(file.content, self.filemode)
self.enrich_matches(lines)
self.ignored_policy_breaks_count_by_reason = {}

def __eq__(self, other: Any) -> bool:
if not isinstance(other, Result):
return False
return (
self.filename == other.filename
and self.filemode == other.filemode
and self.path == other.path
and self.url == other.url
and self.policy_breaks == other.policy_breaks
)
ignored_policy_breaks_count_by_reason: Counter[str]

@property
def is_on_patch(self) -> bool:
Expand All @@ -90,21 +98,41 @@ def censor(self) -> None:
def has_policy_breaks(self) -> bool:
return len(self.policy_breaks) > 0

def apply_ignore_function(
self, reason: IgnoreReason, ignore_function: Callable[[PolicyBreak], bool]
):
assert (
reason not in self.ignored_policy_breaks_count_by_reason
), f"Ignore was already computed for {IgnoreReason}"
@classmethod
def from_scan_result(
cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
) -> "Result":
"""Creates a Result from a Scannable and a ScanResult.
- Removes ignored policy breaks
- replace matches by ExtendedMatches
"""

to_keep = []
ignored_count = 0
for policy_break in self.policy_breaks:
if ignore_function(policy_break):
ignored_count += 1
ignored_policy_breaks_count_by_reason = Counter()
for policy_break in scan_result.policy_breaks:
ignore_reason = compute_ignore_reason(policy_break, secret_config)
if ignore_reason is not None:
if secret_config.all_secrets:
policy_break.exclude_reason = ignore_reason
policy_break.is_excluded = True
to_keep.append(policy_break)
else:
ignored_policy_breaks_count_by_reason[ignore_reason] += 1
else:
to_keep.append(policy_break)
self.policy_breaks = to_keep
self.ignored_policy_breaks_count_by_reason[reason] = ignored_count

result = Result(
filename=file.filename,
filemode=file.filemode,
path=file.path,
url=file.url,
policy_breaks=to_keep,
ignored_policy_breaks_count_by_reason=ignored_policy_breaks_count_by_reason,
)

lines = get_lines_from_content(file.content, file.filemode)
result.enrich_matches(lines)
return result


class Error(NamedTuple):
Expand Down
Loading

0 comments on commit 6d0d8b8

Please sign in to comment.