From a503a3c280ba2fe71cdcd754f3ceeed946dd5f52 Mon Sep 17 00:00:00 2001 From: Florian Hussonnois Date: Fri, 16 Aug 2024 16:54:54 +0200 Subject: [PATCH] feat(grok): add new property keepEmptyCaptures --- .../plugin/transform/grok/GrokInterface.java | 7 +++ .../plugin/transform/grok/Transform.java | 16 ++++++- .../grok/pattern/GrokCaptureGroup.java | 4 ++ .../transform/grok/TransformValueTest.java | 44 +++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/GrokInterface.java b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/GrokInterface.java index acab61a..5da0f7a 100644 --- a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/GrokInterface.java +++ b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/GrokInterface.java @@ -41,4 +41,11 @@ public interface GrokInterface { description = "The first successful match by grok will result in the task being finished. Set to `false` if you want the task to try all configured patterns." ) boolean isBreakOnFirstMatch(); + + @PluginProperty + @Schema( + title = "If `true`, keep empty captures.", + description = "When an optional field cannot be captured, the empty field is retained in the output. Set `false` if you want empty optional fields to be filtered out." + ) + boolean isKeepEmptyCaptures(); } diff --git a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/Transform.java b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/Transform.java index 57c814e..50fc17c 100644 --- a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/Transform.java +++ b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/Transform.java @@ -44,6 +44,9 @@ public abstract class Transform extends Task { @Builder.Default private boolean breakOnFirstMatch = true; + @Builder.Default + private boolean keepEmptyCaptures = false; + @Getter(AccessLevel.PRIVATE) private GrokPatternCompiler compiler; @@ -79,7 +82,18 @@ public Map matches(final byte[] bytes) { // merge all named captured Map mergedValues = new HashMap<>(); for (Map namedCaptured : allNamedCaptured) { - mergedValues.putAll(namedCaptured); + if (keepEmptyCaptures) { + mergedValues.putAll(namedCaptured); + } else { + Map filtered = namedCaptured.entrySet() + .stream() + .filter(entry -> { + Object value = entry.getValue(); + return value != null && (!(value instanceof String str) || !str.isEmpty()); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + mergedValues.putAll(filtered); + } } return mergedValues; } diff --git a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/pattern/GrokCaptureGroup.java b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/pattern/GrokCaptureGroup.java index 786d7f3..3871764 100644 --- a/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/pattern/GrokCaptureGroup.java +++ b/plugin-transform-grok/src/main/java/io/kestra/plugin/transform/grok/pattern/GrokCaptureGroup.java @@ -38,6 +38,8 @@ public GrokCaptureExtractor getExtractor(final Consumer consumer) { private record RawValueExtractor(int[] backRefs, Consumer consumer) implements GrokCaptureExtractor { + private static final String EMPTY_VALUE = ""; + /** * {@inheritDoc} */ @@ -50,6 +52,8 @@ public void extract(byte[] bytes, Region region) { String value = new String(bytes, offset, length, StandardCharsets.UTF_8); consumer.accept(value); break; // we only need to capture the first value. + } else { + consumer.accept(EMPTY_VALUE); } } } diff --git a/plugin-transform-grok/src/test/java/io/kestra/plugin/transform/grok/TransformValueTest.java b/plugin-transform-grok/src/test/java/io/kestra/plugin/transform/grok/TransformValueTest.java index a9ee9c4..1d82912 100644 --- a/plugin-transform-grok/src/test/java/io/kestra/plugin/transform/grok/TransformValueTest.java +++ b/plugin-transform-grok/src/test/java/io/kestra/plugin/transform/grok/TransformValueTest.java @@ -130,4 +130,48 @@ public void shouldTransformGivenConfigWithMultiplePatternsAndBreakTrue() throws output.getValue() ); } + + @Test + public void shouldTransformGivenKeepEmptyCapturesTrue() throws Exception { + // Given + RunContext runContext = runContextFactory.of(); + TransformValue task = TransformValue.builder() + .patterns(List.of("%{IP:client_ip}(?:\\s+%{WORD:method})? %{NOTSPACE:url}")) + .namedCapturesOnly(true) + .breakOnFirstMatch(true) + .keepEmptyCaptures(true) + .from("192.168.1.1 /index.html") + .build(); + + // When + TransformValue.Output output = task.run(runContext); + + // Then + Assertions.assertEquals( + Map.of("method", "", "client_ip", "192.168.1.1", "url", "/index.html"), + output.getValue() + ); + } + + @Test + public void shouldTransformGivenKeepEmptyCapturesFalse() throws Exception { + // Given + RunContext runContext = runContextFactory.of(); + TransformValue task = TransformValue.builder() + .patterns(List.of("%{IP:client_ip}(?:\\s+%{WORD:method})? %{NOTSPACE:url}")) + .namedCapturesOnly(true) + .breakOnFirstMatch(true) + .keepEmptyCaptures(false) + .from("192.168.1.1 /index.html") + .build(); + + // When + TransformValue.Output output = task.run(runContext); + + // Then + Assertions.assertEquals( + Map.of("client_ip", "192.168.1.1", "url", "/index.html"), + output.getValue() + ); + } } \ No newline at end of file