Skip to content

Commit

Permalink
[MR] markdown render with html sanitizer (#10300)
Browse files Browse the repository at this point in the history
* Merged PR 2525: Introduce html Sanitizer
* Merged PR 2534: Sign third party packages
  • Loading branch information
lyndaidaii authored Dec 6, 2024
1 parent 3e6294d commit ce2fa53
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 37 deletions.
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<PackageVersion Include="EntityFramework" Version="6.4.4" />
<PackageVersion Include="FluentAssertions" Version="5.5.0" />
<PackageVersion Include="FluentLinkChecker" Version="1.0.0.10" />
<PackageVersion Include="HtmlSanitizer" Version="8.1.870" />
<PackageVersion Include="Knapcode.MiniZip" Version="0.20.0" />
<PackageVersion Include="LibGit2Sharp" Version="0.26.0" />
<PackageVersion Include="Lucene.Net.Contrib" Version="3.0.3" />
Expand Down
3 changes: 3 additions & 0 deletions NuGet.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
<clear />
<packageSource key="NuGet.org">
<package pattern="Antlr" />
<package pattern="AngleSharp.*" />
<package pattern="AngleSharp" />
<package pattern="Autofac.*" />
<package pattern="Autofac" />
<package pattern="Azure.*" />
Expand All @@ -26,6 +28,7 @@
<package pattern="FluentAssertions" />
<package pattern="FluentLinkChecker" />
<package pattern="HtmlAgilityPack" />
<package pattern="HtmlSanitizer" />
<package pattern="Hyak.Common" />
<package pattern="Knapcode.MiniZip" />
<package pattern="LibGit2Sharp.NativeBinaries" />
Expand Down
3 changes: 3 additions & 0 deletions sign.thirdparty.props
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<Project>
<ItemGroup>
<ThirdPartyBinaries Include="AngleSharp.dll" />
<ThirdPartyBinaries Include="AngleSharp.Css.dll" />
<ThirdPartyBinaries Include="AnglicanGeek.MarkdownMailer.dll" />
<ThirdPartyBinaries Include="Antlr3.Runtime.dll" />
<ThirdPartyBinaries Include="Autofac.dll" />
Expand All @@ -16,6 +18,7 @@
<ThirdPartyBinaries Include="Elmah.dll" />
<ThirdPartyBinaries Include="git2-572e4d8.dll" />
<ThirdPartyBinaries Include="HtmlAgilityPack.dll" />
<ThirdPartyBinaries Include="HtmlSanitizer.dll" />
<ThirdPartyBinaries Include="ICSharpCode.SharpZipLib.dll" />
<ThirdPartyBinaries Include="json-ld.net.StrongName.dll" />
<ThirdPartyBinaries Include="Knapcode.MiniZip.dll" />
Expand Down
2 changes: 2 additions & 0 deletions src/NuGetGallery/App_Start/DefaultDependenciesModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
using Autofac;
using Autofac.Core;
using Autofac.Extensions.DependencyInjection;
using Ganss.Xss;
using Microsoft.ApplicationInsights.Extensibility;
using Microsoft.ApplicationInsights.Extensibility.Implementation;
using Microsoft.Extensions.DependencyInjection;
Expand Down Expand Up @@ -132,6 +133,7 @@ protected override void Load(ContainerBuilder builder)

services.AddSingleton(loggerFactory);
services.AddSingleton(typeof(ILogger<>), typeof(Logger<>));
services.AddSingleton<IHtmlSanitizer, HtmlSanitizer>();

UrlHelperExtensions.SetConfigurationService(configuration);
builder.RegisterType<UrlHelperWrapper>()
Expand Down
1 change: 1 addition & 0 deletions src/NuGetGallery/NuGetGallery.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2204,6 +2204,7 @@
<PackageReference Include="NuGet.StrongName.elmah.sqlserver" />
<PackageReference Include="NuGet.StrongName.elmah" />
<PackageReference Include="EntityFramework" />
<PackageReference Include="HtmlSanitizer" />
<PackageReference Include="Lucene.Net" />
<PackageReference Include="Lucene.Net.Contrib" />
<PackageReference Include="Microsoft.ApplicationInsights.TraceListener" />
Expand Down
39 changes: 31 additions & 8 deletions src/NuGetGallery/Services/MarkdownService.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
Expand All @@ -7,6 +7,7 @@
using System.Web;
using CommonMark;
using CommonMark.Syntax;
using Ganss.Xss;
using Markdig;
using Markdig.Extensions.EmphasisExtras;
using Markdig.Renderers;
Expand All @@ -20,19 +21,38 @@ public class MarkdownService : IMarkdownService
private static readonly TimeSpan RegexTimeout = TimeSpan.FromMinutes(1);
private static readonly Regex EncodedBlockQuotePattern = new Regex("^ {0,3}&gt;", RegexOptions.Multiline, RegexTimeout);
private static readonly Regex LinkPattern = new Regex("<a href=([\"\']).*?\\1", RegexOptions.None, RegexTimeout);
private static readonly Regex JavaScriptPattern = new Regex("<a href=([\"\'])javascript:.*?\\1 rel=([\"'])noopener noreferrer nofollow\\1>", RegexOptions.None, RegexTimeout);
private static readonly Regex HtmlCommentPattern = new Regex("<!--.*?-->", RegexOptions.Singleline, RegexTimeout);
private static readonly Regex ImageTextPattern = new Regex("!\\[\\]\\(", RegexOptions.Singleline, RegexTimeout);
private static readonly string altTextForImage = "alternate text is missing from this package README image";
private static readonly string AltTextForImage = "alternate text is missing from this package README image";

private readonly IFeatureFlagService _features;
private readonly IImageDomainValidator _imageDomainValidator;
private readonly IHtmlSanitizer _htmlSanitizer;

public MarkdownService(IFeatureFlagService features,
IImageDomainValidator imageDomainValidator)
IImageDomainValidator imageDomainValidator,
IHtmlSanitizer htmlSanitizer)
{
_features = features ?? throw new ArgumentNullException(nameof(features));
_imageDomainValidator = imageDomainValidator ?? throw new ArgumentNullException(nameof(imageDomainValidator));
_htmlSanitizer = htmlSanitizer ?? throw new ArgumentNullException(nameof(htmlSanitizer));
SanitizerSettings();
}

/// <summary>
/// Adds the "id" and "class" attribute names to the injected sanitizer's
/// <c>AllowedAttributes</c> collection. Invoked once from the constructor.
/// </summary>
/// <remarks>
/// NOTE(review): DefaultDependenciesModule registers <c>IHtmlSanitizer</c> with
/// <c>AddSingleton</c>, so this presumably mutates an instance shared with every
/// other consumer each time a MarkdownService is constructed - confirm that is
/// intended and safe while other requests may be sanitizing.
/// </remarks>
private void SanitizerSettings()
{
    // Attribute names permitted on top of whatever the sanitizer already allows.
    foreach (var attributeName in new[] { "id", "class" })
    {
        _htmlSanitizer.AllowedAttributes.Add(attributeName);
    }
}

/// <summary>
/// Passes <paramref name="input"/> through the injected HTML sanitizer.
/// </summary>
/// <param name="input">The HTML text to sanitize; may be null or blank.</param>
/// <returns>
/// The value returned by the sanitizer, or <paramref name="input"/> itself,
/// unchanged, when it is null, empty, or consists only of white space.
/// </returns>
private string SanitizeText(string input)
{
    // Nothing to sanitize: hand blank values back exactly as received.
    if (string.IsNullOrWhiteSpace(input))
    {
        return input;
    }

    return _htmlSanitizer.Sanitize(input);
}

public RenderedMarkdownResult GetHtmlFromMarkdown(string markdownString)
Expand All @@ -42,6 +62,7 @@ public RenderedMarkdownResult GetHtmlFromMarkdown(string markdownString)
throw new ArgumentNullException(nameof(markdownString));
}


if (_features.IsMarkdigMdRenderingEnabled())
{
return GetHtmlFromMarkdownMarkdig(markdownString, 1);
Expand Down Expand Up @@ -179,7 +200,9 @@ private RenderedMarkdownResult GetHtmlFromMarkdownCommonMark(string markdownStri
using (var htmlWriter = new StringWriter())
{
CommonMarkConverter.ProcessStage3(document, htmlWriter, settings);
output.Content = LinkPattern.Replace(htmlWriter.ToString(), "$0" + " rel=\"noopener noreferrer nofollow\"").Trim();
string htmlContent = htmlWriter.ToString();
htmlContent = SanitizeText(htmlContent);
output.Content = LinkPattern.Replace(htmlContent, "$0" + " rel=\"noopener noreferrer nofollow\"").Trim();

return output;
}
Expand All @@ -197,7 +220,7 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,

var markdownWithoutComments = HtmlCommentPattern.Replace(markdownString, "");

var markdownWithImageAlt = ImageTextPattern.Replace(markdownWithoutComments, $"![{altTextForImage}](");
var markdownWithImageAlt = ImageTextPattern.Replace(markdownWithoutComments, $"![{AltTextForImage}](");

var markdownWithoutBom = markdownWithImageAlt.TrimStart('\ufeff');

Expand Down Expand Up @@ -286,10 +309,10 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
renderer.Render(document);
output.Content = htmlWriter.ToString().Trim();
output.IsMarkdigMdSyntaxHighlightEnabled = _features.IsMarkdigMdSyntaxHighlightEnabled();
output.Content = JavaScriptPattern.Replace(htmlWriter.ToString(), "").Trim();
output.Content = SanitizeText(output.Content);

return output;
}
}
}
}
}
4 changes: 4 additions & 0 deletions src/NuGetGallery/Web.config
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,10 @@
<assemblyIdentity name="Autofac" publicKeyToken="17863AF14B0044DA" culture="neutral"/>
<bindingRedirect oldVersion="0.0.0.0-4.9.1.0" newVersion="4.9.1.0"/>
</dependentAssembly>
<dependentAssembly>
<assemblyIdentity name="AngleSharp" publicKeyToken="e83494dcdc6d31ea" culture="neutral"/>
<bindingRedirect oldVersion="0.0.0.0-0.17.1.0" newVersion="0.17.1.0"/>
</dependentAssembly>
</assemblyBinding>
</runtime>
</configuration>
Loading

0 comments on commit ce2fa53

Please sign in to comment.