diff --git a/project/Build.scala b/project/Build.scala index 56dc68a056fe..d6f7dcab2e1c 100644 --- a/project/Build.scala +++ b/project/Build.scala @@ -473,8 +473,7 @@ object Build { // Settings used when compiling dotty with a non-bootstrapped dotty lazy val commonBootstrappedSettings = commonDottySettings ++ Seq( - // To enable support of scaladoc and language-server projects you need to change this to true - bspEnabled := false, + bspEnabled := enableBspAllProjects, (Compile / unmanagedSourceDirectories) += baseDirectory.value / "src-bootstrapped", version := dottyVersion, diff --git a/scaladoc-testcases/src/tests/sanitization.scala b/scaladoc-testcases/src/tests/sanitization.scala new file mode 100644 index 000000000000..1fbf4197e2d4 --- /dev/null +++ b/scaladoc-testcases/src/tests/sanitization.scala @@ -0,0 +1,34 @@ +package tests.sanitization + +/** */ +class Script + +/** < script >alert('hello') */ +class ScriptWithSpaces + +/** */ +class FakeSafeScript + +/** Example < Second <: Third <= Fourth */ +class NotATag + +/** Example < Second >: Third */ +class NotATagButHasGreaterThan + +/** aalert('hello') + */ +class TagOutsideCode + +/** + * see [[<:<]], or [[>:>]] + */ +class LinkToTagLike \ No newline at end of file diff --git a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala index 88ca3534fcee..b812414cf873 100644 --- a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala +++ b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala @@ -1,10 +1,91 @@ package dotty.tools.scaladoc package tasty.comments +/** + * Removes HTML tags except simple ones that can be translated or that are definitely harmless, + * translates Scala/Javadoc tags, and generally cleans the input. + * Not the fastest code in the world, and should eventually be replaced by a real parser, + * but works fine for now. */ object Cleaner { import Regexes._ import java.util.regex.Matcher + // Tags that are considered safe enough and do not need escaping + private val SafeTags = Set( + "a", "abbr", "address", "area", "blockquote", "br", "b", "caption", "cite", "code", "col", "colgroup", + "dd", "del", "dfn", "em", "hr", "img", "ins", "i", "kbd", "label", "legend", "pre", "q", "samp", + "small", "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "var" + ) + + private def cleanHtml(text: String): String = { + val result = StringBuilder() + var index = 0 + var insideCode = false + var insideLink = false + while (index < text.length) { + if (insideCode) { + result.append(text(index)) + if (index >= 2 && text(index) == '`' && text(index - 1) == '`' && text(index - 2) == '`') { + insideCode = false + } + index += 1 + } else if (insideLink) { + result.append(text(index)) + if (index >= 1 && text(index) == ']' && text(index - 1) == ']') { + insideLink = false + } + index += 1 + } else if (index <= text.length - 3 && text(index) == '`' && text(index + 1) == '`' && text(index + 2) == '`') { + result.append("```") + insideCode = true + index += 3 + } else if (index <= text.length - 2 && text(index) == '[' && text(index + 1) == '[') { + result.append("[[") + insideLink = true + index += 2 + } else if (text(index) == safeTagMarker) { + // ignore it, it's a character that should never appear in everyday text anyway + index += 1 + } else if (text(index) == '<') { + val endOfNameIndex = text.indexOf(' ', index) + val endOfTagIndex = text.indexOf('>', index) + if (endOfNameIndex == -1 || endOfNameIndex == index + 1 || endOfTagIndex == -1) { + // not actually a tag, e.g., "< hello >", "a "<" // not actually a tag + case "p" | "div" => "\n\n" + case "h1" => "\n= " + case "/h1" => " =\n" + case "h2" => "\n== " + case "/h2" => " ==\n" + case "h3" => "\n=== " + case "/h3" => " ===\n" + case "h4" | "h5" | "h6" => "\n==== " + case "/h4" | "/h5" | "/h6" => " ====\n" + case "li" => "\n * - " + case "/li" => "" + case other => + val simple = if (other(0) == '/') other.substring(1) else other + if (SafeTags(simple)) { + s"$safeTagMarker${text.substring(index, endOfTagIndex + 1)}$safeTagMarker" + } else { + "" + } + }) + index = endOfTagIndex + 1 + } + } else { + result.append(text(index)) + index += 1 + } + } + result.toString + } + /** Prepares the comment for pre-parsing: removes documentation markers and * extra whitespace, removes dangerous HTML and Javadoc tags, and splits it * into lines. @@ -18,12 +99,8 @@ object Cleaner { } } val strippedComment = comment.trim.stripPrefix("/*").stripSuffix("*/") - val safeComment = DangerousTags.replaceAllIn(strippedComment, { htmlReplacement(_) }) - val javadoclessComment = JavadocTags.replaceAllIn(safeComment, { javadocReplacement(_) }) - val markedTagComment = - SafeTags.replaceAllIn(javadoclessComment, { mtch => - Matcher.quoteReplacement(s"$safeTagMarker${mtch.matched}$safeTagMarker") - }) - markedTagComment.linesIterator.toList map (cleanLine) + val safeComment = cleanHtml(strippedComment) + val javadoclessComment = JavadocTags.replaceAllIn(safeComment, javadocReplacement) + javadoclessComment.linesIterator.toList.map(cleanLine) } } diff --git a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala index 3ff022198446..4de66d04ec2f 100644 --- a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala +++ b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala @@ -12,12 +12,6 @@ object Regexes { val CleanCommentLine = new Regex("""(?:\s*\*\s?\s?)?(.*)""") - /** Dangerous HTML tags that should be replaced by something safer, - * such as wiki syntax, or that should be dropped - */ - val DangerousTags = - new Regex("""<(/?(div|ol|ul|li|h[1-6]|p))( [^>]*)?/?>|""") - /** Javadoc tags that should be replaced by something useful, such as wiki * syntax, or that should be dropped. */ val JavadocTags = @@ -36,27 +30,7 @@ object Regexes { } } - /** Maps a dangerous HTML tag to a safe wiki replacement, or an empty string - * if it cannot be salvaged. */ - def htmlReplacement(mtch: Regex.Match): String = mtch.group(1) match { - case "p" | "div" => "\n\n" - case "h1" => "\n= " - case "/h1" => " =\n" - case "h2" => "\n== " - case "/h2" => " ==\n" - case "h3" => "\n=== " - case "/h3" => " ===\n" - case "h4" | "h5" | "h6" => "\n==== " - case "/h4" | "/h5" | "/h6" => " ====\n" - case "li" => "\n * - " - case _ => "" - } - - /** Safe HTML tags that can be kept. */ - val SafeTags = - new Regex("""((&\w+;)|(\d+;)|(?(abbr|acronym|address|area|a|bdo|big|blockquote|br|button|b|caption|cite|code|col|colgroup|dd|del|dfn|em|fieldset|form|hr|img|input|ins|i|kbd|label|legend|link|map|object|optgroup|option|param|pre|q|samp|select|small|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|tr|tt|var)( [^>]*)?/?>))""") - - val safeTagMarker = '\u000E' + val safeTagMarker = '\u000E' // IMPORTANT: Only change if you've updated the sanitization tests to match val endOfLine = '\u000A' val endOfText = '\u0003' diff --git a/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala b/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala index e8567193c312..6392c44730f7 100644 --- a/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala +++ b/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala @@ -70,3 +70,19 @@ class BaseHtmlTest: assertTrue(s"File at $path does not exisits!", Files.exists(path)) val document = Jsoup.parse(IO.read(path)) op(DocumentContext(document, path)) + + def docHtml(dir: String, cls: String, syntax: String): String = + val dest = Files.createTempDirectory("test-doc").toFile + try + val args = Scaladoc.Args( + name = projectName, + tastyFiles = tastyFiles(dir), + output = dest, + projectVersion = Some(projectVersion), + defaultSyntax = List(syntax), + ) + Scaladoc.run(args)(using testContext) + val path = dest.toPath.resolve(s"tests/$dir/$cls.html") + val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path)) + doc.select(".doc").html() + finally dotty.tools.scaladoc.util.IO.delete(dest) \ No newline at end of file diff --git a/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala index 645e2d10bfa1..6ec8f1b5ba3a 100644 --- a/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala +++ b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala @@ -13,21 +13,8 @@ import java.nio.file.Files */ class CaretTest extends BaseHtmlTest: - private def docHtml(cls: String, syntax: String = "markdown"): String = - val dest = Files.createTempDirectory("test-doc").toFile - try - val args = Scaladoc.Args( - name = projectName, - tastyFiles = tastyFiles("i25517"), - output = dest, - projectVersion = Some(projectVersion), - defaultSyntax = List(syntax), - ) - Scaladoc.run(args)(using testContext) - val path = dest.toPath.resolve(s"tests/i25517/$cls.html") - val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path)) - doc.select(".doc").html() - finally dotty.tools.scaladoc.util.IO.delete(dest) + private def docHtml(cls: String, syntax: String): String = + super.docHtml("i25517", cls, syntax) @Test def supTagsInMarkdown(): Unit = val html = docHtml("SupDefault", "markdown") diff --git a/scaladoc/test/dotty/tools/scaladoc/tasty/comments/TagSanitizationTest.scala b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/TagSanitizationTest.scala new file mode 100644 index 000000000000..d29b6fdba3ed --- /dev/null +++ b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/TagSanitizationTest.scala @@ -0,0 +1,46 @@ +package dotty.tools.scaladoc +package tasty +package comments + +import org.junit.Test +import org.junit.Assert.* + +import java.nio.file.Files + +class TagSanitizationTest extends BaseHtmlTest: + private def docHtml(cls: String): String = + super.docHtml("sanitization", cls, "markdown") + + @Test def scriptTag(): Unit = + val html = docHtml("Script") + assertEquals("
alert('hello')
", html) + + @Test def scriptTagWithSpaces(): Unit = + val html = docHtml("ScriptWithSpaces") + assertEquals("< script >alert('hello')
", html) + + @Test def scriptTagWithSafeChar(): Unit = + val html = docHtml("FakeSafeScript") + assertEquals("alert('hello')
", html) + + @Test def notATag(): Unit = + val html = docHtml("NotATag") + assertEquals("Example < Second <: Third <= Fourth
", html) + + @Test def notATagButHasGreaterThan(): Unit = + val html = docHtml("NotATagButHasGreaterThan") + assertEquals("Example < Second >: Third
", html) + + @Test def notATagButNoSpaces(): Unit = + val html = docHtml("NotATagButNoSpaces") + assertEquals("a<b
", html) + + @Test def tagOutsideCode(): Unit = + val html = docHtml("TagOutsideCode") + assertFalse(html, html.contains("")) + + @Test def linkToTagLike(): Unit = + val html = docHtml("LinkToTagLike") + // ensure we don't treat the text between <:< and >:> as a tag content + assertTrue(html, html.contains("or"))