scala · SolalPirelli · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 2, 2026
diff --git a/project/Build.scala b/project/Build.scala
@@ -473,8 +473,7 @@ object Build {
 
   // Settings used when compiling dotty with a non-bootstrapped dotty
   lazy val commonBootstrappedSettings = commonDottySettings ++ Seq(
-    // To enable support of scaladoc and language-server projects you need to change this to true
-    bspEnabled := false,
+    bspEnabled := enableBspAllProjects,
     (Compile / unmanagedSourceDirectories) += baseDirectory.value / "src-bootstrapped",
 
     version := dottyVersion,

diff --git a/scaladoc-testcases/src/tests/sanitization.scala b/scaladoc-testcases/src/tests/sanitization.scala
@@ -0,0 +1,34 @@
+package tests.sanitization
+
+/** <script>alert('hello')</script> */
+class Script
+
+/** < script   >alert('hello')</script
+> */
+class ScriptWithSpaces
+
+/** <script>alert('hello')</script> */
+class FakeSafeScript // NOTE(review): the doc comment above presumably embeds invisible U+000E (safeTagMarker) characters — confirm they survive editing
+
+/** Example < Second <: Third <= Fourth */
+class NotATag
+
+/** Example < Second >: Third */
+class NotATagButHasGreaterThan
+
+/** a<b */
+class NotATagButNoSpaces
+
+/**
+ * test
+ * ```
+ * example = false
+ * ```
+ * <script>alert('hello')</script>
+ */
+class TagOutsideCode
+
+/**
+ * see [[<:<]], or [[>:>]]
+ */
+class LinkToTagLike
diff --git a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala
@@ -1,10 +1,91 @@
 package dotty.tools.scaladoc
 package tasty.comments
 
+/**
+ * Removes HTML tags except simple ones that can be translated or that are definitely harmless,
+ * translates Scala/Javadoc tags, and generally cleans the input.
+ * Not the fastest code in the world, and should eventually be replaced by a real parser,
+ * but works fine for now. */
 object Cleaner {
   import Regexes._
   import java.util.regex.Matcher
 
+  // Tags considered safe enough to keep: cleanHtml passes them through between safeTagMarkers, judging by the tag name only
+  private val SafeTags = Set(
+    "a", "abbr", "address", "area", "blockquote", "br", "b", "caption", "cite", "code", "col", "colgroup",
+    "dd", "del", "dfn", "em", "hr", "img", "ins", "i", "kbd", "label", "legend", "pre", "q", "samp",
+    "small", "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "var"
+  )
+
+  /** Sanitizes the HTML in `text`. Fenced code (```) and `[[links]]` are copied through untouched.
+    * Elsewhere, headings, paragraphs and list items become wiki syntax, tags listed in `SafeTags` are
+    * kept between two `safeTagMarker`s, any other tag is dropped (the text it encloses stays), and a
+    * `<` that does not start a tag is escaped as `&lt;`.
+    * NOTE(review): a kept tag is copied verbatim, attributes included — confirm they are vetted later. */
+  private def cleanHtml(text: String): String = {
+    val result = StringBuilder()
+    var index = 0
+    // delimiter that ends the verbatim region (code block or link) being copied, "" outside of one
+    var closer = ""
+    while (index < text.length) {
+      if (closer.nonEmpty) {
+        // look ahead for the delimiter, so that an opening fence can never double as the closing one
+        if (text.startsWith(closer, index)) {
+          result.append(closer)
+          index += closer.length
+          closer = ""
+        } else {
+          result.append(text(index))
+          index += 1
+        }
+      } else if (text.startsWith("```", index)) {
+        result.append("```")
+        closer = "```"
+        index += 3
+      } else if (text.startsWith("[[", index)) {
+        result.append("[[")
+        closer = "]]"
+        index += 2
+      } else if (text(index) == safeTagMarker) {
+        // drop it: the marker is reserved for the tags kept below and should never appear in everyday text
+        index += 1
+      } else if (text(index) == '<') {
+        val endOfTagIndex = text.indexOf('>', index)
+        // the name ends at the first whitespace or '>', so it does not depend on a ' ' showing up later
+        val endOfNameIndex = text.indexWhere(c => c.isWhitespace || c == '>', index + 1)
+        if (endOfTagIndex == -1 || endOfNameIndex == index + 1) {
+          // not actually a tag, e.g., "< hello >", "a<b", "<>"
+          result.append("&lt;")
+          index += 1
+        } else {
+          // a self-closing "<br/>" names the same tag as "<br>"
+          result.append(text.substring(index + 1, endOfNameIndex).stripSuffix("/") match {
+            case "p" | "div" => "\n\n"
+            case "h1" => "\n= "
+            case "/h1" => " =\n"
+            case "h2" => "\n== "
+            case "/h2" => " ==\n"
+            case "h3" => "\n=== "
+            case "/h3" => " ===\n"
+            case "h4" | "h5" | "h6" => "\n==== "
+            case "/h4" | "/h5" | "/h6" => " ====\n"
+            case "li" => "\n *  - "
+            case "/li" => ""
+            // a closing tag is judged by the name it closes
+            case other if SafeTags(other.stripPrefix("/")) =>
+              s"$safeTagMarker${text.substring(index, endOfTagIndex + 1)}$safeTagMarker"
+            case _ => ""
+          })
+          index = endOfTagIndex + 1
+        }
+      } else {
+        result.append(text(index))
+        index += 1
+      }
+    }
+    result.toString
+  }
+
   /** Prepares the comment for pre-parsing: removes documentation markers and
     * extra whitespace, removes dangerous HTML and Javadoc tags, and splits it
     * into lines.
@@ -18,12 +99,8 @@ object Cleaner {
       }
     }
     val strippedComment = comment.trim.stripPrefix("/*").stripSuffix("*/")
-    val safeComment = DangerousTags.replaceAllIn(strippedComment, { htmlReplacement(_) })
-    val javadoclessComment = JavadocTags.replaceAllIn(safeComment, { javadocReplacement(_) })
-    val markedTagComment =
-      SafeTags.replaceAllIn(javadoclessComment, { mtch =>
-        Matcher.quoteReplacement(s"$safeTagMarker${mtch.matched}$safeTagMarker")
-      })
-    markedTagComment.linesIterator.toList map (cleanLine)
+    val safeComment = cleanHtml(strippedComment)
+    val javadoclessComment = JavadocTags.replaceAllIn(safeComment, javadocReplacement)
+    javadoclessComment.linesIterator.toList.map(cleanLine)
   }
 }
diff --git a/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala b/scaladoc/src/dotty/tools/scaladoc/tasty/comments/CommentRegex.scala
@@ -12,12 +12,6 @@ object Regexes {
   val CleanCommentLine =
     new Regex("""(?:\s*\*\s?\s?)?(.*)""")
 
-  /** Dangerous HTML tags that should be replaced by something safer,
-    * such as wiki syntax, or that should be dropped
-    */
-  val DangerousTags =
-    new Regex("""<(/?(div|ol|ul|li|h[1-6]|p))( [^>]*)?/?>|<!--.*-->""")
-
   /** Javadoc tags that should be replaced by something useful, such as wiki
     * syntax, or that should be dropped. */
   val JavadocTags =
@@ -36,27 +30,7 @@ object Regexes {
     }
   }
 
-  /** Maps a dangerous HTML tag to a safe wiki replacement, or an empty string
-    * if it cannot be salvaged. */
-  def htmlReplacement(mtch: Regex.Match): String = mtch.group(1) match {
-    case "p" | "div" => "\n\n"
-    case "h1"  => "\n= "
-    case "/h1" => " =\n"
-    case "h2"  => "\n== "
-    case "/h2" => " ==\n"
-    case "h3"  => "\n=== "
-    case "/h3" => " ===\n"
-    case "h4" | "h5" | "h6" => "\n==== "
-    case "/h4" | "/h5" | "/h6" => " ====\n"
-    case "li" => "\n *  - "
-    case _ => ""
-  }
-
-  /** Safe HTML tags that can be kept. */
-  val SafeTags =
-    new Regex("""((&\w+;)|(&#\d+;)|(</?(abbr|acronym|address|area|a|bdo|big|blockquote|br|button|b|caption|cite|code|col|colgroup|dd|del|dfn|em|fieldset|form|hr|img|input|ins|i|kbd|label|legend|link|map|object|optgroup|option|param|pre|q|samp|select|small|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|tr|tt|var)( [^>]*)?/?>))""")
-
-  val safeTagMarker = '\u000E'
+  val safeTagMarker = '\u000E' // IMPORTANT: Only change if you've updated the sanitization tests to match
   val endOfLine     = '\u000A'
   val endOfText     = '\u0003'
 

diff --git a/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala b/scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala
@@ -70,3 +70,19 @@ class BaseHtmlTest:
     assertTrue(s"File at $path does not exisits!", Files.exists(path))
     val document = Jsoup.parse(IO.read(path))
     op(DocumentContext(document, path))
+
+  /** Renders `tastyFiles(dir)` into a temp directory (deleted afterwards) and returns the `.doc` HTML of `tests/$dir/$cls.html`. */
+  def docHtml(dir: String, cls: String, syntax: String): String =
+    val outputDir = Files.createTempDirectory("test-doc").toFile
+    try
+      val scaladocArgs = Scaladoc.Args(
+        name = projectName,
+        tastyFiles = tastyFiles(dir),
+        output = outputDir,
+        projectVersion = Some(projectVersion),
+        defaultSyntax = List(syntax),
+      )
+      Scaladoc.run(scaladocArgs)(using testContext)
+      val pagePath = outputDir.toPath.resolve(s"tests/$dir/$cls.html")
+      org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(pagePath)).select(".doc").html()
+    finally dotty.tools.scaladoc.util.IO.delete(outputDir)
diff --git a/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala
@@ -13,21 +13,8 @@ import java.nio.file.Files
   */
 class CaretTest extends BaseHtmlTest:
 
-  private def docHtml(cls: String, syntax: String = "markdown"): String =
-    val dest = Files.createTempDirectory("test-doc").toFile
-    try
-      val args = Scaladoc.Args(
-        name = projectName,
-        tastyFiles = tastyFiles("i25517"),
-        output = dest,
-        projectVersion = Some(projectVersion),
-        defaultSyntax = List(syntax),
-      )
-      Scaladoc.run(args)(using testContext)
-      val path = dest.toPath.resolve(s"tests/i25517/$cls.html")
-      val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path))
-      doc.select(".doc").html()
-    finally dotty.tools.scaladoc.util.IO.delete(dest)
+  private def docHtml(cls: String, syntax: String): String = // the shared BaseHtmlTest helper, pinned to the i25517 fixtures
+    super.docHtml("i25517", cls, syntax)
 
   @Test def supTagsInMarkdown(): Unit =
     val html = docHtml("SupDefault", "markdown")

diff --git a/scaladoc/test/dotty/tools/scaladoc/tasty/comments/TagSanitizationTest.scala b/scaladoc/test/dotty/tools/scaladoc/tasty/comments/TagSanitizationTest.scala
@@ -0,0 +1,46 @@
+package dotty.tools.scaladoc
+package tasty
+package comments
+
+import org.junit.Test
+import org.junit.Assert.*
+
+import java.nio.file.Files
+
+class TagSanitizationTest extends BaseHtmlTest:
+  private def docHtml(cls: String): String = // every fixture is a class under tests/sanitization, with markdown as default syntax
+    super.docHtml("sanitization", cls, "markdown")
+
+  @Test def scriptTag(): Unit =
+    val html = docHtml("Script")
+    assertEquals("<p>alert('hello')</p>", html) // both script tags are gone, the text between them stays
+
+  @Test def scriptTagWithSpaces(): Unit =
+    val html = docHtml("ScriptWithSpaces")
+    assertEquals("<p>&lt; script &gt;alert('hello')</p>", html) // "< script" is escaped as text, not read as a tag
+
+  @Test def scriptTagWithSafeChar(): Unit =
+    val html = docHtml("FakeSafeScript") // NOTE(review): presumably this fixture embeds safeTagMarker characters — confirm
+    assertEquals("<p>alert('hello')</p>", html)
+
+  @Test def notATag(): Unit =
+    val html = docHtml("NotATag")
+    assertEquals("<p>Example &lt; Second &lt;: Third &lt;= Fourth</p>", html) // comparison operators are escaped, nothing is dropped
+
+  @Test def notATagButHasGreaterThan(): Unit =
+    val html = docHtml("NotATagButHasGreaterThan")
+    assertEquals("<p>Example &lt; Second &gt;: Third</p>", html) // a later '>' must not turn "< Second" into a tag
+
+  @Test def notATagButNoSpaces(): Unit =
+    val html = docHtml("NotATagButNoSpaces")
+    assertEquals("<p>a&lt;b</p>", html) // no '>' follows, so the '<' cannot open a tag
+
+  @Test def tagOutsideCode(): Unit =
+    val html = docHtml("TagOutsideCode")
+    assertFalse(html, html.contains("<script>")) // the script tag comes after the closed code fence, so it must still be removed
+    assertFalse(html, html.contains("</script>"))
+
+  @Test def linkToTagLike(): Unit =
+    val html = docHtml("LinkToTagLike")
+    // the text between [[<:<]] and [[>:>]] must survive, i.e. it must not be swallowed as the content of a tag
+    assertTrue(html, html.contains("or"))