Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions project/Build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,7 @@ object Build {

// Settings used when compiling dotty with a non-bootstrapped dotty
lazy val commonBootstrappedSettings = commonDottySettings ++ Seq(
// To enable support of scaladoc and language-server projects you need to change this to true
bspEnabled := false,
bspEnabled := enableBspAllProjects,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no reason not to do this IMHO

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a very strong reason: you don't want your IDE to recompile every bootstrapped project from scratch every time you change one line in the compiler!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't follow. If you deliberately enable this off-by-default option, why not recompile whatever's necessary in that case?

If there's an issue we should at the very least put it on the same plan as enableBspAllProjects and make it a variable that's easy to toggle without editing the build file.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah never mind, I saw the removal of false and panicked. Using enableBspAllProjects seems accurate.

(Compile / unmanagedSourceDirectories) += baseDirectory.value / "src-bootstrapped",

version := dottyVersion,
Expand Down
34 changes: 34 additions & 0 deletions scaladoc-testcases/src/tests/sanitization.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package tests.sanitization

/** <script>alert('hello')</script> */
class Script

/** < script >alert('hello')</script
> */
class ScriptWithSpaces

/** <script>alert('hello')</script> */
class FakeSafeScript
Copy link
Copy Markdown
Contributor Author

@SolalPirelli SolalPirelli Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the character in the comment that your browser probably doesn't render very well is the same char we internally use to mark "safe" HTML tags (why do we have such a char? because IMHO the entire scaladoc parsing stack is too hacky, and should be rewritten to be a single-pass parser... but that's for another day)


/** Example < Second <: Third <= Fourth */
class NotATag

/** Example < Second >: Third */
class NotATagButHasGreaterThan

/** a<b */
class NotATagButNoSpaces

/**
* test
* ```
* example = false
* ```
* <script>alert('hello')</script>
*/
class TagOutsideCode

/**
* see [[<:<]], or [[>:>]]
*/
class LinkToTagLike
91 changes: 84 additions & 7 deletions scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,91 @@
package dotty.tools.scaladoc
package tasty.comments

/**
* Removes HTML tags except simple ones that can be translated or that are definitely harmless,
* translates Scala/Javadoc tags, and generally cleans the input.
* Not the fastest code in the world, and should eventually be replaced by a real parser,
* but works fine for now. */
object Cleaner {
import Regexes._
import java.util.regex.Matcher

// Tags that are considered safe enough and do not need escaping
private val SafeTags = Set(
"a", "abbr", "address", "area", "blockquote", "br", "b", "caption", "cite", "code", "col", "colgroup",
"dd", "del", "dfn", "em", "hr", "img", "ins", "i", "kbd", "label", "legend", "pre", "q", "samp",
"small", "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "var"
)

private def cleanHtml(text: String): String = {
val result = StringBuilder()
var index = 0
var insideCode = false
var insideLink = false
while (index < text.length) {
if (insideCode) {
result.append(text(index))
if (index >= 2 && text(index) == '`' && text(index - 1) == '`' && text(index - 2) == '`') {
insideCode = false
}
index += 1
} else if (insideLink) {
result.append(text(index))
if (index >= 1 && text(index) == ']' && text(index - 1) == ']') {
insideLink = false
}
index += 1
} else if (index <= text.length - 3 && text(index) == '`' && text(index + 1) == '`' && text(index + 2) == '`') {
result.append("```")
insideCode = true
index += 3
} else if (index <= text.length - 2 && text(index) == '[' && text(index + 1) == '[') {
result.append("[[")
insideLink = true
index += 2
} else if (text(index) == safeTagMarker) {
// ignore it, it's a character that should never appear in everyday text anyway
index += 1
} else if (text(index) == '<') {
val endOfNameIndex = text.indexOf(' ', index)
val endOfTagIndex = text.indexOf('>', index)
if (endOfNameIndex == -1 || endOfNameIndex == index + 1 || endOfTagIndex == -1) {
// not actually a tag, e.g., "< hello >", "a<b"
result.append("&lt;")
index += 1
} else {
val subStringEndIndex = Math.min(endOfTagIndex, endOfNameIndex)
result.append(text.substring(index + 1, subStringEndIndex) match {
case "" => "&lt;" // not actually a tag
case "p" | "div" => "\n\n"
case "h1" => "\n= "
case "/h1" => " =\n"
case "h2" => "\n== "
case "/h2" => " ==\n"
case "h3" => "\n=== "
case "/h3" => " ===\n"
case "h4" | "h5" | "h6" => "\n==== "
case "/h4" | "/h5" | "/h6" => " ====\n"
case "li" => "\n * - "
case "/li" => ""
case other =>
val simple = if (other(0) == '/') other.substring(1) else other
if (SafeTags(simple)) {
s"$safeTagMarker${text.substring(index, endOfTagIndex + 1)}$safeTagMarker"
} else {
""
}
})
index = endOfTagIndex + 1
}
} else {
result.append(text(index))
index += 1
}
}
result.toString
}

/** Prepares the comment for pre-parsing: removes documentation markers and
* extra whitespace, removes dangerous HTML and Javadoc tags, and splits it
* into lines.
Expand All @@ -18,12 +99,8 @@ object Cleaner {
}
}
val strippedComment = comment.trim.stripPrefix("/*").stripSuffix("*/")
val safeComment = DangerousTags.replaceAllIn(strippedComment, { htmlReplacement(_) })
val javadoclessComment = JavadocTags.replaceAllIn(safeComment, { javadocReplacement(_) })
val markedTagComment =
SafeTags.replaceAllIn(javadoclessComment, { mtch =>
Matcher.quoteReplacement(s"$safeTagMarker${mtch.matched}$safeTagMarker")
})
markedTagComment.linesIterator.toList map (cleanLine)
val safeComment = cleanHtml(strippedComment)
val javadoclessComment = JavadocTags.replaceAllIn(safeComment, javadocReplacement)
javadoclessComment.linesIterator.toList.map(cleanLine)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@ object Regexes {
val CleanCommentLine =
new Regex("""(?:\s*\*\s?\s?)?(.*)""")

/** Dangerous HTML tags that should be replaced by something safer,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a dangerous use of dangerous, pun intended. Those tags were not the kind of tags one should call dangerous.

* such as wiki syntax, or that should be dropped
*/
val DangerousTags =
new Regex("""<(/?(div|ol|ul|li|h[1-6]|p))( [^>]*)?/?>|<!--.*-->""")

/** Javadoc tags that should be replaced by something useful, such as wiki
* syntax, or that should be dropped. */
val JavadocTags =
Expand All @@ -36,27 +30,7 @@ object Regexes {
}
}

/** Maps a dangerous HTML tag to a safe wiki replacement, or an empty string
* if it cannot be salvaged. */
def htmlReplacement(mtch: Regex.Match): String = mtch.group(1) match {
case "p" | "div" => "\n\n"
case "h1" => "\n= "
case "/h1" => " =\n"
case "h2" => "\n== "
case "/h2" => " ==\n"
case "h3" => "\n=== "
case "/h3" => " ===\n"
case "h4" | "h5" | "h6" => "\n==== "
case "/h4" | "/h5" | "/h6" => " ====\n"
case "li" => "\n * - "
case _ => ""
}

/** Safe HTML tags that can be kept. */
val SafeTags =
new Regex("""((&\w+;)|(&#\d+;)|(</?(abbr|acronym|address|area|a|bdo|big|blockquote|br|button|b|caption|cite|code|col|colgroup|dd|del|dfn|em|fieldset|form|hr|img|input|ins|i|kbd|label|legend|link|map|object|optgroup|option|param|pre|q|samp|select|small|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|tr|tt|var)( [^>]*)?/?>))""")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed "safe" tags that are related to user input (why would anyone do this?) or considered terminally deprecated (e.g., acronym)

val safeTagMarker = '\u000E'
val safeTagMarker = '\u000E' // IMPORTANT: Only change if you've updated the sanitization tests to match
val endOfLine = '\u000A'
val endOfText = '\u0003'

Expand Down
16 changes: 16 additions & 0 deletions scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,19 @@ class BaseHtmlTest:
assertTrue(s"File at $path does not exisits!", Files.exists(path))
val document = Jsoup.parse(IO.read(path))
op(DocumentContext(document, path))

def docHtml(dir: String, cls: String, syntax: String): String =
val dest = Files.createTempDirectory("test-doc").toFile
try
val args = Scaladoc.Args(
name = projectName,
tastyFiles = tastyFiles(dir),
output = dest,
projectVersion = Some(projectVersion),
defaultSyntax = List(syntax),
)
Scaladoc.run(args)(using testContext)
val path = dest.toPath.resolve(s"tests/$dir/$cls.html")
val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path))
doc.select(".doc").html()
finally dotty.tools.scaladoc.util.IO.delete(dest)
17 changes: 2 additions & 15 deletions scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,8 @@ import java.nio.file.Files
*/
class CaretTest extends BaseHtmlTest:

private def docHtml(cls: String, syntax: String = "markdown"): String =
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved one level up so the logic can be shared with the new tests

val dest = Files.createTempDirectory("test-doc").toFile
try
val args = Scaladoc.Args(
name = projectName,
tastyFiles = tastyFiles("i25517"),
output = dest,
projectVersion = Some(projectVersion),
defaultSyntax = List(syntax),
)
Scaladoc.run(args)(using testContext)
val path = dest.toPath.resolve(s"tests/i25517/$cls.html")
val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path))
doc.select(".doc").html()
finally dotty.tools.scaladoc.util.IO.delete(dest)
private def docHtml(cls: String, syntax: String): String =
super.docHtml("i25517", cls, syntax)

@Test def supTagsInMarkdown(): Unit =
val html = docHtml("SupDefault", "markdown")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package dotty.tools.scaladoc
package tasty
package comments

import org.junit.Test
import org.junit.Assert.*

import java.nio.file.Files

class TagSanitizationTest extends BaseHtmlTest:
private def docHtml(cls: String): String =
super.docHtml("sanitization", cls, "markdown")

@Test def scriptTag(): Unit =
val html = docHtml("Script")
assertEquals("<p>alert('hello')</p>", html)

@Test def scriptTagWithSpaces(): Unit =
val html = docHtml("ScriptWithSpaces")
assertEquals("<p>&lt; script &gt;alert('hello')</p>", html)

@Test def scriptTagWithSafeChar(): Unit =
val html = docHtml("FakeSafeScript")
assertEquals("<p>alert('hello')</p>", html)

@Test def notATag(): Unit =
val html = docHtml("NotATag")
assertEquals("<p>Example &lt; Second &lt;: Third &lt;= Fourth</p>", html)

@Test def notATagButHasGreaterThan(): Unit =
val html = docHtml("NotATagButHasGreaterThan")
assertEquals("<p>Example &lt; Second &gt;: Third</p>", html)

@Test def notATagButNoSpaces(): Unit =
val html = docHtml("NotATagButNoSpaces")
assertEquals("<p>a&lt;b</p>", html)

@Test def tagOutsideCode(): Unit =
val html = docHtml("TagOutsideCode")
assertFalse(html, html.contains("<script>"))
assertFalse(html, html.contains("</script>"))

@Test def linkToTagLike(): Unit =
val html = docHtml("LinkToTagLike")
// ensure we don't treat the text between <:< and >:> as a tag content
assertTrue(html, html.contains("or"))
Loading