Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions project/Build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,7 @@ object Build {

// Settings used when compiling dotty with a non-bootstrapped dotty
lazy val commonBootstrappedSettings = commonDottySettings ++ Seq(
// To enable support of scaladoc and language-server projects you need to change this to true
bspEnabled := false,
bspEnabled := enableBspAllProjects,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no reason not to do this IMHO

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a very strong reason: you don't want your IDE to recompile every bootstrapped project from scratch every time you change one line in the compiler!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't follow. If you deliberately enable this off-by-default option, why not recompile whatever's necessary in that case?

If there's an issue we should at the very least put it on the same plan as enableBspAllProjects and make it a variable that's easy to toggle without editing the build file.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah never mind, I saw the removal of false and panicked. Using enableBspAllProjects seems accurate.

(Compile / unmanagedSourceDirectories) += baseDirectory.value / "src-bootstrapped",

version := dottyVersion,
Expand Down
11 changes: 11 additions & 0 deletions scaladoc-testcases/src/tests/sanitization.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package tests.sanitization

/** <script>alert('hello')</script> */
class Script

/** < script >alert('hello')</script
> */
class ScriptWithSpaces

/** <script>alert('hello')</script> */
class FakeSafeScript
63 changes: 56 additions & 7 deletions scaladoc/src/dotty/tools/scaladoc/tasty/comments/Cleaner.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,59 @@ object Cleaner {
import Regexes._
import java.util.regex.Matcher

// Tags that are considered safe enough and do not need escaping
private val SafeTags = Set(
"a", "abbr", "address", "area", "blockquote", "br", "b", "caption", "cite", "code", "col", "colgroup",
"dd", "del", "dfn", "em", "hr", "img", "ins", "i", "kbd", "label", "legend", "pre", "q", "samp",
"small", "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "var"
)

/** Removes all tags except simple ones that can be translated or that are definitely harmless.
* Not the fastest code in the world, but should work just fine for our purposes. */
private def cleanHtml(text: String): String = {
val result = StringBuilder()
var index = 0
while (index < text.length) {
if (text(index) == safeTagMarker) {
// ignore it, it's a character that should never appear in everyday text anyway
index += 1
} else if (text(index) == '<') {
val endOfTagIndex = text.indexOf('>', index)
if (endOfTagIndex == -1) {
return result.toString
}
val endOfNameIndex = text.indexOf(' ', index)
val subStringEndIndex = if (endOfNameIndex == -1) endOfTagIndex else Math.min(endOfTagIndex, endOfNameIndex)
result.append(text.substring(index + 1, subStringEndIndex) match {
case "p" | "div" => "\n\n"
case "h1" => "\n= "
case "/h1" => " =\n"
case "h2" => "\n== "
case "/h2" => " ==\n"
case "h3" => "\n=== "
case "/h3" => " ===\n"
case "h4" | "h5" | "h6" => "\n==== "
case "/h4" | "/h5" | "/h6" => " ====\n"
case "li" => "\n * - "
case "/li" => ""
case "" => ""
case other =>
val simple = if (other(0) == '/') other.substring(1) else other
if (SafeTags(simple)) {
s"$safeTagMarker${text.substring(index, endOfTagIndex + 1)}$safeTagMarker"
} else {
""
}
})
index = endOfTagIndex + 1
} else {
result.append(text(index))
index += 1
}
}
result.toString
}

/** Prepares the comment for pre-parsing: removes documentation markers and
* extra whitespace, removes dangerous HTML and Javadoc tags, and splits it
* into lines.
Expand All @@ -18,12 +71,8 @@ object Cleaner {
}
}
val strippedComment = comment.trim.stripPrefix("/*").stripSuffix("*/")
val safeComment = DangerousTags.replaceAllIn(strippedComment, { htmlReplacement(_) })
val javadoclessComment = JavadocTags.replaceAllIn(safeComment, { javadocReplacement(_) })
val markedTagComment =
SafeTags.replaceAllIn(javadoclessComment, { mtch =>
Matcher.quoteReplacement(s"$safeTagMarker${mtch.matched}$safeTagMarker")
})
markedTagComment.linesIterator.toList map (cleanLine)
val safeComment = cleanHtml(strippedComment)
val javadoclessComment = JavadocTags.replaceAllIn(safeComment, javadocReplacement)
javadoclessComment.linesIterator.toList.map(cleanLine)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@ object Regexes {
val CleanCommentLine =
new Regex("""(?:\s*\*\s?\s?)?(.*)""")

/** Dangerous HTML tags that should be replaced by something safer,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a dangerous use of dangerous, pun intended. Those tags were not the kind of tags one should call dangerous.

* such as wiki syntax, or that should be dropped
*/
val DangerousTags =
new Regex("""<(/?(div|ol|ul|li|h[1-6]|p))( [^>]*)?/?>|<!--.*-->""")

/** Javadoc tags that should be replaced by something useful, such as wiki
* syntax, or that should be dropped. */
val JavadocTags =
Expand All @@ -36,27 +30,7 @@ object Regexes {
}
}

/** Maps a dangerous HTML tag to a safe wiki replacement, or an empty string
* if it cannot be salvaged. */
def htmlReplacement(mtch: Regex.Match): String = mtch.group(1) match {
case "p" | "div" => "\n\n"
case "h1" => "\n= "
case "/h1" => " =\n"
case "h2" => "\n== "
case "/h2" => " ==\n"
case "h3" => "\n=== "
case "/h3" => " ===\n"
case "h4" | "h5" | "h6" => "\n==== "
case "/h4" | "/h5" | "/h6" => " ====\n"
case "li" => "\n * - "
case _ => ""
}

/** Safe HTML tags that can be kept. */
val SafeTags =
new Regex("""((&\w+;)|(&#\d+;)|(</?(abbr|acronym|address|area|a|bdo|big|blockquote|br|button|b|caption|cite|code|col|colgroup|dd|del|dfn|em|fieldset|form|hr|img|input|ins|i|kbd|label|legend|link|map|object|optgroup|option|param|pre|q|samp|select|small|span|strong|sub|sup|table|tbody|td|textarea|tfoot|th|thead|tr|tt|var)( [^>]*)?/?>))""")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed "safe" tags that are related to user input (why would anyone do this?) or considered terminally deprecated (e.g., acronym)

val safeTagMarker = '\u000E'
val safeTagMarker = '\u000E' // IMPORTANT: Only change if you've updated the sanitization tests to match
val endOfLine = '\u000A'
val endOfText = '\u0003'

Expand Down
16 changes: 16 additions & 0 deletions scaladoc/test/dotty/tools/scaladoc/BaseHtmlTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,19 @@ class BaseHtmlTest:
assertTrue(s"File at $path does not exisits!", Files.exists(path))
val document = Jsoup.parse(IO.read(path))
op(DocumentContext(document, path))

def docHtml(dir: String, cls: String, syntax: String): String =
val dest = Files.createTempDirectory("test-doc").toFile
try
val args = Scaladoc.Args(
name = projectName,
tastyFiles = tastyFiles(dir),
output = dest,
projectVersion = Some(projectVersion),
defaultSyntax = List(syntax),
)
Scaladoc.run(args)(using testContext)
val path = dest.toPath.resolve(s"tests/$dir/$cls.html")
val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path))
doc.select(".doc").html()
finally dotty.tools.scaladoc.util.IO.delete(dest)
17 changes: 2 additions & 15 deletions scaladoc/test/dotty/tools/scaladoc/tasty/comments/CaretTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,8 @@ import java.nio.file.Files
*/
class CaretTest extends BaseHtmlTest:

private def docHtml(cls: String, syntax: String = "markdown"): String =
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved one level up so the logic can be shared with the new tests

val dest = Files.createTempDirectory("test-doc").toFile
try
val args = Scaladoc.Args(
name = projectName,
tastyFiles = tastyFiles("i25517"),
output = dest,
projectVersion = Some(projectVersion),
defaultSyntax = List(syntax),
)
Scaladoc.run(args)(using testContext)
val path = dest.toPath.resolve(s"tests/i25517/$cls.html")
val doc = org.jsoup.Jsoup.parse(dotty.tools.scaladoc.util.IO.read(path))
doc.select(".doc").html()
finally dotty.tools.scaladoc.util.IO.delete(dest)
private def docHtml(cls: String, syntax: String): String =
super.docHtml("i25517", cls, syntax)

@Test def supTagsInMarkdown(): Unit =
val html = docHtml("SupDefault", "markdown")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package dotty.tools.scaladoc
package tasty
package comments

import org.junit.Test
import org.junit.Assert.*

import java.nio.file.Files

class TagSanitizationTest extends BaseHtmlTest:
private def docHtml(cls: String): String =
super.docHtml("sanitization", cls, "markdown")

@Test def scriptTag(): Unit =
val html = docHtml("Script")
assertEquals("<p>alert('hello')</p>", html)

@Test def scriptTagWithSpaces(): Unit =
val html = docHtml("ScriptWithSpaces")
assertEquals("<p>alert('hello')</p>", html)

@Test def scriptTagWithSafeChar(): Unit =
val html = docHtml("FakeSafeScript")
assertEquals("<p>alert('hello')</p>", html)
Loading