You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.5 KiB
Kotlin

package moe.odango.index.utils
import org.jsoup.internal.StringUtil
import org.jsoup.nodes.CDataNode
import org.jsoup.nodes.Element
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.select.NodeTraversor
import org.jsoup.select.NodeVisitor
/**
 * Collects the text of this element and its descendants into a single string,
 * turning every `<br>` into a line break (`'\n'`).
 *
 * Text nodes are appended through [appendNormalisedText]. Spaces directly before
 * a `<br>` are removed, a single space is inserted before a block-level element
 * and between a block-level element and a text node that immediately follows it
 * (unless the buffer already ends in whitespace), and the result is trimmed.
 *
 * @return the collected text with leading and trailing whitespace removed.
 */
fun Element.brText(): String {
    val accum = StringUtil.borrowBuilder()
    NodeTraversor.traverse(object : NodeVisitor {
        override fun head(node: Node, depth: Int) {
            if (node is TextNode) {
                appendNormalisedText(accum, node)
            } else if (node is Element && accum.isNotEmpty()) {
                if (node.tagName() == "br") {
                    // Strip the spaces sitting right before the line break. The index
                    // guard stops the scan at the front of the buffer, which matters
                    // when everything collected so far is spaces.
                    var lastIndex = accum.lastIndex
                    while (lastIndex >= 0 && accum[lastIndex] == ' ') {
                        lastIndex--
                    }
                    accum.delete(lastIndex + 1, accum.length)
                    accum.append('\n')
                } else if (node.isBlock && !accum.lastIsWhitespace()) {
                    // Separate a block element from whatever text precedes it.
                    accum.append(' ')
                }
            }
        }

        override fun tail(node: Node, depth: Int) {
            // Make sure there is a space between a block tag and an immediately
            // following text node: <div>One</div>Two should be "One Two".
            // Nothing is added when the buffer already ends in whitespace.
            if (node is Element &&
                node.isBlock &&
                node.nextSibling() is TextNode &&
                !accum.lastIsWhitespace()
            ) {
                accum.append(' ')
            }
        }
    }, this)
    return StringUtil.releaseBuilder(accum).trim()
}
/**
 * Tells whether the final character of this builder is a space or a line feed.
 *
 * @return `true` when the last character is `' '` or `'\n'`; `false` otherwise,
 * including when the builder is empty.
 */
fun StringBuilder.lastIsWhitespace(): Boolean = when (lastOrNull()) {
    ' ', '\n' -> true
    else -> false
}
/**
 * Appends the text of [textNode] to [accum].
 *
 * The text is appended verbatim when the node is a [CDataNode] or when
 * [preserveWhitespace] reports `true` for its parent. Otherwise it is passed
 * through `StringUtil.appendNormalisedWhitespace`, with leading whitespace
 * stripped if [accum] already ends in whitespace.
 *
 * @param accum the builder that receives the text.
 * @param textNode the node whose whole text is appended.
 */
fun appendNormalisedText(accum: StringBuilder, textNode: TextNode) {
    val text = textNode.wholeText
    // parentNode() comes from Java and may hand back null for a node without a
    // parent; preserveWhitespace takes a non-null Node, so the null case is
    // handled here and treated as "do not preserve".
    val parent: Node? = textNode.parentNode()
    val keepVerbatim = textNode is CDataNode || (parent != null && preserveWhitespace(parent))
    if (keepVerbatim) {
        accum.append(text)
    } else {
        StringUtil.appendNormalisedWhitespace(accum, text, accum.lastIsWhitespace())
    }
}
/**
 * Reports whether [node] lies inside a whitespace-preserving element.
 *
 * Only an [Element] is inspected: the element itself and at most five of its
 * ancestors are checked for a tag whose `preserveWhitespace()` is `true`. The
 * walk is bounded on purpose to avoid searching the whole ancestor chain.
 *
 * @param node the node to inspect.
 * @return `true` if one of the inspected elements preserves whitespace;
 * `false` otherwise, including when [node] is not an [Element].
 */
fun preserveWhitespace(node: Node): Boolean {
    if (node !is Element) return false
    // The element itself plus five levels up: six nodes at most.
    return generateSequence<Node>(node) { it.parent() }
        .take(6)
        .any { it is Element && it.tag().preserveWhitespace() }
}