You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.5 KiB
Kotlin
75 lines
2.5 KiB
Kotlin
package moe.odango.index.utils
|
|
|
|
import org.jsoup.internal.StringUtil
|
|
import org.jsoup.nodes.CDataNode
|
|
import org.jsoup.nodes.Element
|
|
import org.jsoup.nodes.Node
|
|
import org.jsoup.nodes.TextNode
|
|
import org.jsoup.select.NodeTraversor
|
|
import org.jsoup.select.NodeVisitor
|
|
|
|
/**
 * Collects the text of this element and all of its descendants into a single string,
 * rendering every `<br>` as a line break.
 *
 * - Text nodes are appended through [appendNormalisedText].
 * - A `<br>` removes the spaces directly before it and appends `'\n'`.
 * - Any other block element is separated from preceding text by a single space.
 *
 * @return the accumulated text with leading and trailing whitespace trimmed.
 */
fun Element.brText(): String {
    val accum = StringUtil.borrowBuilder()

    NodeTraversor.traverse(object : NodeVisitor {
        override fun head(node: Node, depth: Int) {
            if (node is TextNode) {
                appendNormalisedText(accum, node)
            } else if (node is Element && accum.isNotEmpty()) {
                if (node.tagName() == "br") {
                    // Strip the spaces in front of the line break. The lower bound matters:
                    // the buffer may consist solely of spaces (whitespace-only text node
                    // followed by <br>), and indexing at -1 would throw.
                    var lastIndex = accum.lastIndex
                    while (lastIndex >= 0 && accum[lastIndex] == ' ') {
                        lastIndex--
                    }
                    accum.delete(lastIndex + 1, accum.length)
                    accum.append('\n')
                } else if (node.isBlock && !accum.lastIsWhitespace()) {
                    // Keep the text of a block element apart from whatever precedes it.
                    accum.append(' ')
                }
            }
        }

        override fun tail(node: Node, depth: Int) {
            // Make sure there is a space between block tags and immediately following text
            // nodes: <div>One</div>Two should be "One Two". The space is only needed when
            // the buffer does not already end in whitespace.
            if (node is Element &&
                node.isBlock &&
                node.nextSibling() is TextNode &&
                !accum.lastIsWhitespace()
            ) {
                accum.append(' ')
            }
        }
    }, this)

    return StringUtil.releaseBuilder(accum).trim()
}
|
|
|
|
/**
 * Tells whether the last character of this builder is a space or a newline.
 *
 * @return `true` if the builder ends in `' '` or `'\n'`; `false` otherwise, including
 * when the builder is empty.
 */
fun StringBuilder.lastIsWhitespace(): Boolean = when (lastOrNull()) {
    ' ', '\n' -> true
    else -> false
}
|
|
|
|
/**
 * Appends the text of [textNode] to [accum].
 *
 * The text is appended verbatim when [textNode] is a [CDataNode] or when
 * [preserveWhitespace] reports `true` for its parent. Otherwise it goes through
 * [StringUtil.appendNormalisedWhitespace], with leading whitespace stripped when [accum]
 * already ends in whitespace.
 *
 * @param accum the buffer that receives the text.
 * @param textNode the node whose whole text is appended.
 */
fun appendNormalisedText(accum: StringBuilder, textNode: TextNode) {
    val text = textNode.wholeText

    // A detached text node has no parent. preserveWhitespace() takes a non-null Node, so
    // guard here instead of letting the null reach it and throw.
    val parent: Node? = textNode.parentNode()
    val keepVerbatim = textNode is CDataNode || (parent != null && preserveWhitespace(parent))

    if (keepVerbatim) {
        accum.append(text)
    } else {
        StringUtil.appendNormalisedWhitespace(accum, text, accum.lastIsWhitespace())
    }
}
|
|
|
|
/**
 * Decides whether whitespace has to be kept as-is for content under [node].
 *
 * Only [node] itself and at most five levels of parents above it are inspected, which
 * keeps the search bounded and avoids recursion.
 *
 * @param node the node to start from; anything that is not an [Element] yields `false`.
 * @return `true` if one of the inspected nodes is an [Element] whose tag preserves
 * whitespace, `false` otherwise.
 */
fun preserveWhitespace(node: Node): Boolean {
    if (node !is Element) return false

    // The start node plus five ancestors: six candidates at most. The chain ends early
    // when a node has no parent.
    return generateSequence<Node>(node) { current -> current.parent() }
        .take(6)
        .any { candidate -> candidate is Element && candidate.tag().preserveWhitespace() }
}
|