package moe.odango.index.utils

import org.jsoup.internal.StringUtil
import org.jsoup.nodes.CDataNode
import org.jsoup.nodes.Element
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.select.NodeTraversor
import org.jsoup.select.NodeVisitor

/**
 * Collects the text of this element and its descendants, turning every `<br>`
 * into a line break (`'\n'`).
 *
 * Text nodes are appended through [appendNormalisedText]. A single space is
 * inserted at block-element boundaries when the accumulated text does not
 * already end in whitespace. Spaces directly preceding a `<br>` are removed
 * before the newline is written.
 *
 * @return the accumulated text with leading and trailing whitespace trimmed.
 */
fun Element.brText(): String {
    val accum = StringUtil.borrowBuilder()

    NodeTraversor.traverse(object : NodeVisitor {
        override fun head(node: Node, depth: Int) {
            when (node) {
                is TextNode -> appendNormalisedText(accum, node)
                is Element -> {
                    // Nothing to separate from while no text has been collected yet.
                    if (accum.isEmpty()) return

                    if (node.tagName() == "br") {
                        // Strip the spaces in front of the line break. The lower-bound
                        // check keeps the scan inside the buffer when it holds nothing
                        // but spaces (e.g. `<p> <br>x</p>`).
                        var lastIndex = accum.lastIndex
                        while (lastIndex >= 0 && accum[lastIndex] == ' ') {
                            lastIndex--
                        }
                        accum.delete(lastIndex + 1, accum.length)
                        accum.append('\n')
                    } else if (node.isBlock && !accum.lastIsWhitespace()) {
                        accum.append(' ')
                    }
                }
            }
        }

        override fun tail(node: Node, depth: Int) {
            // Make sure there is a space between a block tag and an immediately
            // following text node: <div>One</div>Two should be "One Two".
            // The space is only added when the text does not already end in whitespace.
            if (node is Element &&
                node.isBlock &&
                node.nextSibling() is TextNode &&
                !accum.lastIsWhitespace()
            ) {
                accum.append(' ')
            }
        }
    }, this)

    return StringUtil.releaseBuilder(accum).trim()
}

/**
 * Returns `true` when the last character of this builder is a space or a
 * newline, and `false` otherwise (including when the builder is empty).
 */
fun StringBuilder.lastIsWhitespace(): Boolean {
    val last = lastOrNull()
    return last == ' ' || last == '\n'
}

/**
 * Appends the text of [textNode] to [accum].
 *
 * The text is appended verbatim when the node is a [CDataNode] or when
 * [preserveWhitespace] reports `true` for its parent. Otherwise it goes through
 * `StringUtil.appendNormalisedWhitespace`, which is told whether [accum]
 * already ends in whitespace.
 *
 * @param accum builder receiving the text.
 * @param textNode node whose `wholeText` is appended.
 */
fun appendNormalisedText(accum: StringBuilder, textNode: TextNode) {
    val text = textNode.wholeText
    if (preserveWhitespace(textNode.parentNode()) || textNode is CDataNode) {
        accum.append(text)
    } else {
        StringUtil.appendNormalisedWhitespace(accum, text, accum.lastIsWhitespace())
    }
}

/**
 * Tells whether [node] or one of its close ancestors has a tag that preserves
 * whitespace.
 *
 * Only [node] itself and up to five levels of parents are inspected, which
 * bounds the walk up the tree.
 *
 * @param node node to start from; anything that is not an [Element] yields `false`.
 * @return `true` if an inspected element's tag reports `preserveWhitespace()`.
 */
fun preserveWhitespace(node: Node): Boolean {
    if (node !is Element) return false

    var current: Node? = node
    var level = 0
    do {
        val candidate = current ?: return false
        if (candidate is Element && candidate.tag().preserveWhitespace()) return true
        current = candidate.parent()
        level++
    } while (level < 6 && current != null)

    return false
}