You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
247 lines
7.8 KiB
Kotlin
247 lines
7.8 KiB
Kotlin
package moe.odango.index.scraper.mal
|
|
|
|
import moe.odango.index.entity.AnimeRelation
|
|
import moe.odango.index.entity.MyAnimeListInfo
|
|
import moe.odango.index.utils.IntDate
|
|
import moe.odango.index.utils.ProducerFunction
|
|
import moe.odango.index.utils.brText
|
|
import org.jsoup.Jsoup
|
|
import org.jsoup.nodes.Element
|
|
import java.time.Duration
|
|
import java.util.*
|
|
|
|
class AnimePageScraper(body: String) {
|
|
// Parsed document built from the raw page body handed to the constructor.
private val dom = Jsoup.parse(body)

// Labelled page entries, keyed by their lower-cased label. Each value pairs the text that
// follows the label with the element it was read from. Populated by the init block.
private val items = mutableMapOf<String, Pair<String, Element>>()
|
|
|
|
/**
 * Everything [getInfo] collects from one page, bundled into a single value.
 *
 * Each property carries the result of the getter with the matching name
 * ([title] from [getTitle], [type] from [getReleaseType], and so on).
 */
data class Info(
    val title: String,
    val englishName: String?,
    val japaneseName: String?,
    val synonyms: List<String>,
    val description: String,
    val type: MyAnimeListInfo.ReleaseType,
    val episodes: Int?,
    val source: String?,
    val image: String?,
    val genres: List<Genre>,
    val aired: Aired?,
    val premiered: Premiered?,
    val rating: MyAnimeListInfo.Rating?,
    val duration: Duration?,
    val related: List<Relation>,
    val producers: List<ProducerRelation>
)
|
|
|
|
/**
 * Runs every individual getter once, in declaration order of [Info]'s properties,
 * and returns the results as one [Info].
 */
fun getInfo(): Info = Info(
    title = getTitle(),
    englishName = getEnglishName(),
    japaneseName = getJapaneseName(),
    synonyms = getSynonyms(),
    description = getDescription(),
    type = getReleaseType(),
    episodes = getEpisodes(),
    source = getSource(),
    image = getImage(),
    genres = getGenres(),
    aired = getAired(),
    premiered = getPremiered(),
    rating = getRating(),
    duration = getDuration(),
    related = getRelated(),
    producers = getProducers()
)
|
|
|
|
/** Returns the "episodes" entry as an integer, or null when it is missing or not a whole number. */
fun getEpisodes(): Int? = items["episodes"]?.first?.toIntOrNull()
|
|
|
|
/**
 * Converts the text of the "type" entry into a [MyAnimeListInfo.ReleaseType] via `valueOf`.
 *
 * Throws when the page has no "type" entry (the `!!` below) or when `valueOf` rejects the text.
 */
fun getReleaseType(): MyAnimeListInfo.ReleaseType {
    val typeText = items["type"]!!.first
    return MyAnimeListInfo.ReleaseType.valueOf(typeText)
}
|
|
|
|
/**
 * Returns the first line of the page heading (`.h1-title span[itemprop=name]`).
 *
 * The heading is rendered with [brText]; only the text before the first line break is kept.
 */
fun getTitle(): String {
    val heading = dom.select(".h1-title span[itemprop=name]").first()
    val headingText = heading.brText()
    return headingText.substringBefore("\n")
}
|
|
|
|
/**
 * Returns the comma-separated "synonyms" entry as a list of trimmed names.
 *
 * The result is empty when there is no such entry, or when the English name or the title
 * contains a comma.
 */
fun getSynonyms(): List<String> {
    // Synonyms are split on commas, so a comma inside the English name or the title means
    // the list cannot be parsed reliably.
    val englishHasComma = getEnglishName()?.contains(",") == true
    if (englishHasComma || getTitle().contains(",")) return listOf()

    val rawSynonyms = items["synonyms"]?.first ?: return listOf()
    return rawSynonyms.split(",").map { it.trim() }
}
|
|
|
|
/** Returns the text of the "japanese" entry, or null when the page has none. */
fun getJapaneseName(): String? = items["japanese"]?.first
|
|
|
|
/** Returns the text of the "english" entry, or null when the page has none. */
fun getEnglishName(): String? = items["english"]?.first
|
|
|
|
// Indexes every labelled entry of the page into `items`.
//
// An entry is the element that directly wraps a `.dark_text` label. Its text is split at the
// first ':' into a lower-cased key (the label) and the remaining value; the element itself is
// stored next to the value so getters can also inspect the links inside it.
init {
    // Only the direct parent of each label is an entry. Elements.parents() would also return
    // every ancestor up to <html>, indexing whole-page containers under meaningless keys.
    val entries = dom.select(".dark_text")
        .mapNotNull { label -> label.parent() }
        .distinct()

    for (entry in entries) {
        val parts = entry.text().split(":", limit = 2)
        // Locale.ROOT keeps the keys stable regardless of the JVM's default locale.
        val key = parts.first().trim().toLowerCase(Locale.ROOT)
        val value = parts.last().trim()
        items[key] = value to entry
    }
}
|
|
|
|
/** Returns the text of the "source" entry, or null when the page has none. */
fun getSource(): String? = items["source"]?.first
|
|
|
|
/**
 * Returns the `data-src` attribute of the first `img[itemprop=image]` element,
 * or null when the page has no such image.
 */
fun getImage(): String? {
    val image = dom.selectFirst("img[itemprop=image]") ?: return null
    return image.attr("data-src")
}
|
|
|
|
/**
 * Returns the [brText] rendering of the first `[itemprop=description]` element,
 * or an empty string when the page has none.
 */
fun getDescription(): String =
    dom.selectFirst("[itemprop=description]")?.brText().orEmpty()
|
|
|
|
/**
 * Parses the "aired" entry, written as `<start>` or `<start> to <end>` with dates in
 * `MMM d, yyyy` format (US locale).
 *
 * Returns null when the entry is missing or the start date cannot be parsed. The end date
 * is optional and is null when absent.
 */
fun getAired(): Aired? {
    val airedText = items["aired"]?.first ?: return null
    val pieces = airedText.split(" to ", limit = 2)

    val start = IntDate.parse("MMM d, yyyy", pieces.first(), Locale.US) ?: return null
    val end = pieces.getOrNull(1)?.let { endText ->
        IntDate.parse("MMM d, yyyy", endText, Locale.US)
    }
    return Aired(start, end)
}
|
|
|
|
/**
 * Airing window produced by [getAired].
 *
 * @property start date parsed from the part of the "aired" entry before " to "
 * @property end date parsed from the part after " to ", or null when there is none
 */
data class Aired(
    val start: IntDate,
    val end: IntDate? = null
)
|
|
|
|
/**
 * Parses the "premiered" entry, expected to be `<season> <year>`.
 *
 * Returns null when the entry is missing, does not consist of exactly two space-separated
 * parts, names an unknown season, or has a non-numeric year.
 */
fun getPremiered(): Premiered? {
    val premieredText = items["premiered"]?.first ?: return null
    val tokens = premieredText.split(" ", limit = 2)
    if (tokens.size != 2) return null

    val season = Premiered.Season.fromString(tokens[0]) ?: return null
    val year = tokens[1].toIntOrNull() ?: return null
    return Premiered(season, year)
}
|
|
|
|
/**
 * Season and year in which a title premiered.
 *
 * @property season the premiere season
 * @property year the premiere year
 */
data class Premiered(val season: Season, val year: Int) {
    /** The four seasons a premiere can be assigned to. */
    enum class Season {
        Spring,
        Summer,
        Fall,
        Winter;

        companion object {
            /**
             * Resolves a season name case-insensitively.
             *
             * @param season the name to look up, e.g. "Spring" or "WINTER"
             * @return the matching [Season], or null when the name is not one of the four seasons
             */
            fun fromString(season: String): Season? {
                // Locale.ROOT: under a Turkish default locale, "WINTER".toLowerCase() yields
                // "w\u0131nter" (dotless i) and would not match.
                return when (season.toLowerCase(Locale.ROOT)) {
                    "spring" -> Spring
                    "summer" -> Summer
                    "fall" -> Fall
                    "winter" -> Winter
                    else -> null
                }
            }
        }
    }
}
|
|
|
|
/**
 * Returns the genres linked from the "genres" entry.
 *
 * The id of each genre is the path segment that follows `genre` in the link's `href`; the
 * name is the link text. Links without a `genre` segment or without a numeric id after it
 * are skipped instead of aborting the whole scrape.
 *
 * @return the parsed genres, or an empty list when the page has no "genres" entry
 */
fun getGenres(): List<Genre> {
    val entry = items["genres"]?.second ?: return emptyList()

    return entry.select("a").mapNotNull { link ->
        val segments = link.attr("href").split("/")
        // indexOf returns -1 when the segment is absent; without the check the code would
        // read segments[0], which is not an id.
        val id = segments.indexOf("genre")
            .takeIf { it >= 0 }
            ?.let { segments.getOrNull(it + 1) }
            ?.toIntOrNull()

        id?.let { Genre(it, link.text().trim()) }
    }
}
|
|
|
|
/**
 * A genre as produced by [getGenres].
 *
 * @property id numeric id taken from the genre link
 * @property name link text of the genre
 */
data class Genre(
    val id: Int,
    val name: String
) {
    /** Predefined genres for direct comparison against scraped values. */
    companion object {
        val COMEDY = Genre(id = 4, name = "Comedy")
        val DEMONS = Genre(id = 6, name = "Demons")
        val DRAMA = Genre(id = 8, name = "Drama")
        val FANTASY = Genre(id = 10, name = "Fantasy")
        val HENTAI = Genre(id = 12, name = "Hentai")
        val MAGIC = Genre(id = 16, name = "Magic")
        val PARODY = Genre(id = 20, name = "Parody")
        val ROMANCE = Genre(id = 22, name = "Romance")
        val SCHOOL = Genre(id = 23, name = "School")
        val SCIFI = Genre(id = 24, name = "Sci-Fi")
        val SHOUJO = Genre(id = 25, name = "Shoujo")
        val HAREM = Genre(id = 35, name = "Harem")
        val MILITARY = Genre(id = 38, name = "Military")
    }
}
|
|
|
|
/**
 * Returns the relations listed in `table.anime_detail_related_anime`.
 *
 * Each table row contributes one relation per anime link in its second cell; the relation
 * type is the text of the first cell with colons removed. The target id is the path segment
 * that follows `anime` in the link's `href`. Rows with fewer than two cells and links without
 * a numeric anime id are skipped.
 */
fun getRelated(): List<Relation> {
    return dom.select("table.anime_detail_related_anime tr").flatMap { row ->
        val cells = row.children()
        if (cells.size < 2) {
            // Element.child(n) throws on short rows; treat them as carrying no relations.
            emptyList<Relation>()
        } else {
            val type = cells[0].text().trim().replace(":", "")

            cells[1].select("a").mapNotNull { link ->
                val segments = link.attr("href").split("/")
                // getOrNull guards against an href that ends right after the "anime" segment.
                segments.indexOf("anime")
                    .takeIf { it >= 0 }
                    ?.let { segments.getOrNull(it + 1) }
                    ?.toLongOrNull()
                    ?.let { id -> Relation(type, id) }
            }
        }
    }
}
|
|
|
|
/**
 * Returns the companies linked from the "producers", "studios" and "licensors" entries,
 * in that order, each tagged with the matching [ProducerFunction].
 *
 * A company's id is the path segment that follows `producer` in the link's `href`; its name
 * is the link text. Links without a `producer` segment or without a numeric id after it are
 * skipped.
 */
fun getProducers(): List<ProducerRelation> {
    // Collects the companies linked from the entry stored under [key].
    fun linkedProducers(key: String): List<Producer> {
        val entry = items[key]?.second ?: return emptyList()

        return entry.select("a").mapNotNull { link ->
            val segments = link.attr("href").split("/")
            // getOrNull / toIntOrNull: a truncated or non-numeric href must not abort the scrape.
            val id = segments.indexOf("producer")
                .takeIf { it >= 0 }
                ?.let { segments.getOrNull(it + 1) }
                ?.toIntOrNull()

            id?.let { Producer(it, link.text().trim()) }
        }
    }

    val producers = linkedProducers("producers")
    val studios = linkedProducers("studios")
    val licensors = linkedProducers("licensors")

    return producers.map { ProducerRelation(ProducerFunction.Producer, it) } +
        studios.map { ProducerRelation(ProducerFunction.Studio, it) } +
        licensors.map { ProducerRelation(ProducerFunction.Licensor, it) }
}
|
|
|
|
/**
 * A [Producer] together with the role it has for the scraped title.
 *
 * @property function the role, as assigned by [getProducers]
 * @property producer the company holding that role
 */
data class ProducerRelation(
    val function: ProducerFunction,
    val producer: Producer
)
|
|
|
|
/**
 * A company as produced by [getProducers].
 *
 * @property id numeric id taken from the company link
 * @property name link text of the company
 */
data class Producer(
    val id: Int,
    val name: String
)
|
|
|
|
/**
 * Resolves the "rating" entry through [MyAnimeListInfo.Rating.fromString].
 *
 * Only the trimmed text before the first " - " is passed on. Returns null when the page has
 * no "rating" entry.
 */
fun getRating(): MyAnimeListInfo.Rating? {
    val ratingText = items["rating"]?.first ?: return null
    val ratingCode = ratingText.split(" - ").first().trim()
    return MyAnimeListInfo.Rating.fromString(ratingCode)
}
|
|
|
|
// Matches "<n> hr." / "<n> hrs.", "<n> min." and "<n> sec." components, each optional and
// separated by optional whitespace. The dots are escaped so they only match a literal '.'.
private val durationRegex = Regex("""(?:(\d+) hrs?\.)?\s*(?:(\d+) min\.)?\s*(?:(\d+) sec\.)?""")

/**
 * Parses the "duration" entry into a [Duration].
 *
 * Everything from " per " onwards is ignored, and the remainder must consist solely of
 * hour, minute and second components (for example `24 min.` or `1 hr. 30 min.`).
 * NOTE(review): the exact spellings the site emits (`hr.` vs `hrs.`, `sec.`) are assumed;
 * confirm against real pages.
 *
 * @return the parsed duration, or null when the entry is missing, has an unexpected format,
 *         or contains no component at all
 */
fun getDuration(): Duration? {
    val text = items["duration"]?.first?.split(" per ", limit = 2)?.firstOrNull()?.trim() ?: return null
    val groups = durationRegex.matchEntire(text)?.groups ?: return null

    val hours = groups[1]?.value?.toLongOrNull()
    val minutes = groups[2]?.value?.toLongOrNull()
    val seconds = groups[3]?.value?.toLongOrNull()

    // Every component is optional, so the pattern also matches an empty string; that carries
    // no duration information and must not be reported as a zero-length duration.
    if (hours == null && minutes == null && seconds == null) return null

    return Duration.ofHours(hours ?: 0L)
        .plusMinutes(minutes ?: 0L)
        .plusSeconds(seconds ?: 0L)
}
|
|
|
|
|
|
/**
 * A link from the scraped title to another anime, as produced by [getRelated].
 *
 * @property type kind of relation
 * @property id numeric id of the related anime, taken from its link
 */
data class Relation(
    val type: AnimeRelation.RelationType,
    val id: Long
) {
    /** Builds a relation from a type name, resolved through [AnimeRelation.RelationType.fromString]. */
    constructor(type: String, id: Long) : this(
        AnimeRelation.RelationType.fromString(type),
        id
    )
}
|
|
}
|