You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
index/src/main/kotlin/moe/odango/index/sync/MyAnimeListPageSync.kt

328 lines
13 KiB
Kotlin

package moe.odango.index.sync
import com.github.kittinunf.fuel.coroutines.awaitString
import com.github.kittinunf.fuel.httpGet
import io.requery.Persistable
import io.requery.kotlin.*
import io.requery.query.function.Random
import io.requery.sql.KotlinEntityDataStore
import moe.odango.index.di
import moe.odango.index.entity.*
import moe.odango.index.scraper.mal.AnimePageScraper
import moe.odango.index.utils.InfoSource
import org.joda.time.DateTime
import org.kodein.di.instance
import java.time.Duration
import java.time.Instant
import java.util.concurrent.CancellationException
import java.util.concurrent.TimeUnit
import kotlin.math.max
/**
 * Sync job that re-scrapes MyAnimeList anime pages and reconciles the stored data with them.
 *
 * Constructed with `ScheduledSync(1, TimeUnit.DAYS)` — presumably a once-a-day schedule; the
 * scheduling itself lives in [ScheduledSync].
 */
class MyAnimeListPageSync : ScheduledSync(1, TimeUnit.DAYS) {
    val entityStore: KotlinEntityDataStore<Persistable> by di.instance()

    /**
     * Runs one sync pass.
     *
     * 1. Selects up to 300 random [MyAnimeListInfo] rows whose `lastScrape` is null or older than
     *    seven days.
     * 2. Pre-loads the MyAnimeList-sourced titles, genres, relations and producers of those anime.
     * 3. Downloads every anime page one after another; a failed download is logged and skipped.
     * 4. In a single transaction, updates each info row and inserts/updates/deletes titles,
     *    genres, relations, series membership and producers so they match the scraped page.
     */
    override suspend fun run() {
        val weekAgo = DateTime((Instant.now() - Duration.ofDays(7)).toEpochMilli())

        val infos = entityStore {
            val q =
                select(MyAnimeListInfo::class) where (MyAnimeListInfo::lastScrape lt weekAgo) or (MyAnimeListInfo::lastScrape.isNull()) orderBy (Random()) limit 300

            q().toList()
        }

        // Nothing is due. Returning here also keeps the `in` queries below from being issued
        // with an empty list.
        if (infos.isEmpty()) return

        val animes = infos.map { it.anime }

        val titles = entityStore {
            val q = select(Title::class) where (Title::anime `in` animes) and (Title::source eq InfoSource.MyAnimeList)

            q().toList()
        }

        // anime id -> (MAL genre id -> existing link)
        val genres = entityStore {
            val q =
                select(AnimeGenre::class) where (AnimeGenre::anime `in` animes) and (AnimeGenre::source eq InfoSource.MyAnimeList)

            q().toList()
        }
            .groupBy { it.anime.id }
            .mapValues { entry -> entry.value.associateBy { it.genre.myAnimeListId }.toMutableMap() }

        // anime id -> (MAL id of the anime on the *other* end -> existing relation).
        // The inner key has to be the counterpart: every relation in a group shares the same
        // `from`, so keying by `from` would collapse the group to a single entry that the
        // per-relation lookups further down could never hit.
        val fromRelations = entityStore {
            val q =
                select(AnimeRelation::class) where (AnimeRelation::from `in` animes) and (AnimeRelation::source eq InfoSource.MyAnimeList)

            q().toList()
        }
            .groupBy { it.from.id }
            .mapValues { (_, relations) ->
                relations
                    .mapNotNull { rel -> rel.to.myAnimeListId?.let { id -> id to rel } }
                    .toMap()
                    .toMutableMap()
            }

        // Same idea for the inverse direction: grouped by `to`, keyed by the MAL id of `from`.
        val toRelations = entityStore {
            val q =
                select(AnimeRelation::class) where (AnimeRelation::source eq InfoSource.MyAnimeList) and (AnimeRelation::to `in` animes)

            q().toList()
        }
            .groupBy { it.to.id }
            .mapValues { (_, relations) ->
                relations
                    .mapNotNull { rel -> rel.from.myAnimeListId?.let { id -> id to rel } }
                    .toMap()
                    .toMutableMap()
            }

        // anime id -> (MAL producer id -> existing link)
        val producers = entityStore {
            val q =
                select(AnimeProducer::class) where (AnimeProducer::anime `in` animes) and (AnimeProducer::source eq InfoSource.MyAnimeList)

            q().toList()
        }
            .groupBy { it.anime.id }
            .mapValues { entry -> entry.value.associateBy { it.producer.myAnimeListId!! }.toMutableMap() }

        // MAL anime id -> (title name -> existing title)
        val titlesByMyAnimeListId = mutableMapOf<Long, MutableMap<String, Title>>()
        for (title in titles) {
            val malId = title.anime.myAnimeListId ?: continue
            titlesByMyAnimeListId.getOrPut(malId, ::mutableMapOf)[title.name] = title
        }

        // Download all pages before opening the transaction so no network I/O happens inside it.
        val bodies = mutableMapOf<Long, String>()
        for (info in infos) {
            try {
                val myAnimeListId = info.anime.myAnimeListId!!
                println("=> Fetching MAL Page: $myAnimeListId")

                bodies[myAnimeListId] = "https://myanimelist.net/anime/$myAnimeListId/"
                    .httpGet()
                    .awaitString()
            } catch (e: CancellationException) {
                // Cancellation must propagate; swallowing it would keep a cancelled job running.
                throw e
            } catch (t: Throwable) {
                // Best effort: a page that fails to download is simply not indexed this run.
                t.printStackTrace()
            }
        }

        // Per-run caches so each genre/producer is looked up (or created) at most once.
        val allGenres = mutableMapOf<Int, Genre>()
        val allProducers = mutableMapOf<Int, Producer>()

        // NOTE(review): pairs are collected here but nothing visible in this method consumes
        // them, so anime that already belong to two different series are never merged.
        val seriesToMerge = mutableListOf<Pair<AnimeSeries, AnimeSeries>>()

        entityStore.withTransaction {
            /** Returns the stored [Genre] for a scraped genre, inserting it when missing. */
            fun getGenre(genre: AnimePageScraper.Genre): Genre {
                return allGenres.getOrPut(genre.id) {
                    val gq = select(Genre::class) where (Genre::myAnimeListId eq genre.id)

                    gq().firstOrNull() ?: run {
                        val gen = Genre {
                            setMyAnimeListId(genre.id)
                            setName(genre.name)
                        }
                        insert(gen)
                        gen
                    }
                }
            }

            /** Returns the stored [Producer] for a scraped producer, inserting it when missing. */
            fun getProducer(producer: AnimePageScraper.Producer): Producer {
                return allProducers.getOrPut(producer.id) {
                    val pq = select(Producer::class) where (Producer::myAnimeListId eq producer.id)

                    pq().firstOrNull() ?: run {
                        val prod = Producer {
                            setMyAnimeListId(producer.id)
                            setName(producer.name)
                        }
                        insert(prod)
                        prod
                    }
                }
            }

            for (info in infos) {
                // An anime without a MAL id has no downloaded page either, so skip it instead of
                // failing the whole transaction.
                val myAnimeListId = info.anime.myAnimeListId ?: continue
                println("=> Indexing MAL Page: $myAnimeListId")
                val body = bodies[myAnimeListId] ?: continue

                val scraper = AnimePageScraper(body)

                // ---- Scalar info fields ----
                val aired = scraper.getAired()
                val premiered = scraper.getPremiered()

                info.airedEnd = aired?.end?.toDate()
                info.airedStart = aired?.start?.toDate()
                info.premieredSeason = premiered?.season?.name
                info.premieredYear = premiered?.year
                info.releaseType = scraper.getReleaseType()
                info.image = scraper.getImage()
                info.source = scraper.getSource()
                info.description = scraper.getDescription()
                info.episodes = scraper.getEpisodes()
                info.rating = scraper.getRating()
                info.duration = scraper.getDuration()?.toSeconds()?.toInt()?.let { max(it, 0) }
                info.lastScrape = DateTime.now()
                update(info)

                // ---- Titles ----
                val currentTitles = titlesByMyAnimeListId[myAnimeListId] ?: mutableMapOf()
                val done = mutableSetOf<String>()

                // Inserts the title unless it is already stored or was already handled for this
                // page. Removing it from `currentTitles` marks a stored title as still present.
                fun syncTitle(titleName: String, type: Title.TitleType, language: String) {
                    if (currentTitles.remove(titleName) == null && !done.contains(titleName)) {
                        insert(Title {
                            setAnime(info.anime)
                            name = titleName
                            setType(type)
                            setLanguage(language)
                            setSource(InfoSource.MyAnimeList)
                        })
                    }
                    done.add(titleName)
                }

                syncTitle(scraper.getTitle(), Title.TitleType.Main, "x-jat")
                scraper.getEnglishName()?.let { syncTitle(it, Title.TitleType.Official, "en") }
                scraper.getJapaneseName()?.let { syncTitle(it, Title.TitleType.Official, "ja") }
                for (synonym in scraper.getSynonyms()) {
                    syncTitle(synonym, Title.TitleType.Synonym, "x-jat")
                }

                // Whatever is left was not on the page any more.
                for ((_, currentTitle) in currentTitles) {
                    delete(currentTitle)
                }

                // ---- Genres ----
                val currentGenres = genres[info.anime.id] ?: mutableMapOf()

                for (genre in scraper.getGenres()) {
                    if (currentGenres.remove(genre.id) == null) {
                        val genreEnt = getGenre(genre)
                        insert(AnimeGenre {
                            setAnime(info.anime)
                            setGenre(genreEnt)
                            setSource(InfoSource.MyAnimeList)
                        })
                    }
                }

                for ((_, genreEnt) in currentGenres) {
                    delete(genreEnt)
                }

                // ---- Relations and series membership ----
                val currentFromRelations = fromRelations[info.anime.id] ?: mutableMapOf()
                val currentToRelations = toRelations[info.anime.id] ?: mutableMapOf()

                val related = scraper.getRelated()
                val relatedIds = related.map { it.id }

                // Only query when there is something to look up (avoids an empty `in` list).
                val relatedAnimes = mutableMapOf<Long, Anime>()
                if (relatedIds.isNotEmpty()) {
                    val relatedAnimesQuery = select(Anime::class) where (Anime::myAnimeListId `in` relatedIds)
                    relatedAnimesQuery().toList().associateByTo(relatedAnimes) { it.myAnimeListId!! }
                }

                for (relation in related) {
                    val currentFromRelation = currentFromRelations.remove(relation.id)

                    // Create a placeholder anime for related entries that are not stored yet.
                    val relatedAnime = relatedAnimes.getOrPut(relation.id) {
                        val anim = Anime {
                            this@Anime.myAnimeListId = relation.id
                        }
                        insert(anim)
                        anim
                    }

                    if (currentFromRelation == null) {
                        insert(AnimeRelation {
                            setFrom(info.anime)
                            setTo(relatedAnime)
                            setSource(InfoSource.MyAnimeList)
                            this@AnimeRelation.relation = relation.type
                        })
                    } else if (currentFromRelation.relation != relation.type) {
                        currentFromRelation.relation = relation.type
                        update(currentFromRelation)
                    }

                    // Mirror the relation in the opposite direction when it is missing.
                    val currentToRelation = currentToRelations.remove(relation.id)

                    if (currentToRelation == null) {
                        insert(AnimeRelation {
                            setTo(info.anime)
                            setFrom(relatedAnime)
                            setSource(InfoSource.MyAnimeList)
                            this@AnimeRelation.relation = relation.type.inverse
                        })
                    }

                    // Character only have characters of that anime in the other anime
                    // So are not part of The Series, see e.g. Isekai Quartet
                    if (relation.type != AnimeRelation.RelationType.Character) {
                        val currAnime = info.anime

                        if (relatedAnime.series != null && currAnime.series == null) {
                            currAnime.series = relatedAnime.series
                            update(currAnime)
                        } else if (relatedAnime.series == null && currAnime.series != null) {
                            relatedAnime.series = currAnime.series
                            update(relatedAnime)
                        } else if (relatedAnime.series == null && currAnime.series == null) {
                            val newSeries = AnimeSeries {}
                            insert(newSeries)

                            currAnime.series = newSeries
                            update(currAnime)

                            relatedAnime.series = newSeries
                            update(relatedAnime)
                        } else if (relatedAnime.series != null && currAnime.series != null) {
                            seriesToMerge.add(relatedAnime.series!! to currAnime.series!!)
                        }
                    }
                }
                // NOTE(review): relations still left in currentFromRelations/currentToRelations
                // are no longer listed on the page but are kept, unlike titles, genres and
                // producers whose leftovers are deleted — confirm whether that is intended.

                // ---- Producers ----
                // Runs once per anime, outside the relation loop: inside it, anime without
                // relations would never get their producers synced and anime with several
                // relations would get the same producers inserted repeatedly.
                val currentProducers = producers[info.anime.id] ?: mutableMapOf()
                val malProducers = scraper.getProducers()

                for (producer in malProducers) {
                    val currentProducer = currentProducers.remove(producer.producer.id)

                    if (currentProducer != null) {
                        if (currentProducer.function != producer.function) {
                            currentProducer.function = producer.function
                            update(currentProducer)
                        }
                        continue
                    }

                    val producerEntity = getProducer(producer.producer)

                    insert(AnimeProducer {
                        setAnime(info.anime)
                        setProducer(producerEntity)
                        setSource(InfoSource.MyAnimeList)
                        function = producer.function
                    })
                }

                for ((_, currentProducer) in currentProducers) {
                    delete(currentProducer)
                }
            }
        }
    }
}