You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

142 lines
4.7 KiB
Kotlin

package moe.odango.index.es
import io.inbot.eskotlinwrapper.IndexRepository
import io.requery.Persistable
import io.requery.kotlin.invoke
import io.requery.sql.KotlinEntityDataStore
import moe.odango.index.config.IndexConfiguration
import moe.odango.index.di
import moe.odango.index.entity.Anime
import moe.odango.index.es.dto.AnimeDescriptionDTO
import moe.odango.index.es.dto.AnimeTitleDTO
import moe.odango.index.utils.InfoSource
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.configure
import org.elasticsearch.client.indices.GetIndexRequest
import org.kodein.di.instance
import moe.odango.index.es.dto.AnimeDTO as AnimeDTO
/**
 * Creates the Elasticsearch index (if it does not exist yet) and fills it with
 * one document per [Anime] row read from the entity store.
 *
 * All collaborators are resolved lazily from the application's DI container.
 */
class Indexer {
    private val indexRepo by di.instance<IndexRepository<AnimeDTO>>()
    private val indexConfig by di.instance<IndexConfiguration>()
    private val client by di.instance<RestHighLevelClient>()
    private val config by lazy { indexConfig.elastic }
    private val entityStore by di.instance<KotlinEntityDataStore<Persistable>>()

    /**
     * Runs the full job: ensures the index exists, indexes every entry, and
     * closes the Elasticsearch client afterwards.
     *
     * The client is closed even when one of the steps throws, so a failed run
     * does not leak the underlying connections.
     */
    fun run() {
        client.use {
            createIndex()
            index()
        }
    }

    /**
     * Creates the index named by `config.index` with its analysis settings and
     * field mappings. Does nothing when an index with that name already exists.
     */
    fun createIndex() {
        val alreadyPresent = client.indices()
            .exists(GetIndexRequest(config.index), RequestOptions.DEFAULT)
        if (alreadyPresent) {
            return
        }

        indexRepo.createIndex {
            configure {
                settings {
                    replicas = config.replicas
                    shards = config.shards

                    // Index-time analysis for titles: edge n-grams of 2..10
                    // letters, lowercased.
                    addTokenizer("autocomplete") {
                        this["type"] = "edge_ngram"
                        this["min_gram"] = 2
                        this["max_gram"] = 10
                        this["token_chars"] = listOf("letter")
                    }
                    addAnalyzer("autocomplete") {
                        this["tokenizer"] = "autocomplete"
                        this["filter"] = listOf("lowercase")
                    }
                    // Search-time analysis: the built-in "lowercase" tokenizer
                    // only, so the query text itself is not n-grammed.
                    addAnalyzer("autocomplete_search") {
                        this["tokenizer"] = "lowercase"
                    }
                }
                mappings {
                    nestedField("title") {
                        text("name") {
                            analyzer = "autocomplete"
                            searchAnalyzer = "autocomplete_search"
                        }
                    }
                    nestedField("description") {
                        text("text") {
                            analyzer = "standard"
                            searchAnalyzer = "standard"
                        }
                    }
                    keyword("genre")
                    objField("premiered") {
                        keyword("season")
                        number<Int>("year")
                    }
                    objField("aired") {
                        field("start", "date")
                        field("end", "date")
                    }
                }
            }
        }
    }

    /**
     * Reads every [Anime] from the entity store and sends it to the index
     * through a bulk session, skipping entries whose `replacedWith` is set.
     *
     * Prints a progress line every [PROGRESS_INTERVAL] indexed entries and a
     * final total when done.
     */
    fun index() {
        var indexed = 0
        entityStore {
            val query = select(Anime::class)
            indexRepo.bulk(BULK_SIZE) {
                for (item in query()) {
                    // Entries that have been replaced by another one are not indexed.
                    if (item.replacedWith != null) continue

                    indexed++
                    if (indexed % PROGRESS_INTERVAL == 0) {
                        println(" => $indexed - ${item.id}")
                    }

                    // NOTE(review): the trailing `false` is presumably the bulk
                    // session's "create" flag (index/overwrite rather than
                    // create-only) - confirm against the es-kotlin-wrapper version in use.
                    index(item.id.toString(), toDocument(item), false)
                }
            }
        }
        println(" => Indexed $indexed entries.")
    }

    /**
     * Builds the search document for a single [Anime].
     *
     * The description list holds at most one element: the MyAnimeList
     * description, when both the MyAnimeList info and its description are present.
     */
    private fun toDocument(item: Anime): AnimeDTO {
        val malInfo = item.myAnimeListInfo

        val titles = item.titles.map { title ->
            AnimeTitleDTO(
                title.name,
                title.language,
                title.type.toString(),
                title.source
            )
        }
        val descriptions = listOfNotNull(
            malInfo?.description?.let { text ->
                AnimeDescriptionDTO(text, InfoSource.MyAnimeList)
            }
        )
        // De-duplicated genre names, first occurrence order preserved.
        val genres = item.genres.map { entry -> entry.genre.name }.distinct()

        return AnimeDTO(
            item.id,
            titles,
            descriptions,
            genres,
            malInfo?.premiered,
            malInfo?.aired
        )
    }

    private companion object {
        /** Value passed to `indexRepo.bulk(...)`; presumably the number of operations per bulk request. */
        const val BULK_SIZE = 50

        /** A progress line is printed every this many indexed entries. */
        const val PROGRESS_INTERVAL = 1_000
    }
}