xexiz
09/13/2021, 11:46 AM
private suspend fun getTotalPages(): Int =
withContext(Dispatchers.IO) {
skrape(AsyncFetcher) {
request {
url = "https://www.capfriendly.com/browse/active"
}
response {
htmlDocument {
div {
withClass = "pagination"
findFirst {
div {
withClass = "r"
val paginationText = findByIndex(1).text
paginationText.substringAfter(" of ").toInt()
}
}
}
}
}
}
}
• kotlin 1.5.30
• AGP 7.1.0-alpha11
• API 31
• skrapeit:1.1.5
Thanks
Christian Dräger
09/14/2021, 6:37 PM
@Test
// Fetches the CapFriendly "active" browse page and prints the total page count.
// The count is taken from the text of the element at index 1 among the matches of
// the `div` / class "r" selector inside the first `div` with class "pagination":
// everything after " of " is parsed as an Int (toInt() throws if it is not numeric).
fun `can get total pages`() {
    runBlocking {
        withContext(Dispatchers.IO) {
            val totalPages = skrape(AsyncFetcher) {
                request {
                    url = "https://www.capfriendly.com/browse/active"
                }
                response {
                    htmlDocument {
                        div {
                            withClass = "pagination"
                            findFirst {
                                div {
                                    withClass = "r"
                                    val paginationText = findByIndex(1).text
                                    paginationText.substringAfter(" of ").toInt()
                                }
                            }
                        }
                    }
                }
            }
            println(totalPages)
        }
    }
}
To avoid the string parsing (the link's text will probably change more frequently than its attributes), I would maybe do something like this:
/**
 * Fetches the CapFriendly "active" browse page and prints the total page count
 * without parsing display text.
 *
 * Instead of splitting the pagination label, this looks through the anchors selected
 * under `div.pagination` > `div.r` for the one whose text is "Last" and reads its
 * `data-val` attribute. The printed value is `null` when no such anchor is found.
 */
@Test
fun `can get total pages`() = runBlocking {
    withContext(Dispatchers.IO) {
        val totalPages = skrape(AsyncFetcher) {
            request {
                url = "https://www.capfriendly.com/browse/active"
            }
            response {
                htmlDocument {
                    div {
                        withClass = "pagination"
                        findFirst {
                            div {
                                withClass = "r"
                                a {
                                    findAll { find { it.text == "Last" }?.attribute("data-val") }
                                }
                            }
                        }
                    }
                }
            }
        }
        println(totalPages)
    }
}
xexiz
09/15/2021, 2:23 PM
private suspend fun fetchImdb(): List<User> =
withContext(Dispatchers.IO) {
skrape(AsyncFetcher) {
request {
url = "https://www.imdb.com/chart/top/"
sslRelaxed = true
}.also { println("call ${it.preparedRequest.url}") }
response {
htmlDocument {
table {
tbody {
withClass = "lister-list"
tr {
findAll {
map {
val title = it.td { findSecond { text } }
User(name = title, "", "")
}
}
}
}
}
}
}
}
}
/**
 * Scrapes every player index page concurrently and returns all players as one flat list.
 *
 * One [getHockeyDb] call is started per letter in 'a'..'z' except 'x'
 * (NOTE(review): presumably there is no index page for 'x'; confirm).
 * After all calls complete, the total count and a few sample entries are printed
 * for debugging.
 *
 * @return the players of all fetched index pages, in letter order.
 */
private suspend fun fetchDB(): List<User> {
    // Return the result from withContext directly instead of assigning a captured `var`.
    val players = withContext(Dispatchers.IO) {
        ('a'..'z')
            .filterNot { it == 'x' }
            .map { letter -> async { getHockeyDb(letter) } }
            .awaitAll()
            .flatten()
    }
    println("players total: ${players.size}")
    // getOrNull keeps the debug output from throwing IndexOutOfBoundsException
    // when fewer players were scraped than the sampled index.
    listOf(5, 100, 250, 500, 5000).forEach { index ->
        println("players $index: ${players.getOrNull(index)}")
    }
    return players
}
/**
 * Scrapes the hockeydb.com player index page for a single letter.
 *
 * Each selected table row is mapped to a [User] built from three extracted texts,
 * passed positionally as (name, team, salary): the first anchor's text, the text of
 * the cell at index 1, and the text of the last cell.
 *
 * @param letter the index letter interpolated into the page URL.
 * @return one [User] per row selected from the page's table body.
 */
private suspend fun getHockeyDb(letter: Char): List<User> {
    return withContext(Dispatchers.IO) {
        skrape(AsyncFetcher) {
            request {
                url = "https://www.hockeydb.com/ihdb/players/player_ind_$letter.html"
                sslRelaxed = true
            }.also { println("call ${it.preparedRequest.url}") }
            response {
                htmlDocument {
                    table {
                        tbody {
                            tr {
                                findAll {
                                    map {
                                        // NOTE(review): the inner `it` still refers to the row from `map`,
                                        // so the anchor lookup looks like it starts at the row - confirm.
                                        val name = it.td { it.a { findFirst { text } } }
                                        val team = it.td { findByIndex(1) { text } }
                                        val salary = it.td { findLast { text } }
                                        User(name, team, salary)
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
Christian Dräger
09/17/2021, 6:36 PM
xexiz
09/17/2021, 6:50 PM
AsyncFetcher with the OkHttp client (from ktor-client-okhttp, https://ktor.io/docs/http-client-engines.html#okhttp) and all my problems are fixed. I can now fetch all 1465 players from capfriendly.com, and it also works on all the Android versions I previously mentioned, not only on API 29+ 🙂
Christian Dräger
09/24/2021, 5:24 PM
xexiz
09/24/2021, 6:00 PM
Christian Dräger
09/25/2021, 7:36 PM