From: Lanius Trolling Date: Sat, 27 Jan 2024 14:16:05 +0000 (-0500) Subject: Make bot regexes case-insensitive X-Git-Url: https://gitweb.starshipfights.net/?a=commitdiff_plain;h=17e1aa706bdc3d3aa07095fc1b49eedcf448c8a0;p=factbooks Make bot regexes case-insensitive --- diff --git a/src/jvmMain/kotlin/info/mechyrdia/data/visits.kt b/src/jvmMain/kotlin/info/mechyrdia/data/visits.kt index 9ee6459..4ab2a91 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/data/visits.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/data/visits.kt @@ -15,6 +15,7 @@ import kotlinx.html.p import kotlinx.html.style import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +import org.intellij.lang.annotations.Language import java.security.MessageDigest import java.time.Instant @@ -125,183 +126,186 @@ fun FlowContent.guestbook(totalsData: PageVisitTotals) { } object RobotDetector { + private fun botRegex(@Language("RegExp") regex: String) = Regex(regex, RegexOption.IGNORE_CASE) + private val botRegexes = listOf( - Regex(" daum[ /]"), - Regex(" deusu/"), - Regex(" yadirectfetcher"), - Regex("(?:^| )site"), - Regex("(?:^|[^g])news"), - Regex("@[a-z]"), - Regex("\\(at\\)[a-z]"), - Regex("\\(github\\.com/"), - Regex("\\[at][a-z]"), - Regex("^12345"), - Regex("^<"), - Regex("^[\\w .\\-()]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$"), - Regex("^[^ ]{50,}$"), - Regex("^active"), - Regex("^ad muncher"), - Regex("^amaya"), - Regex("^anglesharp/"), - Regex("^anonymous"), - Regex("^avsdevicesdk/"), - Regex("^axios/"), - Regex("^bidtellect/"), - Regex("^biglotron"), - Regex("^btwebclient/"), - Regex("^castro"), - Regex("^clamav[ /]"), - Regex("^client/"), - Regex("^cobweb/"), - Regex("^coccoc"), - Regex("^custom"), - Regex("^ddg[_-]android"), - Regex("^discourse"), - Regex("^dispatch/\\d"), - Regex("^downcast/"), - Regex("^duckduckgo"), - Regex("^facebook"), - Regex("^fdm[ /]\\d"), - Regex("^getright/"), - Regex("^gozilla/"), - Regex("^hatena"), - Regex("^hobbit"), - Regex("^hotzonu"), - Regex("^hwcdn/"), - Regex("^jeode/"), - Regex("^jetty/"), - Regex("^jigsaw"), - Regex("^linkdex"), - Regex("^lwp[-: ]"), - Regex("^metauri"), - Regex("^microsoft bits"), - Regex("^movabletype"), - Regex("^mozilla/\\d\\.\\d \\(compatible;?\\)$"), - Regex("^mozilla/\\d\\.\\d \\w*$"), - Regex("^navermailapp"), - Regex("^netsurf"), - Regex("^offline explorer"), - Regex("^php"), - Regex("^postman"), - Regex("^postrank"), - Regex("^python"), - Regex("^read"), - Regex("^reed"), - Regex("^restsharp/"), - Regex("^snapchat"), - Regex("^space bison"), - Regex("^svn"), - Regex("^swcd "), - Regex("^taringa"), - Regex("^test certificate info"), - Regex("^thumbor/"), - Regex("^tumblr/"), - Regex("^user-agent:mozilla"), - Regex("^valid"), - Regex("^venus/fedoraplanet"), - Regex("^w3c"), - Regex("^webbandit/"), - Regex("^webcopier"), - Regex("^wget"), - Regex("^whatsapp"), - Regex("^xenu link sleuth"), - Regex("^yahoo"), - Regex("^yandex"), - Regex("^zdm/\\d"), - Regex("^zoom marketplace/"), - Regex("^\\{\\{.*\\}\\}$"), - Regex("adbeat\\.com"), - Regex("appinsights"), - Regex("archive"), - Regex("ask jeeves/teoma"), - Regex("bit\\.ly/"), - Regex("bluecoat drtr"), - Regex("bot"), - Regex("browsex"), - Regex("burpcollaborator"), - Regex("capture"), - Regex("catch"), - Regex("check"), - Regex("chrome-lighthouse"), - Regex("chromeframe"), - Regex("cloud"), - Regex("crawl"), - Regex("cryptoapi"), - Regex("dareboost"), - Regex("datanyze"), - Regex("dataprovider"), - Regex("dejaclick"), - Regex("dmbrowser"), - Regex("download"), - Regex("evc-batch/"), - Regex("feed"), - Regex("firephp"), - Regex("freesafeip"), - Regex("ghost"), - Regex("gomezagent"), - Regex("google"), - Regex("headlesschrome/"), - Regex("http"), - Regex("httrack"), - Regex("hubspot marketing grader"), - Regex("hydra"), - Regex("ibisbrowser"), - Regex("images"), - Regex("iplabel"), - Regex("ips-agent"), - Regex("java"), - Regex("library"), - Regex("mail\\.ru/"), - Regex("manager"), - Regex("monitor"), - Regex("morningscore/"), - Regex("neustar wpm"), - Regex("nutch"), - Regex("offbyone"), - Regex("optimize"), - Regex("pageburst"), - Regex("pagespeed"), - Regex("perl"), - Regex("phantom"), - Regex("pingdom"), - Regex("powermarks"), - Regex("preview"), - Regex("proxy"), - Regex("ptst[ /]\\d"), - Regex("reader"), - Regex("rexx;"), - Regex("rigor"), - Regex("rss"), - Regex("scan"), - Regex("scrape"), - Regex("search"), - Regex("serp ?reputation ?management"), - Regex("server"), - Regex("sogou"), - Regex("sparkler/"), - Regex("speedcurve"), - Regex("spider"), - Regex("splash"), - Regex("statuscake"), - Regex("stumbleupon\\.com"), - Regex("supercleaner"), - Regex("synapse"), - Regex("synthetic"), - Regex("taginspector/"), - Regex("torrent"), - Regex("tracemyfile"), - Regex("transcoder"), - Regex("trendsmapresolver"), - Regex("twingly recon"), - Regex("url"), - Regex("virtuoso"), - Regex("wappalyzer"), - Regex("webglance"), - Regex("webkit2png"), - Regex("websitemetadataretriever"), - Regex("whatcms/"), - Regex("wordpress"), - Regex("zgrab"), + botRegex(" daum[ /]"), + botRegex(" deusu/"), + botRegex(" yadirectfetcher"), + botRegex("(?:^| )site"), + botRegex("(?:^|[^g])news"), + botRegex("@[a-z]"), + botRegex("\\(at\\)[a-z]"), + botRegex("\\(github\\.com/"), + botRegex("\\[at][a-z]"), + botRegex("^12345"), + botRegex("^<"), + botRegex("^[\\w .\\-()]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$"), + botRegex("^[^ ]{50,}$"), + botRegex("^active"), + botRegex("^ad muncher"), + botRegex("^amaya"), + botRegex("^anglesharp/"), + botRegex("^anonymous"), + botRegex("^avsdevicesdk/"), + botRegex("^axios/"), + botRegex("^bidtellect/"), + botRegex("^biglotron"), + botRegex("^btwebclient/"), + botRegex("^castro"), + botRegex("^clamav[ /]"), + botRegex("^client/"), + botRegex("^cobweb/"), + botRegex("^coccoc"), + botRegex("^custom"), + botRegex("^ddg[_-]android"), + botRegex("^discourse"), + botRegex("^dispatch/\\d"), + botRegex("^downcast/"), + botRegex("^duckduckgo"), + botRegex("^facebook"), + botRegex("^fdm[ /]\\d"), + botRegex("^getright/"), + botRegex("^gozilla/"), + botRegex("^hatena"), + botRegex("^hobbit"), + botRegex("^hotzonu"), + botRegex("^hwcdn/"), + botRegex("^jeode/"), + botRegex("^jetty/"), + botRegex("^jigsaw"), + botRegex("^linkdex"), + botRegex("^lwp[-: ]"), + botRegex("^metauri"), + botRegex("^microsoft bits"), + botRegex("^movabletype"), + botRegex("^mozilla/\\d\\.\\d \\(compatible;?\\)$"), + botRegex("^mozilla/\\d\\.\\d \\w*$"), + botRegex("^navermailapp"), + botRegex("^netsurf"), + botRegex("^offline explorer"), + botRegex("^php"), + botRegex("^postman"), + botRegex("^postrank"), + botRegex("^python"), + botRegex("^read"), + botRegex("^reed"), + botRegex("^restsharp/"), + botRegex("^snapchat"), + botRegex("^space bison"), + botRegex("^svn"), + botRegex("^swcd "), + botRegex("^taringa"), + botRegex("^test certificate info"), + botRegex("^thumbor/"), + botRegex("^tumblr/"), + botRegex("^user-agent:mozilla"), + botRegex("^valid"), + botRegex("^venus/fedoraplanet"), + botRegex("^w3c"), + botRegex("^webbandit/"), + botRegex("^webcopier"), + botRegex("^wget"), + botRegex("^whatsapp"), + botRegex("^xenu link sleuth"), + botRegex("^yahoo"), + botRegex("^yandex"), + botRegex("^zdm/\\d"), + botRegex("^zoom marketplace/"), + botRegex("^\\{\\{.*}}$"), + botRegex("adbeat\\.com"), + botRegex("appinsights"), + botRegex("archive"), + botRegex("ask jeeves/teoma"), + botRegex("bit\\.ly/"), + botRegex("bluecoat drtr"), + botRegex("bot"), + botRegex("browsex"), + botRegex("burpcollaborator"), + botRegex("capture"), + botRegex("catch"), + botRegex("check"), + botRegex("chrome-lighthouse"), + botRegex("chromeframe"), + botRegex("cloud"), + botRegex("crawl"), + botRegex("cryptoapi"), + botRegex("dareboost"), + botRegex("datanyze"), + botRegex("dataprovider"), + botRegex("dejaclick"), + botRegex("dmbrowser"), + botRegex("download"), + botRegex("evc-batch/"), + botRegex("feed"), + botRegex("firephp"), + botRegex("freesafeip"), + botRegex("ghost"), + botRegex("gomezagent"), + botRegex("google"), + botRegex("headlesschrome/"), + botRegex("http"), + botRegex("httrack"), + botRegex("hubspot marketing grader"), + botRegex("hydra"), + botRegex("ibisbrowser"), + botRegex("images"), + botRegex("iplabel"), + botRegex("ips-agent"), + botRegex("java"), + botRegex("library"), + botRegex("mail\\.ru/"), + botRegex("manager"), + botRegex("monitor"), + botRegex("morningscore/"), + botRegex("neustar wpm"), + botRegex("nutch"), + botRegex("offbyone"), + botRegex("optimize"), + botRegex("pageburst"), + botRegex("pagespeed"), + botRegex("perl"), + botRegex("phantom"), + botRegex("pingdom"), + botRegex("powermarks"), + botRegex("preview"), + botRegex("proxy"), + botRegex("ptst[ /]\\d"), + botRegex("rainmeter webparser plugin"), + botRegex("reader"), + botRegex("rexx;"), + botRegex("rigor"), + botRegex("rss"), + botRegex("scan"), + botRegex("scrape"), + botRegex("search"), + botRegex("serp ?reputation ?management"), + botRegex("server"), + botRegex("sogou"), + botRegex("sparkler/"), + botRegex("speedcurve"), + botRegex("spider"), + botRegex("splash"), + botRegex("statuscake"), + botRegex("stumbleupon\\.com"), + botRegex("supercleaner"), + botRegex("synapse"), + botRegex("synthetic"), + botRegex("taginspector/"), + botRegex("torrent"), + botRegex("tracemyfile"), + botRegex("transcoder"), + botRegex("trendsmapresolver"), + botRegex("twingly recon"), + botRegex("url"), + botRegex("virtuoso"), + botRegex("wappalyzer"), + botRegex("webglance"), + botRegex("webkit2png"), + botRegex("websitemetadataretriever"), + botRegex("whatcms/"), + botRegex("wordpress"), + botRegex("zgrab"), ) fun isRobot(userAgent: String?) = userAgent == null || botRegexes.any { it.containsMatchIn(userAgent) }