import kotlinx.html.style
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
+import org.intellij.lang.annotations.Language
import java.security.MessageDigest
import java.time.Instant
}
object RobotDetector {
+ /** Compiles [regex] into a [Regex] that ignores letter case when matching. */
+ private fun botRegex(@Language("RegExp") regex: String): Regex = regex.toRegex(RegexOption.IGNORE_CASE)
+
 /**
  * User-agent patterns consulted by [isRobot]: a user agent is classified as a robot
  * as soon as any single pattern in this list matches it.
  *
  * Patterns are applied with `containsMatchIn`, so an entry without `^`/`$` anchors
  * matches anywhere inside the user-agent string.
  */
 private val botRegexes = listOf(
- Regex(" daum[ /]"),
- Regex(" deusu/"),
- Regex(" yadirectfetcher"),
- Regex("(?:^| )site"),
- Regex("(?:^|[^g])news"),
- Regex("@[a-z]"),
- Regex("\\(at\\)[a-z]"),
- Regex("\\(github\\.com/"),
- Regex("\\[at][a-z]"),
- Regex("^12345"),
- Regex("^<"),
- Regex("^[\\w .\\-()]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$"),
- Regex("^[^ ]{50,}$"),
- Regex("^active"),
- Regex("^ad muncher"),
- Regex("^amaya"),
- Regex("^anglesharp/"),
- Regex("^anonymous"),
- Regex("^avsdevicesdk/"),
- Regex("^axios/"),
- Regex("^bidtellect/"),
- Regex("^biglotron"),
- Regex("^btwebclient/"),
- Regex("^castro"),
- Regex("^clamav[ /]"),
- Regex("^client/"),
- Regex("^cobweb/"),
- Regex("^coccoc"),
- Regex("^custom"),
- Regex("^ddg[_-]android"),
- Regex("^discourse"),
- Regex("^dispatch/\\d"),
- Regex("^downcast/"),
- Regex("^duckduckgo"),
- Regex("^facebook"),
- Regex("^fdm[ /]\\d"),
- Regex("^getright/"),
- Regex("^gozilla/"),
- Regex("^hatena"),
- Regex("^hobbit"),
- Regex("^hotzonu"),
- Regex("^hwcdn/"),
- Regex("^jeode/"),
- Regex("^jetty/"),
- Regex("^jigsaw"),
- Regex("^linkdex"),
- Regex("^lwp[-: ]"),
- Regex("^metauri"),
- Regex("^microsoft bits"),
- Regex("^movabletype"),
- Regex("^mozilla/\\d\\.\\d \\(compatible;?\\)$"),
- Regex("^mozilla/\\d\\.\\d \\w*$"),
- Regex("^navermailapp"),
- Regex("^netsurf"),
- Regex("^offline explorer"),
- Regex("^php"),
- Regex("^postman"),
- Regex("^postrank"),
- Regex("^python"),
- Regex("^read"),
- Regex("^reed"),
- Regex("^restsharp/"),
- Regex("^snapchat"),
- Regex("^space bison"),
- Regex("^svn"),
- Regex("^swcd "),
- Regex("^taringa"),
- Regex("^test certificate info"),
- Regex("^thumbor/"),
- Regex("^tumblr/"),
- Regex("^user-agent:mozilla"),
- Regex("^valid"),
- Regex("^venus/fedoraplanet"),
- Regex("^w3c"),
- Regex("^webbandit/"),
- Regex("^webcopier"),
- Regex("^wget"),
- Regex("^whatsapp"),
- Regex("^xenu link sleuth"),
- Regex("^yahoo"),
- Regex("^yandex"),
- Regex("^zdm/\\d"),
- Regex("^zoom marketplace/"),
- Regex("^\\{\\{.*\\}\\}$"),
- Regex("adbeat\\.com"),
- Regex("appinsights"),
- Regex("archive"),
- Regex("ask jeeves/teoma"),
- Regex("bit\\.ly/"),
- Regex("bluecoat drtr"),
- Regex("bot"),
- Regex("browsex"),
- Regex("burpcollaborator"),
- Regex("capture"),
- Regex("catch"),
- Regex("check"),
- Regex("chrome-lighthouse"),
- Regex("chromeframe"),
- Regex("cloud"),
- Regex("crawl"),
- Regex("cryptoapi"),
- Regex("dareboost"),
- Regex("datanyze"),
- Regex("dataprovider"),
- Regex("dejaclick"),
- Regex("dmbrowser"),
- Regex("download"),
- Regex("evc-batch/"),
- Regex("feed"),
- Regex("firephp"),
- Regex("freesafeip"),
- Regex("ghost"),
- Regex("gomezagent"),
- Regex("google"),
- Regex("headlesschrome/"),
- Regex("http"),
- Regex("httrack"),
- Regex("hubspot marketing grader"),
- Regex("hydra"),
- Regex("ibisbrowser"),
- Regex("images"),
- Regex("iplabel"),
- Regex("ips-agent"),
- Regex("java"),
- Regex("library"),
- Regex("mail\\.ru/"),
- Regex("manager"),
- Regex("monitor"),
- Regex("morningscore/"),
- Regex("neustar wpm"),
- Regex("nutch"),
- Regex("offbyone"),
- Regex("optimize"),
- Regex("pageburst"),
- Regex("pagespeed"),
- Regex("perl"),
- Regex("phantom"),
- Regex("pingdom"),
- Regex("powermarks"),
- Regex("preview"),
- Regex("proxy"),
- Regex("ptst[ /]\\d"),
- Regex("reader"),
- Regex("rexx;"),
- Regex("rigor"),
- Regex("rss"),
- Regex("scan"),
- Regex("scrape"),
- Regex("search"),
- Regex("serp ?reputation ?management"),
- Regex("server"),
- Regex("sogou"),
- Regex("sparkler/"),
- Regex("speedcurve"),
- Regex("spider"),
- Regex("splash"),
- Regex("statuscake"),
- Regex("stumbleupon\\.com"),
- Regex("supercleaner"),
- Regex("synapse"),
- Regex("synthetic"),
- Regex("taginspector/"),
- Regex("torrent"),
- Regex("tracemyfile"),
- Regex("transcoder"),
- Regex("trendsmapresolver"),
- Regex("twingly recon"),
- Regex("url"),
- Regex("virtuoso"),
- Regex("wappalyzer"),
- Regex("webglance"),
- Regex("webkit2png"),
- Regex("websitemetadataretriever"),
- Regex("whatcms/"),
- Regex("wordpress"),
- Regex("zgrab"),
+ botRegex(" daum[ /]"),
+ botRegex(" deusu/"),
+ botRegex(" yadirectfetcher"),
+ botRegex("(?:^| )site"),
+ botRegex("(?:^|[^g])news"),
+ botRegex("@[a-z]"),
+ botRegex("\\(at\\)[a-z]"),
+ botRegex("\\(github\\.com/"),
+ botRegex("\\[at][a-z]"),
+ botRegex("^12345"),
+ botRegex("^<"),
+ botRegex("^[\\w .\\-()]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$"),
+ botRegex("^[^ ]{50,}$"),
+ botRegex("^active"),
+ botRegex("^ad muncher"),
+ botRegex("^amaya"),
+ botRegex("^anglesharp/"),
+ botRegex("^anonymous"),
+ botRegex("^avsdevicesdk/"),
+ botRegex("^axios/"),
+ botRegex("^bidtellect/"),
+ botRegex("^biglotron"),
+ botRegex("^btwebclient/"),
+ botRegex("^castro"),
+ botRegex("^clamav[ /]"),
+ botRegex("^client/"),
+ botRegex("^cobweb/"),
+ botRegex("^coccoc"),
+ botRegex("^custom"),
+ botRegex("^ddg[_-]android"),
+ botRegex("^discourse"),
+ botRegex("^dispatch/\\d"),
+ botRegex("^downcast/"),
+ botRegex("^duckduckgo"),
+ botRegex("^facebook"),
+ botRegex("^fdm[ /]\\d"),
+ botRegex("^getright/"),
+ botRegex("^gozilla/"),
+ botRegex("^hatena"),
+ botRegex("^hobbit"),
+ botRegex("^hotzonu"),
+ botRegex("^hwcdn/"),
+ botRegex("^jeode/"),
+ botRegex("^jetty/"),
+ botRegex("^jigsaw"),
+ botRegex("^linkdex"),
+ botRegex("^lwp[-: ]"),
+ botRegex("^metauri"),
+ botRegex("^microsoft bits"),
+ botRegex("^movabletype"),
+ botRegex("^mozilla/\\d\\.\\d \\(compatible;?\\)$"),
+ botRegex("^mozilla/\\d\\.\\d \\w*$"),
+ botRegex("^navermailapp"),
+ botRegex("^netsurf"),
+ botRegex("^offline explorer"),
+ botRegex("^php"),
+ botRegex("^postman"),
+ botRegex("^postrank"),
+ botRegex("^python"),
+ botRegex("^read"),
+ botRegex("^reed"),
+ botRegex("^restsharp/"),
+ botRegex("^snapchat"),
+ botRegex("^space bison"),
+ botRegex("^svn"),
+ botRegex("^swcd "),
+ botRegex("^taringa"),
+ botRegex("^test certificate info"),
+ botRegex("^thumbor/"),
+ botRegex("^tumblr/"),
+ botRegex("^user-agent:mozilla"),
+ botRegex("^valid"),
+ botRegex("^venus/fedoraplanet"),
+ botRegex("^w3c"),
+ botRegex("^webbandit/"),
+ botRegex("^webcopier"),
+ botRegex("^wget"),
+ botRegex("^whatsapp"),
+ botRegex("^xenu link sleuth"),
+ botRegex("^yahoo"),
+ botRegex("^yandex"),
+ botRegex("^zdm/\\d"),
+ botRegex("^zoom marketplace/"),
+ botRegex("^\\{\\{.*}}$"),
+ botRegex("adbeat\\.com"),
+ botRegex("appinsights"),
+ botRegex("archive"),
+ botRegex("ask jeeves/teoma"),
+ botRegex("bit\\.ly/"),
+ botRegex("bluecoat drtr"),
+ botRegex("bot"),
+ botRegex("browsex"),
+ botRegex("burpcollaborator"),
+ botRegex("capture"),
+ botRegex("catch"),
+ botRegex("check"),
+ botRegex("chrome-lighthouse"),
+ botRegex("chromeframe"),
+ botRegex("cloud"),
+ botRegex("crawl"),
+ botRegex("cryptoapi"),
+ botRegex("dareboost"),
+ botRegex("datanyze"),
+ botRegex("dataprovider"),
+ botRegex("dejaclick"),
+ botRegex("dmbrowser"),
+ botRegex("download"),
+ botRegex("evc-batch/"),
+ botRegex("feed"),
+ botRegex("firephp"),
+ botRegex("freesafeip"),
+ botRegex("ghost"),
+ botRegex("gomezagent"),
+ botRegex("google"),
+ botRegex("headlesschrome/"),
+ botRegex("http"),
+ botRegex("httrack"),
+ botRegex("hubspot marketing grader"),
+ botRegex("hydra"),
+ botRegex("ibisbrowser"),
+ botRegex("images"),
+ botRegex("iplabel"),
+ botRegex("ips-agent"),
+ botRegex("java"),
+ botRegex("library"),
+ botRegex("mail\\.ru/"),
+ botRegex("manager"),
+ botRegex("monitor"),
+ botRegex("morningscore/"),
+ botRegex("neustar wpm"),
+ botRegex("nutch"),
+ botRegex("offbyone"),
+ botRegex("optimize"),
+ botRegex("pageburst"),
+ botRegex("pagespeed"),
+ botRegex("perl"),
+ botRegex("phantom"),
+ botRegex("pingdom"),
+ botRegex("powermarks"),
+ botRegex("preview"),
+ botRegex("proxy"),
+ botRegex("ptst[ /]\\d"),
+ botRegex("rainmeter webparser plugin"),
+ botRegex("reader"),
+ botRegex("rexx;"),
+ botRegex("rigor"),
+ botRegex("rss"),
+ botRegex("scan"),
+ botRegex("scrape"),
+ botRegex("search"),
+ botRegex("serp ?reputation ?management"),
+ botRegex("server"),
+ botRegex("sogou"),
+ botRegex("sparkler/"),
+ botRegex("speedcurve"),
+ botRegex("spider"),
+ botRegex("splash"),
+ botRegex("statuscake"),
+ botRegex("stumbleupon\\.com"),
+ botRegex("supercleaner"),
+ botRegex("synapse"),
+ botRegex("synthetic"),
+ botRegex("taginspector/"),
+ botRegex("torrent"),
+ botRegex("tracemyfile"),
+ botRegex("transcoder"),
+ botRegex("trendsmapresolver"),
+ botRegex("twingly recon"),
+ botRegex("url"),
+ botRegex("virtuoso"),
+ botRegex("wappalyzer"),
+ botRegex("webglance"),
+ botRegex("webkit2png"),
+ botRegex("websitemetadataretriever"),
+ botRegex("whatcms/"),
+ botRegex("wordpress"),
+ botRegex("zgrab"),
 )
fun isRobot(userAgent: String?) = userAgent == null || botRegexes.any { it.containsMatchIn(userAgent) }