From bf95138ba8f26f44aaf210c50af538159b813ebe Mon Sep 17 00:00:00 2001 From: Lanius Trolling Date: Thu, 11 Jan 2024 09:10:13 -0500 Subject: [PATCH] Fix parsing issues --- .../kotlin/info/mechyrdia/lore/parser.kt | 2 +- .../info/mechyrdia/lore/parser_plain.kt | 118 ++++++++++++++++++ .../info/mechyrdia/lore/parser_reply.kt | 5 +- .../kotlin/info/mechyrdia/lore/parser_tags.kt | 51 +++++--- .../kotlin/info/mechyrdia/lore/parser_toc.kt | 5 +- .../kotlin/info/mechyrdia/lore/views_rss.kt | 2 +- 6 files changed, 163 insertions(+), 20 deletions(-) create mode 100644 src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser.kt index a4fda8d..db20ec9 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/lore/parser.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/parser.kt @@ -147,7 +147,7 @@ sealed class TextParserState( override fun processCharacter(char: Char): TextParserState { return if (char == ']') { val tagType = scope.tags[tag] - if (tagType is TextParserTagType.Direct && insideTags.lastOrNull() == InsideTag.DirectTag(tag)) { + if (tagType is TextParserTagType.Direct && insideTags.lastOrNull()?.tag == tag) { appendTextRaw(tagType.end(scope.ctx)) PlainText(scope, "", insideTags.dropLast(1)) diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt new file mode 100644 index 0000000..a011fac --- /dev/null +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt @@ -0,0 +1,118 @@ +package info.mechyrdia.lore + +private val plainTextFormattingTag = TextParserTagType.Direct( + { _, _ -> "" }, + { "" }, +) + +private val spacedFormattingTag = TextParserTagType.Direct( + { _, _ -> " " }, + { " " }, +) + +private val embeddedFormattingTag = TextParserTagType.Indirect { _, _, _ -> "" } + +enum class TextParserFormattingTagPlainText(val type: TextParserTagType) { + // Basic formatting + B(plainTextFormattingTag), + I(plainTextFormattingTag), + U(plainTextFormattingTag), + S(plainTextFormattingTag), + SUP(spacedFormattingTag), + SUB(spacedFormattingTag), + COLOR(plainTextFormattingTag), + IPA(plainTextFormattingTag), + CODE(plainTextFormattingTag), + H1(plainTextFormattingTag), + H2(plainTextFormattingTag), + H3(plainTextFormattingTag), + H4(plainTextFormattingTag), + H5(plainTextFormattingTag), + H6(plainTextFormattingTag), + ALIGN(plainTextFormattingTag), + ASIDE(plainTextFormattingTag), + BLOCKQUOTE(spacedFormattingTag), + + // Metadata + THUMB(embeddedFormattingTag), + + // Resource showing + IMAGE(embeddedFormattingTag), + MODEL(embeddedFormattingTag), + AUDIO(embeddedFormattingTag), + QUIZ(embeddedFormattingTag), + + // Lists + UL(spacedFormattingTag), + OL(spacedFormattingTag), + LI(spacedFormattingTag), + + // Tables + TABLE(spacedFormattingTag), + TR(spacedFormattingTag), + TD(spacedFormattingTag), + TH(spacedFormattingTag), + + // Hyperformatting + LINK(plainTextFormattingTag), + EXTLINK(plainTextFormattingTag), + ANCHOR(embeddedFormattingTag), + REDIRECT(embeddedFormattingTag), + + // Conlangs + LANG(plainTextFormattingTag), + ALPHABET(embeddedFormattingTag), + ; + + companion object { + val asTags: TextParserTags by lazy { + TextParserTags(entries.associate { it.name to it.type }) + } + } +} + +enum class TextParserCommentTagsPlainText(val type: TextParserTagType) { + B(plainTextFormattingTag), + I(plainTextFormattingTag), + U(plainTextFormattingTag), + S(plainTextFormattingTag), + SUP(spacedFormattingTag), + SUB(spacedFormattingTag), + IPA(plainTextFormattingTag), + CODE(plainTextFormattingTag), + COLOR(plainTextFormattingTag), + + ALIGN(plainTextFormattingTag), + ASIDE(plainTextFormattingTag), + + UL(spacedFormattingTag), + OL(spacedFormattingTag), + LI(spacedFormattingTag), + + TABLE(spacedFormattingTag), + TR(spacedFormattingTag), + TD(spacedFormattingTag), + TH(spacedFormattingTag), + URL(spacedFormattingTag), + + LANG(plainTextFormattingTag), + + IMGUR(embeddedFormattingTag), + IMGBB(embeddedFormattingTag), + + REPLY( + TextParserTagType.Direct( + { _, _ -> ">>" }, + { "" }, + ) + ), + + QUOTE(spacedFormattingTag) + ; + + companion object { + val asTags: TextParserTags by lazy { + TextParserTags(entries.associate { it.name to it.type }) + } + } +} diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_reply.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_reply.kt index 8299c7a..8e96602 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_reply.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_reply.kt @@ -24,8 +24,9 @@ enum class TextParserReplyCounterTag(val type: TextParserTagType - get() = TextParserTags(entries.associate { it.name to it.type }) + val asTags: TextParserTags by lazy { + TextParserTags(entries.associate { it.name to it.type }) + } } } diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_tags.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_tags.kt index d324c50..c77a3cf 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_tags.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_tags.kt @@ -22,11 +22,28 @@ sealed class TextParserTagType { value class TextParserTags private constructor(private val tags: Map>) { operator fun get(name: String) = tags[name.lowercase()] + operator fun plus(other: TextParserTags) = TextParserTags(tags + other.tags) + companion object { operator fun invoke(tags: Map>) = TextParserTags(tags.mapKeys { (name, _) -> name.lowercase() }) + + fun byIgnoringContext(tags: TextParserTags) = TextParserTags(tags.tags.mapValues { (_, tag) -> + when (tag) { + is TextParserTagType.Direct -> TextParserTagType.Direct( + { param, _ -> tag.begin(param, Unit) }, + { _ -> tag.end(Unit) } + ) + + is TextParserTagType.Indirect -> TextParserTagType.Indirect { param, content, _ -> + tag.process(param, content, Unit) + } + } + }) } } +fun TextParserTags.ignoreContext() = TextParserTags.byIgnoringContext(this) + enum class TextParserFormattingTag(val type: TextParserTagType) { // Basic formatting B( @@ -364,8 +381,9 @@ enum class TextParserFormattingTag(val type: TextParserTagType) { ; companion object { - val asTags: TextParserTags - get() = TextParserTags(entries.associate { it.name to it.type }) + val asTags: TextParserTags by lazy { + TextParserTags(entries.associate { it.name to it.type }) + } } } @@ -391,15 +409,17 @@ enum class TextParserCommentTags(val type: TextParserTagType) { TR(TextParserFormattingTag.TR.type), TD(TextParserFormattingTag.TD.type), TH(TextParserFormattingTag.TH.type), - URL(TextParserTagType.Direct( - { tagParam, _ -> - val url = tagParam?.let { TextParserState.censorText(it) } - val attr = url?.let { " href=\"$it\" rel=\"ugc nofollow\"" } ?: "" - - "" - }, - { "" }, - )), + URL( + TextParserTagType.Direct( + { tagParam, _ -> + val url = tagParam?.let { TextParserState.censorText(it) } + val attr = url?.let { " href=\"$it\" rel=\"ugc nofollow\"" } ?: "" + + "" + }, + { "" }, + ) + ), LANG(TextParserFormattingTag.LANG.type), @@ -438,8 +458,9 @@ enum class TextParserCommentTags(val type: TextParserTagType) { ; companion object { - val asTags: TextParserTags - get() = TextParserTags(entries.associate { it.name to it.type }) + val asTags: TextParserTags by lazy { + TextParserTags(entries.associate { it.name to it.type }) + } } } @@ -469,4 +490,6 @@ fun headerContentToLabel(content: String) = TextParserState.uncensorText(content fun headerContentToAnchor(content: String) = headerContentToLabel(content).replace(NON_ANCHOR_CHAR, "-") fun imagePathToOpenGraphValue(path: String) = "https://mechyrdia.info/assets/images/${sanitizeLink(path)}" -fun descriptionContentToPlainText(content: String) = TextParserState.uncensorText(content.replace(INSIDE_TAG_TEXT, "")) +fun descriptionContentToPlainText(content: String) = TextParserState.uncensorText(content) + +fun commentToPlainText(contentRaw: String) = TextParserState.uncensorText(TextParserState.parseText(contentRaw, TextParserCommentTagsPlainText.asTags, Unit).replace("

", "").replace("

", "").replace("
", "")) diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_toc.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_toc.kt index a8ef197..4b82f0b 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_toc.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_toc.kt @@ -109,7 +109,8 @@ enum class TextParserToCBuilderTag(val type: TextParserTagType - get() = TextParserTags(entries.associate { it.name to it.type }) + val asTags: TextParserTags by lazy { + TextParserFormattingTagPlainText.asTags.ignoreContext() + TextParserTags(entries.associate { it.name to it.type }) + } } } diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/views_rss.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/views_rss.kt index b4abc54..f2bd700 100644 --- a/src/jvmMain/kotlin/info/mechyrdia/lore/views_rss.kt +++ b/src/jvmMain/kotlin/info/mechyrdia/lore/views_rss.kt @@ -129,7 +129,7 @@ suspend fun ApplicationCall.recentCommentsRssFeedGenerator(): Appendable.() -> U items = comments.map { comment -> RssItem( title = "Comment by ${comment.submittedBy.name} on https://mechyrdia.info/lore/${comment.submittedIn}", - description = comment.contentsRaw.replace(INSIDE_TAG_TEXT, "").escapeHTML(), + description = commentToPlainText(comment.contentsRaw), link = "https://mechyrdia.info/comment/view/${comment.id}", author = null, comments = "https://mechyrdia.info/lore/${comment.submittedIn}#comments", -- 2.25.1