Fix parsing issues
author Lanius Trolling <lanius@laniustrolling.dev>
Thu, 11 Jan 2024 14:10:13 +0000 (09:10 -0500)
committer Lanius Trolling <lanius@laniustrolling.dev>
Thu, 11 Jan 2024 14:11:02 +0000 (09:11 -0500)
src/jvmMain/kotlin/info/mechyrdia/lore/parser.kt
src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt [new file with mode: 0644]
src/jvmMain/kotlin/info/mechyrdia/lore/parser_reply.kt
src/jvmMain/kotlin/info/mechyrdia/lore/parser_tags.kt
src/jvmMain/kotlin/info/mechyrdia/lore/parser_toc.kt
src/jvmMain/kotlin/info/mechyrdia/lore/views_rss.kt

index a4fda8de1ecaa529368a9d55fb0b7a40715b2edf..db20ec997170a22e297682ad3edaeda17b087818 100644 (file)
@@ -147,7 +147,7 @@ sealed class TextParserState<TContext>(
                override fun processCharacter(char: Char): TextParserState<TContext> {
                        return if (char == ']') {
                                val tagType = scope.tags[tag]
-                               if (tagType is TextParserTagType.Direct<TContext> && insideTags.lastOrNull() == InsideTag.DirectTag(tag)) {
+                               if (tagType is TextParserTagType.Direct<TContext> && insideTags.lastOrNull()?.tag == tag) {
                                        appendTextRaw(tagType.end(scope.ctx))
                                        
                                        PlainText(scope, "", insideTags.dropLast(1))
diff --git a/src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt b/src/jvmMain/kotlin/info/mechyrdia/lore/parser_plain.kt
new file mode 100644 (file)
index 0000000..a011fac
--- /dev/null
@@ -0,0 +1,118 @@
+package info.mechyrdia.lore
+
+private val plainTextFormattingTag = TextParserTagType.Direct<Unit>( // Shared Direct tag type whose begin and end both emit the empty string
+       { _, _ -> "" }, // begin: ignores both of its arguments and yields ""
+       { "" }, // end: yields ""
+)
+
+private val spacedFormattingTag = TextParserTagType.Direct<Unit>( // Shared Direct tag type whose begin and end each emit a single space
+       { _, _ -> " " }, // begin: ignores both of its arguments and yields " "
+       { " " }, // end: yields " "
+)
+
+private val embeddedFormattingTag = TextParserTagType.Indirect<Unit> { _, _, _ -> "" } // Shared Indirect tag type: ignores all three arguments and always yields ""
+
+enum class TextParserFormattingTagPlainText(val type: TextParserTagType<Unit>) { // Each entry's name is a tag name; `type` is one of the three shared tag types declared above (emit "", emit " ", or Indirect yielding "")
+       // Basic formatting
+       B(plainTextFormattingTag),
+       I(plainTextFormattingTag),
+       U(plainTextFormattingTag),
+       S(plainTextFormattingTag),
+       SUP(spacedFormattingTag), // spaced: begin and end each emit " "
+       SUB(spacedFormattingTag),
+       COLOR(plainTextFormattingTag),
+       IPA(plainTextFormattingTag),
+       CODE(plainTextFormattingTag),
+       H1(plainTextFormattingTag),
+       H2(plainTextFormattingTag),
+       H3(plainTextFormattingTag),
+       H4(plainTextFormattingTag),
+       H5(plainTextFormattingTag),
+       H6(plainTextFormattingTag),
+       ALIGN(plainTextFormattingTag),
+       ASIDE(plainTextFormattingTag),
+       BLOCKQUOTE(spacedFormattingTag),
+       
+       // Metadata
+       THUMB(embeddedFormattingTag), // Indirect: the processing lambda yields ""
+       
+       // Resource showing
+       IMAGE(embeddedFormattingTag),
+       MODEL(embeddedFormattingTag),
+       AUDIO(embeddedFormattingTag),
+       QUIZ(embeddedFormattingTag),
+       
+       // Lists
+       UL(spacedFormattingTag),
+       OL(spacedFormattingTag),
+       LI(spacedFormattingTag),
+       
+       // Tables
+       TABLE(spacedFormattingTag),
+       TR(spacedFormattingTag),
+       TD(spacedFormattingTag),
+       TH(spacedFormattingTag),
+       
+       // Hyperformatting
+       LINK(plainTextFormattingTag),
+       EXTLINK(plainTextFormattingTag),
+       ANCHOR(embeddedFormattingTag),
+       REDIRECT(embeddedFormattingTag),
+       
+       // Conlangs
+       LANG(plainTextFormattingTag),
+       ALPHABET(embeddedFormattingTag),
+       ;
+       
+       companion object {
+               val asTags: TextParserTags<Unit> by lazy { // lookup table of all entries, built once on first access
+                       TextParserTags(entries.associate { it.name to it.type }) // keyed by entry name, valued by the entry's tag type
+               }
+       }
+}
+
+enum class TextParserCommentTagsPlainText(val type: TextParserTagType<Unit>) { // Tag set passed to TextParserState.parseText by commentToPlainText; each entry's name is a tag name
+       B(plainTextFormattingTag),
+       I(plainTextFormattingTag),
+       U(plainTextFormattingTag),
+       S(plainTextFormattingTag),
+       SUP(spacedFormattingTag), // spaced: begin and end each emit " "
+       SUB(spacedFormattingTag),
+       IPA(plainTextFormattingTag),
+       CODE(plainTextFormattingTag),
+       COLOR(plainTextFormattingTag),
+       
+       ALIGN(plainTextFormattingTag),
+       ASIDE(plainTextFormattingTag),
+       
+       UL(spacedFormattingTag),
+       OL(spacedFormattingTag),
+       LI(spacedFormattingTag),
+       
+       TABLE(spacedFormattingTag),
+       TR(spacedFormattingTag),
+       TD(spacedFormattingTag),
+       TH(spacedFormattingTag),
+       URL(spacedFormattingTag),
+       
+       LANG(plainTextFormattingTag),
+       
+       IMGUR(embeddedFormattingTag), // Indirect: the processing lambda yields ""
+       IMGBB(embeddedFormattingTag),
+       
+       REPLY(
+               TextParserTagType.Direct(
+                       { _, _ -> ">>" }, // begin: ignores both arguments and emits ">>"
+                       { "" }, // end: emits nothing
+               )
+       ),
+       
+       QUOTE(spacedFormattingTag)
+       ;
+       
+       companion object {
+               val asTags: TextParserTags<Unit> by lazy { // lookup table of all entries, built once on first access
+                       TextParserTags(entries.associate { it.name to it.type }) // keyed by entry name, valued by the entry's tag type
+               }
+       }
+}
index 8299c7a0f84d31e9ad55955f2d1b6ca0e9c2a364..8e96602b81db3188562d4c3171a1a7f102b00b4c 100644 (file)
@@ -24,8 +24,9 @@ enum class TextParserReplyCounterTag(val type: TextParserTagType<CommentRepliesB
        );
        
        companion object {
-               val asTags: TextParserTags<CommentRepliesBuilder>
-                       get() = TextParserTags(entries.associate { it.name to it.type })
+               val asTags: TextParserTags<CommentRepliesBuilder> by lazy { // built once on first access rather than rebuilt by a getter on every read
+                       TextParserTags(entries.associate { it.name to it.type }) // keyed by entry name, valued by the entry's tag type
+               }
        }
 }
 
index d324c50db50115b7479d3e34e1cb38bf48257832..c77a3cf16d0466ef6333df86ba52d110bfaba78f 100644 (file)
@@ -22,11 +22,28 @@ sealed class TextParserTagType<TContext> {
 value class TextParserTags<TContext> private constructor(private val tags: Map<String, TextParserTagType<TContext>>) { // Wrapper around a tag-name -> tag-type map; the constructor is private, so outside code builds instances through the companion
        operator fun get(name: String) = tags[name.lowercase()] // lookup lowercases the requested name first; yields null when no entry exists
        
+       operator fun plus(other: TextParserTags<TContext>) = TextParserTags(tags + other.tags) // merged tag set; with Map `+`, entries from `other` replace same-key entries from this set
+       
        companion object {
                operator fun <TContext> invoke(tags: Map<String, TextParserTagType<TContext>>) = TextParserTags(tags.mapKeys { (name, _) -> name.lowercase() }) // factory: lowercases every key before wrapping, matching the lowercased lookup in get
+               
+               fun <TContext> byIgnoringContext(tags: TextParserTags<Unit>) = TextParserTags<TContext>(tags.tags.mapValues { (_, tag) -> // re-wraps each Unit-context tag as a TContext tag; keys are kept as they are
+                       when (tag) { // no else branch: only the Direct and Indirect cases are handled
+                               is TextParserTagType.Direct -> TextParserTagType.Direct(
+                                       { param, _ -> tag.begin(param, Unit) }, // drops the supplied context and calls the original begin with Unit
+                                       { _ -> tag.end(Unit) } // drops the supplied context and calls the original end with Unit
+                               )
+                               
+                               is TextParserTagType.Indirect -> TextParserTagType.Indirect { param, content, _ -> // drops the supplied context
+                                       tag.process(param, content, Unit) // delegates to the original processing with Unit as context
+                               }
+                       }
+               })
        }
 }
 
+fun <TContext> TextParserTags<Unit>.ignoreContext() = TextParserTags.byIgnoringContext<TContext>(this) // extension shorthand: forwards the receiver to TextParserTags.byIgnoringContext
+
 enum class TextParserFormattingTag(val type: TextParserTagType<Unit>) {
        // Basic formatting
        B(
@@ -364,8 +381,9 @@ enum class TextParserFormattingTag(val type: TextParserTagType<Unit>) {
        ;
        
        companion object {
-               val asTags: TextParserTags<Unit>
-                       get() = TextParserTags(entries.associate { it.name to it.type })
+               val asTags: TextParserTags<Unit> by lazy { // built once on first access rather than rebuilt by a getter on every read
+                       TextParserTags(entries.associate { it.name to it.type }) // keyed by entry name, valued by the entry's tag type
+               }
        }
 }
 
@@ -391,15 +409,17 @@ enum class TextParserCommentTags(val type: TextParserTagType<Unit>) {
        TR(TextParserFormattingTag.TR.type),
        TD(TextParserFormattingTag.TD.type),
        TH(TextParserFormattingTag.TH.type),
-       URL(TextParserTagType.Direct(
-               { tagParam, _ ->
-                       val url = tagParam?.let { TextParserState.censorText(it) }
-                       val attr = url?.let { " href=\"$it\" rel=\"ugc nofollow\"" } ?: ""
-                       
-                       "<a$attr>"
-               },
-               { "</a>" },
-       )),
+       URL(
+               TextParserTagType.Direct(
+                       { tagParam, _ ->
+                               val url = tagParam?.let { TextParserState.censorText(it) }
+                               val attr = url?.let { " href=\"$it\" rel=\"ugc nofollow\"" } ?: ""
+                               
+                               "<a$attr>"
+                       },
+                       { "</a>" },
+               )
+       ),
        
        LANG(TextParserFormattingTag.LANG.type),
        
@@ -438,8 +458,9 @@ enum class TextParserCommentTags(val type: TextParserTagType<Unit>) {
        ;
        
        companion object {
-               val asTags: TextParserTags<Unit>
-                       get() = TextParserTags(entries.associate { it.name to it.type })
+               val asTags: TextParserTags<Unit> by lazy { // built once on first access rather than rebuilt by a getter on every read
+                       TextParserTags(entries.associate { it.name to it.type }) // keyed by entry name, valued by the entry's tag type
+               }
        }
 }
 
@@ -469,4 +490,6 @@ fun headerContentToLabel(content: String) = TextParserState.uncensorText(content
 fun headerContentToAnchor(content: String) = headerContentToLabel(content).replace(NON_ANCHOR_CHAR, "-")
 
 fun imagePathToOpenGraphValue(path: String) = "https://mechyrdia.info/assets/images/${sanitizeLink(path)}"
-fun descriptionContentToPlainText(content: String) = TextParserState.uncensorText(content.replace(INSIDE_TAG_TEXT, ""))
+fun descriptionContentToPlainText(content: String) = TextParserState.uncensorText(content) // passes content straight to uncensorText; performs no tag stripping of its own
+
+fun commentToPlainText(contentRaw: String) = TextParserState.uncensorText(TextParserState.parseText(contentRaw, TextParserCommentTagsPlainText.asTags, Unit).replace("<p>", "").replace("</p>", "").replace("<br/>", "")) // parses the raw comment with the plain-text comment tag set, deletes the literal "<p>", "</p>" and "<br/>" strings from the result, then uncensors it
index a8ef1971269a33dbe667d19b1a34be997741b793..4b82f0b66b3b6bdbd6a1e046264fffba383babf7 100644 (file)
@@ -109,7 +109,8 @@ enum class TextParserToCBuilderTag(val type: TextParserTagType<TableOfContentsBu
        );
        
        companion object {
-               val asTags: TextParserTags<TableOfContentsBuilder>
-                       get() = TextParserTags(entries.associate { it.name to it.type })
+               val asTags: TextParserTags<TableOfContentsBuilder> by lazy { // built once on first access rather than rebuilt by a getter on every read
+                       TextParserFormattingTagPlainText.asTags.ignoreContext<TableOfContentsBuilder>() + TextParserTags(entries.associate { it.name to it.type }) // the context-ignoring plain-text tags form the base; this enum's own entries are the right-hand operand, so they replace base entries with the same key
+               }
        }
 }
index b4abc54e69222ff11f381c48bb1902d36d4ef164..f2bd7004406fb52ea0f208ab092dcb7290723a06 100644 (file)
@@ -129,7 +129,7 @@ suspend fun ApplicationCall.recentCommentsRssFeedGenerator(): Appendable.() -> U
                items = comments.map { comment ->
                        RssItem(
                                title = "Comment by ${comment.submittedBy.name} on https://mechyrdia.info/lore/${comment.submittedIn}",
-                               description = comment.contentsRaw.replace(INSIDE_TAG_TEXT, "").escapeHTML(),
+                               description = commentToPlainText(comment.contentsRaw),
                                link = "https://mechyrdia.info/comment/view/${comment.id}",
                                author = null,
                                comments = "https://mechyrdia.info/lore/${comment.submittedIn}#comments",