Improve conlang vocab implementation
author Lanius Trolling <lanius@laniustrolling.dev>
Mon, 30 Dec 2024 00:28:10 +0000 (19:28 -0500)
committer Lanius Trolling <lanius@laniustrolling.dev>
Mon, 30 Dec 2024 00:28:10 +0000 (19:28 -0500)
src/main/kotlin/info/mechyrdia/lore/ArticleListing.kt
src/main/kotlin/info/mechyrdia/lore/ParserHtml.kt
src/main/kotlin/info/mechyrdia/lore/ParserPreprocessJson.kt
src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt [new file with mode: 0644]
src/main/kotlin/info/mechyrdia/lore/ParserRobot.kt
src/main/resources/static/init.js
src/main/resources/static/style.css

index efa8271cae8faafd0eed18a36ea509e71076ecf4..ccef46cf7cd044a69b2d1082e7c06af0d4cd5cef 100644 (file)
@@ -32,7 +32,7 @@ private val collator: Collator = Collator.getInstance(Locale.US).apply {
        decomposition = Collator.FULL_DECOMPOSITION
 }
 
-fun <T> List<T>.sortedLexically(selector: (T) -> String?) = map { it to collator.getCollationKey(selector(it)) }
+fun <T> Iterable<T>.sortedLexically(selector: (T) -> String?) = map { it to collator.getCollationKey(selector(it)) }
        .sortedBy { it.second }
        .map { (it, _) -> it }
 
index f7cddf7373cd37a66c36ac875f48366ba59dd859..407bb5ed2cc2b529c8091587105382464e808ada 100644 (file)
@@ -653,14 +653,8 @@ enum class FactbookFormattingTag(val type: HtmlLexerTag) {
                        else ({})
                }
        ),
-       VOCAB(HtmlTextBodyLexerTag { _, _, content ->
-               val contentJson = JsonStorageCodec.parseToJsonElement(content).toString()
-               
-               ({
-                       span(classes = "vocab") {
-                               attributes["data-vocab"] = contentJson
-                       }
-               })
+       VOCAB(HtmlLexerTag { env, _, tree ->
+               VocabFormatter.formatVocab(tree, env::processTree)
        }),
        ;
        
index 918aa9aabd4bcb0c88228733142bb007835b4538..8c7d0f4605017d2381ff7fe2b73d238d3445a820 100644 (file)
@@ -60,10 +60,10 @@ fun ParserTree.toPreProcessJson(): JsonElement {
                        is ParserTreeNode.Text -> JsonPrimitive(node.text)
                        ParserTreeNode.LineBreak -> JsonPrimitive("\n\n")
                        is ParserTreeNode.Tag -> if (node isTag "val" && node.param == null) {
-                               val value = node.subNodes.treeToText()
+                               val value = node.subNodes.unparse().trim()
                                value.toBooleanStrictOrNull()?.let { JsonPrimitive(it) }
                                        ?: value.toDoubleOrNull()?.let { JsonPrimitive(it) }
-                                       ?: JsonPrimitive(value)
+                                       ?: JsonPrimitive("[val]$value[/val]")
                        } else JsonPrimitive(node.unparse())
                }
        else JsonPrimitive(unparse())
diff --git a/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt b/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt
new file mode 100644 (file)
index 0000000..12d0137
--- /dev/null
@@ -0,0 +1,157 @@
+package info.mechyrdia.lore
+
+import info.mechyrdia.JsonStorageCodec
+import kotlinx.html.div
+import kotlinx.html.form
+import kotlinx.html.span
+import kotlinx.serialization.DeserializationStrategy
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.JsonContentPolymorphicSerializer
+import kotlinx.serialization.json.JsonElement
+import kotlinx.serialization.json.JsonPrimitive
+
+/**
+ * Display templates for one kind of word in a vocabulary.
+ *
+ * Both templates may contain `#N#` placeholders, which [VocabFormatter] replaces with the
+ * N-th (zero-based) entry of the word's `forms` list before parsing the result as markup.
+ *
+ * @property formatHead template rendered before the word's numbered definition list
+ * @property formatInfo optional template rendered after the definition list; nothing is rendered when null
+ */
+@Serializable
+data class LangVocabInputWordType(
+       val formatHead: String,
+       val formatInfo: String? = null,
+)
+
+// Matches a `#N#` placeholder in a word-type template; capture group 1 is the decimal index into a word's forms.
+private val formatForm: Regex = Regex("#([0-9]+)#")
+
+/**
+ * The `type` of a vocabulary word: either a reference to a shared preset or an inline,
+ * one-off definition. Which variant is decoded is decided by [LangVocabInputWordTypeValueSerializer].
+ */
+@Serializable(with = LangVocabInputWordTypeValueSerializer::class)
+sealed interface LangVocabInputWordTypeValue {
+       /** Refers to an entry of the vocabulary's `types` map by its key. */
+       @Serializable
+       @JvmInline
+       value class Preset(val key: String) : LangVocabInputWordTypeValue {
+               // Prints the bare key so that it reads naturally when interpolated into error messages.
+               override fun toString(): String {
+                       return key
+               }
+       }
+       
+       /** Carries a complete word type written inline on the word itself. */
+       @Serializable
+       @JvmInline
+       value class Irregular(val value: LangVocabInputWordType) : LangVocabInputWordTypeValue
+}
+
+/**
+ * Picks the concrete [LangVocabInputWordTypeValue] variant from the shape of the JSON:
+ * a primitive decodes as a [LangVocabInputWordTypeValue.Preset] key, anything else as an
+ * inline [LangVocabInputWordTypeValue.Irregular] word type.
+ */
+object LangVocabInputWordTypeValueSerializer : JsonContentPolymorphicSerializer<LangVocabInputWordTypeValue>(LangVocabInputWordTypeValue::class) {
+       override fun selectDeserializer(element: JsonElement): DeserializationStrategy<LangVocabInputWordTypeValue> =
+               if (element is JsonPrimitive)
+                       LangVocabInputWordTypeValue.Preset.serializer()
+               else
+                       LangVocabInputWordTypeValue.Irregular.serializer()
+}
+
+/**
+ * One sense of a word in the vocabulary.
+ *
+ * @property type preset key or inline word type that supplies the display templates
+ * @property inEnglish English keywords under which this entry is indexed for searching
+ * @property forms values substituted for the `#N#` placeholders of the word type's templates; defaults to empty
+ * @property definitions markup for each definition, rendered as items of an ordered list
+ */
+@Serializable
+data class LangVocabInputWord(
+       val type: LangVocabInputWordTypeValue,
+       val inEnglish: List<String>,
+       val forms: List<String> = emptyList(),
+       val definitions: List<String>,
+)
+
+/**
+ * Root of the JSON document decoded by [VocabFormatter].
+ *
+ * @property langName markup for the language's name
+ * @property types preset word types, looked up by key from each word's `type`
+ * @property words entries keyed by the word in the language; a word may have several entries
+ */
+@Serializable
+data class LangVocabularyInput(
+       val langName: String,
+       val types: Map<String, LangVocabInputWordType>,
+       val words: Map<String, List<LangVocabInputWord>>,
+)
+
+/**
+ * Resolves a word's type against this map of presets.
+ *
+ * @return the inline type for an irregular value, the preset stored under the key for a
+ * preset value, or null when no preset has that key
+ */
+fun Map<String, LangVocabInputWordType>.getWordType(typeValue: LangVocabInputWordTypeValue): LangVocabInputWordType? = when (typeValue) {
+       is LangVocabInputWordTypeValue.Irregular -> typeValue.value
+       is LangVocabInputWordTypeValue.Preset -> get(typeValue.key)
+}
+
+/**
+ * Server-side renderer for the VOCAB formatting tag.
+ *
+ * Every word entry is rendered to HTML up front and emitted hidden; alongside them the
+ * output carries hidden keyword-to-index spans and an empty search form, which the
+ * client-side script uses to build the search UI and reveal matching entries.
+ */
+object VocabFormatter {
+       /**
+        * Replaces each `#N#` placeholder in this template with `forms[N]`.
+        *
+        * A placeholder whose index is not a valid position in [forms] (or does not fit in
+        * an Int) is left in the text untouched instead of throwing, so that one malformed
+        * entry cannot abort rendering of the whole vocabulary.
+        */
+       private fun String.substituteForms(forms: List<String>): String = replace(formatForm) { match ->
+               match.groupValues[1].toIntOrNull()?.let { forms.getOrNull(it) } ?: match.value
+       }
+       
+       /**
+        * Builds the HTML for a vocabulary.
+        *
+        * @param tree body of the vocab tag; converted to JSON and decoded as [LangVocabularyInput]
+        * @param toHtml renders a parsed markup tree to HTML
+        * @return a builder emitting a `div.vocab` containing the language name, the two
+        * keyword indexes, the search form placeholder, and one hidden `div.vocab-word` per entry
+        */
+       fun formatVocab(tree: ParserTree, toHtml: (ParserTree) -> HtmlBuilderSubject): HtmlBuilderSubject {
+               val json = tree.toPreProcessJson()
+               val vocab = JsonStorageCodec.decodeFromJsonElement(LangVocabularyInput.serializer(), json)
+               
+               val langName = toHtml(ParserState.parseText(vocab.langName))
+               
+               // Both indexes map a search keyword to the positions (in `words`) of the entries it should reveal.
+               val wordMap = mutableMapOf<String, MutableSet<Int>>()
+               val englishKeywordMap = mutableMapOf<String, MutableSet<Int>>()
+               val words = vocab.words.entries
+                       .sortedLexically { it.key }
+                       .flatMap { (word, entries) ->
+                               entries.map { entry -> word to entry }
+                       }
+                       .mapIndexed { i, (word, entry) ->
+                               // The indexes are filled as a side effect here so that they share the numbering of `words`.
+                               wordMap.getOrPut(word, ::mutableSetOf).add(i)
+                               
+                               for (keyword in entry.inEnglish)
+                                       englishKeywordMap.getOrPut(keyword, ::mutableSetOf).add(i)
+                               
+                               val wordType = vocab.types.getWordType(entry.type)
+                               if (wordType == null)
+                                       listOf(
+                                               ParserTreeNode.Tag("error", null, listOf(ParserTreeNode.Text("Invalid word type ${entry.type}")))
+                                       )
+                               else {
+                                       val head = ParserState.parseText(wordType.formatHead.substituteForms(entry.forms))
+                                       
+                                       val defs = listOf(ParserTreeNode.Tag("ol", null, entry.definitions.map { definition ->
+                                               ParserTreeNode.Tag("li", null, ParserState.parseText(definition))
+                                       }))
+                                       
+                                       val info = wordType.formatInfo
+                                               ?.substituteForms(entry.forms)
+                                               ?.let { ParserState.parseText(it) }
+                                               .orEmpty()
+                                       
+                                       head + defs + info
+                               }
+                       }
+                       .map(toHtml)
+               
+               return {
+                       div(classes = "vocab") {
+                               span(classes = "vocab-lang-name") {
+                                       append(langName)
+                               }
+                               
+                               // One hidden index per search direction, in the same markup shape.
+                               for ((lang, keywordMap) in listOf("language" to wordMap, "translated" to englishKeywordMap)) {
+                                       span(classes = "vocab-keywords") {
+                                               attributes["data-lang"] = lang
+                                               for ((keyword, indices) in keywordMap) {
+                                                       span {
+                                                               attributes["data-keyword"] = keyword
+                                                               for (index in indices) {
+                                                                       span {
+                                                                               attributes["data-index"] = "$index"
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                               
+                               // Left empty on purpose: the client script populates it, and "js" tells the
+                               // generic form-submit handler to leave this form alone.
+                               form(classes = "vocab-search-form") {
+                                       attributes["data-method"] = "js"
+                               }
+                               
+                               for ((index, word) in words.withIndex()) {
+                                       div(classes = "vocab-word hide") {
+                                               attributes["data-word-index"] = "$index"
+                                               append(word)
+                                       }
+                               }
+                       }
+               }
+       }
+}
index 889ffac4ada3e5cefdf82b7e09404a6a094b0fff..e72a20b4490d38a4a811d9690e28839dbde1c981 100644 (file)
@@ -161,7 +161,7 @@ enum class FactbookRobotFormattingTag(val type: RobotTextTag) {
                if ("mechyrdian".equals(param, ignoreCase = true))
                        "(preview of Mechyrdia Sans font)"
                else if ("tylan".equals(param, ignoreCase = true))
-                       "(preview of Tylan abugida font)"
+                       "(preview of Tylan alphasyllabary font)"
                else if ("thedish".equals(param, ignoreCase = true))
                        "(preview of Thedish alphabet font)"
                else if ("kishari".equals(param, ignoreCase = true))
index ba2f040bb8f53c32fe81f9d157cf2a8b5840133a..105e1cefcfe38fae38aee9658b3ff7d8a3f4e678 100644 (file)
         * @param {URL} url
         * @param {string} stateMode
         * @param {?(URLSearchParams|FormData)} [formData=undefined]
-        * @return {boolean}
+        * @returns {boolean}
         */
        function goToPage(url, stateMode, formData) {
                if (url.origin !== window.location.origin || !isPagePath(url.pathname) || url.searchParams.getAll("format").filter(format => format.toLowerCase() !== "html").length > 0) {
                                newState.index = history.state.index;
                        }
 
-                       const requestBody = {};
+                       const requestBody = Object.create(null);
                        if (formData != null) {
                                requestBody.body = formData;
                                requestBody.method = "post";
         * @param {SubmitEvent} e
         */
        function formSubmitHandler(e) {
+               if (e.currentTarget.getAttribute("data-method") === "js") {
+                       return;
+               }
+
                const url = new URL(e.currentTarget.action, window.location);
                const formData = formDataUrlEncoded(new FormData(e.currentTarget, e.submitter));
                if (e.currentTarget.method.toLowerCase() === "post") {
                                const key = trimmed.substring(0, eqI).trimEnd();
                                const value = trimmed.substring(eqI + 1).trimStart();
                                return {...obj, [key]: value};
-                       }, {});
+                       }, Object.create(null));
        }
 
        /**
         * @param {number} amount
-        * @return {Promise<void>}
+        * @returns {Promise<void>}
         */
        function delay(amount) {
                return new Promise(resolve => window.setTimeout(resolve, amount));
        }
 
        /**
-        * @return {Promise<DOMHighResTimeStamp>}
+        * @returns {Promise<DOMHighResTimeStamp>}
         */
        function frame() {
                return new Promise(resolve => window.requestAnimationFrame(resolve));
 
        /**
         * @param {string} url
-        * @return {Promise<void>}
+        * @returns {Promise<void>}
         */
        function loadScript(url) {
                return new Promise((resolve, reject) => {
        /**
         * @param {ParentNode} element
         * @param {Array.<string>} lines
-        * @return {void}
+        * @returns {void}
         */
        function appendWithLineBreaks(element, lines) {
                let isFirst = true;
        }
 
        /**
-        * @typedef {{tag: string, attrs: Object.<string, *>, text: (string|{form: string, regexp: string, replacement: string})}} VocabInflectionTableCell
-        * @typedef {Array.<VocabInflectionTableCell>} VocabInflectionTableRow
-        * @typedef {Array.<VocabInflectionTableRow>} VocabInflectionTable
-        * @typedef {{type: string, inEnglish: Array.<string>, forms: Array.<string>, definitions: Array.<string>}} VocabWordEntry
-        * @typedef {Array.<VocabWordEntry>} VocabWord
-        * @typedef {{langName: string, inflections: Object.<string, VocabInflectionTable>, words: Object.<string, VocabWord>}} Vocab
-        *
-        * @param {Vocab} vocab
-        * @returns {HTMLDivElement}
+        * @param {HTMLDivElement} vocabDiv
+        * @returns {void}
         */
-       function renderVocab(vocab) {
-               /**
-                * @param {string} word
-                * @param {number} index
-                * @returns {HTMLDivElement}
-                */
-               function renderWord(word, index) {
-                       const wordRoot = document.createElement("div");
-
-                       const wordLabel = document.createElement("strong");
-                       wordLabel.append(word);
-                       const indexLabel = document.createElement("i");
-                       indexLabel.append("definition " + (index + 1));
-                       wordRoot.appendChild(document.createElement("p")).append(wordLabel, "\u00A0", indexLabel);
-
-                       const definition = vocab.words[word][index];
-                       const inflection = vocab.inflections[definition.type];
-
-                       const defList = wordRoot.appendChild(document.createElement("ol"));
-                       for (const def of definition.definitions) {
-                               defList.appendChild(document.createElement("li")).append(def);
-                       }
+       function renderVocab(vocabDiv) {
+               const localeCompareSorter = (a, b) => a.localeCompare(b);
 
-                       const inflectionTable = wordRoot.appendChild(document.createElement("table"));
-                       for (const row of inflection) {
-                               const rowElem = inflectionTable.appendChild(document.createElement("tr"));
-                               for (const cell of row) {
-                                       const cellElem = rowElem.appendChild(document.createElement(cell.tag));
-                                       for (const attr of Object.keys(cell.attrs)) {
-                                               cellElem.setAttribute(attr, cell.attrs[attr]);
-                                       }
-                                       if ((typeof cell.text) === "string") {
-                                               cellElem.innerHTML = cell.text;
-                                       } else {
-                                               cellElem.innerHTML = definition.forms[cell.text.form].replace(RegExp(cell.text.regexp, "ui"), cell.text.replacement);
-                                       }
-                               }
-                       }
+               const createKeywordSearch = (keywords) => [...keywords.querySelectorAll("span[data-keyword]")]
+                       .reduce((obj, kwElem) => {
+                               const key = kwElem.getAttribute("data-keyword").trim();
+                               const value = [...kwElem.querySelectorAll("span[data-index]")].map(iElem => iElem.getAttribute("data-index"));
+                               return {...obj, [key]: value};
+                       }, Object.create(null));
 
-                       return wordRoot;
-               }
+               const langName = vocabDiv.querySelector("span.vocab-lang-name").innerText.trim();
+               const vocabWords = [...vocabDiv.querySelectorAll("div.vocab-word")];
 
-               const localeCompareSorter = (a, b) => a.localeCompare(b);
+               const languageKeywords = vocabDiv.querySelector("span.vocab-keywords[data-lang=language]");
+               const translatedKeywords = vocabDiv.querySelector("span.vocab-keywords[data-lang=translated]");
 
-               const englishToWord = {};
-               for (const word of Object.keys(vocab.words).sort(localeCompareSorter)) {
-                       const definitions = vocab.words[word];
-                       const definitionsLength = definitions.length;
-                       for (let i = 0; i < definitionsLength; i++) {
-                               for (const keyword of definitions[i].inEnglish) {
-                                       const english = englishToWord[keyword] || (englishToWord[keyword] = []);
-                                       english.push({"word": word, "index": i});
-                               }
-                       }
-               }
+               const languageToWord = createKeywordSearch(languageKeywords);
+               const englishToWord = createKeywordSearch(translatedKeywords);
 
-               const vocabRoot = document.createElement("div");
-               const vocabSearchRoot = vocabRoot.appendChild(document.createElement("form"));
-               const vocabSearchResults = vocabRoot.appendChild(document.createElement("div"));
-               vocabSearchResults.appendChild(document.createElement("i")).append("Search results will appear here");
+               const vocabSearchRoot = vocabDiv.querySelector("form.vocab-search-form");
 
                const vocabSearch = vocabSearchRoot.appendChild(document.createElement("input"));
                vocabSearch.name = "q";
                vocabEnglishToLang.type = "radio";
                vocabEnglishToLang.value = "from-english";
                vocabEnglishToLang.checked = true;
-               vocabEnglishToLangRoot.append("English to " + vocab.langName);
+               vocabEnglishToLangRoot.append("English to " + langName);
 
                vocabSearchRoot.appendChild(document.createElement("br"));
 
                vocabLangToEnglish.name = "target";
                vocabLangToEnglish.type = "radio";
                vocabLangToEnglish.value = "to-english";
-               vocabLangToEnglishRoot.append(vocab.langName + " to English");
+               vocabLangToEnglishRoot.append(langName + " to English");
 
                const vocabSearchButton = vocabSearchRoot.appendChild(document.createElement("input"));
                vocabSearchButton.type = "submit";
                vocabSearchButton.value = "Search";
 
+               const vocabSearchResultNumber = vocabSearchRoot.appendChild(document.createElement("p"));
+               vocabSearchResultNumber.style.fontSize = "0.8em";
+               vocabSearchResultNumber.append("Search results will appear below");
+
                vocabSearchRoot.addEventListener("submit", function (e) {
                        e.preventDefault();
 
                        const searchTerm = vocabSearch.value.trim();
 
-                       vocabSearchResults.replaceChildren();
-
-                       const searchResults = [];
+                       const searchResults = Object.create(null);
                        if (vocabEnglishToLang.checked) {
                                for (const englishWord of Object.keys(englishToWord).sort(localeCompareSorter)) {
                                        if (!englishWord.startsWith(searchTerm)) continue;
 
-                                       for (const vocabItem of englishToWord[englishWord]) {
-                                               if (searchResults.some(result => result.word === vocabItem.word && result.index === vocabItem.index)) continue;
-
-                                               searchResults.push(vocabItem);
+                                       for (const wordIndex of englishToWord[englishWord]) {
+                                               searchResults[wordIndex] = true;
                                        }
                                }
                        } else {
-                               for (const langWord of Object.keys(vocab.words).sort(localeCompareSorter)) {
+                               for (const langWord of Object.keys(languageToWord).sort(localeCompareSorter)) {
                                        if (!langWord.startsWith(searchTerm)) continue;
 
-                                       const numDefs = vocab.words[langWord].length;
-                                       for (let i = 0; i < numDefs; i++) {
-                                               searchResults.push({"word": langWord, "index": i});
+                                       for (const wordIndex of languageToWord[langWord]) {
+                                               searchResults[wordIndex] = true;
                                        }
                                }
                        }
 
-                       if (searchResults.length === 0) {
-                               vocabSearchResults.appendChild(document.createElement("i")).append("No results found");
-                               return;
+                       const numResults = Object.keys(searchResults).length;
+                       if (numResults === 1) {
+                               vocabSearchResultNumber.replaceChildren("1 result found");
+                       } else {
+                               vocabSearchResultNumber.replaceChildren(`${numResults} results found`);
                        }
 
-                       searchResults.sort((a, b) => (a.word === b.word) ? (a.index - b.index) : localeCompareSorter(a.word, b.word));
-
-                       for (const searchResult of searchResults) {
-                               vocabSearchResults.append(renderWord(searchResult.word, searchResult.index));
+                       for (const vocabWord of vocabWords) {
+                               const vocabWordIndex = vocabWord.getAttribute("data-word-index");
+                               if (searchResults[vocabWordIndex]) {
+                                       vocabWord.classList.remove("hide");
+                               } else {
+                                       vocabWord.classList.add("hide");
+                               }
                        }
                });
-
-               return vocabRoot;
        }
 
        /**
                 * @returns {QuizOutcome}
                 */
                function calculateResults() {
-                       const total = {};
+                       const total = Object.create(null);
                        for (const result of questionAnswers) {
                                for (const resKey of Object.keys(result)) {
                                        if (total[resKey] == null) {
                (function () {
                        // Render vocab
 
-                       const vocabSpans = dom.querySelectorAll("span.vocab");
-                       for (const vocabSpan of vocabSpans) {
-                               const vocab = JSON.parse(vocabSpan.getAttribute("data-vocab"));
-                               vocabSpan.after(renderVocab(vocab));
-                               vocabSpan.remove();
+                       const vocabDivs = dom.querySelectorAll("div.vocab");
+                       for (const vocabDiv of vocabDivs) {
+                               renderVocab(vocabDiv);
                        }
                })();
 
index e0871229481f3fdaae6f94b66eaba9c171f375d0..9026c7b8005611d4533408ac7e909bff59c9451c 100644 (file)
@@ -803,3 +803,15 @@ a.copy-text[data-copying] {
        color: var(--text-color);
        pointer-events: none;
 }
+
+/* Data carrier only: init.js reads the language name from it to label the search options. */
+.vocab-lang-name {
+       display: none;
+}
+
+/* Data carrier only: holds the keyword-to-entry index that init.js reads for searching. */
+.vocab-keywords {
+       display: none;
+}
+
+/* Entries start hidden; init.js removes "hide" from those matching the current search. */
+.vocab-word.hide {
+       display: none;
+}