From: Lanius Trolling <lanius@laniustrolling.dev>
Date: Mon, 30 Dec 2024 00:28:10 +0000 (-0500)
Subject: Improve conlang vocab implementation
X-Git-Url: https://gitweb.starshipfights.net/?a=commitdiff_plain;h=3775d452d4b13dd86d1cf8fad0cd6f7d5233e0ea;p=factbooks

Improve conlang vocab implementation
---

diff --git a/src/main/kotlin/info/mechyrdia/lore/ArticleListing.kt b/src/main/kotlin/info/mechyrdia/lore/ArticleListing.kt
index efa8271..ccef46c 100644
--- a/src/main/kotlin/info/mechyrdia/lore/ArticleListing.kt
+++ b/src/main/kotlin/info/mechyrdia/lore/ArticleListing.kt
@@ -32,7 +32,7 @@ private val collator: Collator = Collator.getInstance(Locale.US).apply {
 	decomposition = Collator.FULL_DECOMPOSITION
 }
 
-fun <T> List<T>.sortedLexically(selector: (T) -> String?) = map { it to collator.getCollationKey(selector(it)) }
+fun <T> Iterable<T>.sortedLexically(selector: (T) -> String?) = map { it to collator.getCollationKey(selector(it)) }
 	.sortedBy { it.second }
 	.map { (it, _) -> it }
 
diff --git a/src/main/kotlin/info/mechyrdia/lore/ParserHtml.kt b/src/main/kotlin/info/mechyrdia/lore/ParserHtml.kt
index f7cddf7..407bb5e 100644
--- a/src/main/kotlin/info/mechyrdia/lore/ParserHtml.kt
+++ b/src/main/kotlin/info/mechyrdia/lore/ParserHtml.kt
@@ -653,14 +653,8 @@ enum class FactbookFormattingTag(val type: HtmlLexerTag) {
 			else ({})
 		}
 	),
-	VOCAB(HtmlTextBodyLexerTag { _, _, content ->
-		val contentJson = JsonStorageCodec.parseToJsonElement(content).toString()
-		
-		({
-			span(classes = "vocab") {
-				attributes["data-vocab"] = contentJson
-			}
-		})
+	VOCAB(HtmlLexerTag { env, _, tree ->
+		VocabFormatter.formatVocab(tree, env::processTree)
 	}),
 	;
 	
diff --git a/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessJson.kt b/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessJson.kt
index 918aa9a..8c7d0f4 100644
--- a/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessJson.kt
+++ b/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessJson.kt
@@ -60,10 +60,10 @@ fun ParserTree.toPreProcessJson(): JsonElement {
 			is ParserTreeNode.Text -> JsonPrimitive(node.text)
 			ParserTreeNode.LineBreak -> JsonPrimitive("\n\n")
 			is ParserTreeNode.Tag -> if (node isTag "val" && node.param == null) {
-				val value = node.subNodes.treeToText()
+				val value = node.subNodes.unparse().trim()
 				value.toBooleanStrictOrNull()?.let { JsonPrimitive(it) }
 					?: value.toDoubleOrNull()?.let { JsonPrimitive(it) }
-					?: JsonPrimitive(value)
+					?: JsonPrimitive("[val]$value[/val]")
 			} else JsonPrimitive(node.unparse())
 		}
 	else JsonPrimitive(unparse())
diff --git a/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt b/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt
new file mode 100644
index 0000000..12d0137
--- /dev/null
+++ b/src/main/kotlin/info/mechyrdia/lore/ParserPreprocessVocab.kt
@@ -0,0 +1,157 @@
+package info.mechyrdia.lore
+
+import info.mechyrdia.JsonStorageCodec
+import kotlinx.html.div
+import kotlinx.html.form
+import kotlinx.html.span
+import kotlinx.serialization.DeserializationStrategy
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.JsonContentPolymorphicSerializer
+import kotlinx.serialization.json.JsonElement
+import kotlinx.serialization.json.JsonPrimitive
+
+@Serializable
+data class LangVocabInputWordType( // display templates for one word type; either listed in the vocabulary's type table or given inline on an entry
+	val formatHead: String, // markup rendered before the definition list; each #N# stands for the entry's form at zero-based index N
+	val formatInfo: String? = null, // optional markup rendered after the definition list, with the same #N# substitution
+)
+
+private val formatForm: Regex = Regex("#([0-9]+)#") // form placeholder "#N#"; group 1 captures the decimal index N
+
+/** Word type of an entry: either the key of a preset word type or a word type written inline. */
+@Serializable(with = LangVocabInputWordTypeValueSerializer::class)
+sealed interface LangVocabInputWordTypeValue {
+	/** Names a preset word type by [key]; its string form is that key. */
+	@Serializable
+	@JvmInline
+	value class Preset(val key: String) : LangVocabInputWordTypeValue {
+		override fun toString(): String = key
+	}
+	/** Wraps a word type that is spelled out on the entry itself. */
+	@Serializable
+	@JvmInline
+	value class Irregular(val value: LangVocabInputWordType) : LangVocabInputWordTypeValue
+}
+
+/**
+ * Chooses the deserializer from the JSON shape: any JSON primitive is read as a
+ * [LangVocabInputWordTypeValue.Preset], every other element as a [LangVocabInputWordTypeValue.Irregular].
+ */
+object LangVocabInputWordTypeValueSerializer : JsonContentPolymorphicSerializer<LangVocabInputWordTypeValue>(LangVocabInputWordTypeValue::class) {
+	override fun selectDeserializer(element: JsonElement): DeserializationStrategy<LangVocabInputWordTypeValue> =
+		if (element is JsonPrimitive) LangVocabInputWordTypeValue.Preset.serializer() else LangVocabInputWordTypeValue.Irregular.serializer()
+}
+
+@Serializable
+data class LangVocabInputWord( // one dictionary entry; a single word may have several
+	val type: LangVocabInputWordTypeValue, // preset word-type key or inline word-type definition
+	val inEnglish: List<String>, // English keywords under which this entry is indexed
+	val forms: List<String> = emptyList(), // forms substituted for #N# placeholders in the word type's templates
+	val definitions: List<String>, // markup of each definition; rendered as the items of an ordered list
+)
+
+@Serializable
+data class LangVocabularyInput( // decoded content of one vocabulary block
+	val langName: String, // markup for the language's display name
+	val types: Map<String, LangVocabInputWordType>, // preset word types, keyed by the name entries refer to
+	val words: Map<String, List<LangVocabInputWord>>, // entries grouped under the word in the language
+)
+
+/** Resolves [typeValue] against this type table: a preset is looked up by key (null when absent), an inline type is returned as-is. */
+fun Map<String, LangVocabInputWordType>.getWordType(typeValue: LangVocabInputWordTypeValue): LangVocabInputWordType? =
+	when (typeValue) {
+		is LangVocabInputWordTypeValue.Irregular -> typeValue.value
+		is LangVocabInputWordTypeValue.Preset -> this[typeValue.key]
+	}
+
+/** Renders a vocabulary block as static HTML: a keyword index plus one initially hidden element per entry. */
+object VocabFormatter {
+	/** Fills `#N#` placeholders from [forms]; a placeholder with no matching form is left as written instead of throwing. */
+	private fun String.withForms(forms: List<String>): String = replace(formatForm) { match ->
+		match.groupValues[1].toIntOrNull()?.let { forms.getOrNull(it) } ?: match.value
+	}
+	
+	/**
+	 * Builds the HTML for the vocabulary described by [tree].
+	 *
+	 * @param tree parser tree whose pre-processed JSON form decodes to a [LangVocabularyInput]
+	 * @param toHtml converts a parsed markup tree into an HTML builder
+	 * @return builder emitting `div.vocab` with the language name, both keyword indices, the search form and the entries
+	 */
+	fun formatVocab(tree: ParserTree, toHtml: (ParserTree) -> HtmlBuilderSubject): HtmlBuilderSubject {
+		val json = tree.toPreProcessJson()
+		val vocab = JsonStorageCodec.decodeFromJsonElement(LangVocabularyInput.serializer(), json)
+		
+		val langName = toHtml(ParserState.parseText(vocab.langName))
+		
+		// Both maps go from a keyword to the positions of its entries in `words`.
+		val wordMap = mutableMapOf<String, MutableSet<Int>>()
+		val englishKeywordMap = mutableMapOf<String, MutableSet<Int>>()
+		val words = vocab.words.entries
+			.sortedLexically { it.key }
+			.flatMap { (word, entries) ->
+				entries.map { entry -> word to entry }
+			}
+			.mapIndexed { i, (word, entry) ->
+				wordMap.getOrPut(word, ::mutableSetOf).add(i)
+				
+				for (keyword in entry.inEnglish)
+					englishKeywordMap.getOrPut(keyword, ::mutableSetOf).add(i)
+				
+				val wordType = vocab.types.getWordType(entry.type)
+				if (wordType == null)
+					listOf(
+						ParserTreeNode.Tag("error", null, listOf(ParserTreeNode.Text("Invalid word type ${entry.type}")))
+					)
+				else {
+					val head = ParserState.parseText(wordType.formatHead.withForms(entry.forms))
+					
+					val defs = listOf(ParserTreeNode.Tag("ol", null, entry.definitions.map { definition ->
+						ParserTreeNode.Tag("li", null, ParserState.parseText(definition))
+					}))
+					
+					val info = wordType.formatInfo?.withForms(entry.forms)?.let(ParserState.Companion::parseText).orEmpty()
+					
+					head + defs + info
+				}
+			}
+			.map(toHtml)
+		
+		return {
+			div(classes = "vocab") {
+				span(classes = "vocab-lang-name") {
+					append(langName)
+				}
+				
+				// One index per search direction; "language" is emitted before "translated".
+				for ((lang, keywordMap) in listOf("language" to wordMap, "translated" to englishKeywordMap)) {
+					span(classes = "vocab-keywords") {
+						attributes["data-lang"] = lang
+						for ((keyword, indices) in keywordMap) {
+							span {
+								attributes["data-keyword"] = keyword
+								for (index in indices) {
+									span {
+										attributes["data-index"] = "$index"
+									}
+								}
+							}
+						}
+					}
+				}
+				
+				// Emitted empty; the "js" data-method tells the page script's generic submit handler to leave this form alone.
+				form(classes = "vocab-search-form") {
+					attributes["data-method"] = "js"
+				}
+				
+				for ((index, word) in words.withIndex()) {
+					div(classes = "vocab-word hide") {
+						attributes["data-word-index"] = "$index"
+						append(word)
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/src/main/kotlin/info/mechyrdia/lore/ParserRobot.kt b/src/main/kotlin/info/mechyrdia/lore/ParserRobot.kt
index 889ffac..e72a20b 100644
--- a/src/main/kotlin/info/mechyrdia/lore/ParserRobot.kt
+++ b/src/main/kotlin/info/mechyrdia/lore/ParserRobot.kt
@@ -161,7 +161,7 @@ enum class FactbookRobotFormattingTag(val type: RobotTextTag) {
 		if ("mechyrdian".equals(param, ignoreCase = true))
 			"(preview of Mechyrdia Sans font)"
 		else if ("tylan".equals(param, ignoreCase = true))
-			"(preview of Tylan abugida font)"
+			"(preview of Tylan alphasyllabary font)"
 		else if ("thedish".equals(param, ignoreCase = true))
 			"(preview of Thedish alphabet font)"
 		else if ("kishari".equals(param, ignoreCase = true))
diff --git a/src/main/resources/static/init.js b/src/main/resources/static/init.js
index ba2f040..105e1ce 100644
--- a/src/main/resources/static/init.js
+++ b/src/main/resources/static/init.js
@@ -107,7 +107,7 @@
 	 * @param {URL} url
 	 * @param {string} stateMode
 	 * @param {?(URLSearchParams|FormData)} [formData=undefined]
-	 * @return {boolean}
+	 * @returns {boolean}
 	 */
 	function goToPage(url, stateMode, formData) {
 		if (url.origin !== window.location.origin || !isPagePath(url.pathname) || url.searchParams.getAll("format").filter(format => format.toLowerCase() !== "html").length > 0) {
@@ -148,7 +148,7 @@
 				newState.index = history.state.index;
 			}
 
-			const requestBody = {};
+			const requestBody = Object.create(null);
 			if (formData != null) {
 				requestBody.body = formData;
 				requestBody.method = "post";
@@ -242,6 +242,10 @@
 	 * @param {SubmitEvent} e
 	 */
 	function formSubmitHandler(e) {
+		if (e.currentTarget.getAttribute("data-method") === "js") {
+			return;
+		}
+
 		const url = new URL(e.currentTarget.action, window.location);
 		const formData = formDataUrlEncoded(new FormData(e.currentTarget, e.submitter));
 		if (e.currentTarget.method.toLowerCase() === "post") {
@@ -268,19 +272,19 @@
 				const key = trimmed.substring(0, eqI).trimEnd();
 				const value = trimmed.substring(eqI + 1).trimStart();
 				return {...obj, [key]: value};
-			}, {});
+			}, Object.create(null));
 	}
 
 	/**
 	 * @param {number} amount
-	 * @return {Promise<void>}
+	 * @returns {Promise<void>}
 	 */
 	function delay(amount) {
 		return new Promise(resolve => window.setTimeout(resolve, amount));
 	}
 
 	/**
-	 * @return {Promise<DOMHighResTimeStamp>}
+	 * @returns {Promise<DOMHighResTimeStamp>}
 	 */
 	function frame() {
 		return new Promise(resolve => window.requestAnimationFrame(resolve));
@@ -288,7 +292,7 @@
 
 	/**
 	 * @param {string} url
-	 * @return {Promise<void>}
+	 * @returns {Promise<void>}
 	 */
 	function loadScript(url) {
 		return new Promise((resolve, reject) => {
@@ -303,7 +307,7 @@
 	/**
 	 * @param {ParentNode} element
 	 * @param {Array.<string>} lines
-	 * @return {void}
+	 * @returns {void}
 	 */
 	function appendWithLineBreaks(element, lines) {
 		let isFirst = true;
@@ -327,76 +331,29 @@
 	}
 
 	/**
-	 * @typedef {{tag: string, attrs: Object.<string, *>, text: (string|{form: string, regexp: string, replacement: string})}} VocabInflectionTableCell
-	 * @typedef {Array.<VocabInflectionTableCell>} VocabInflectionTableRow
-	 * @typedef {Array.<VocabInflectionTableRow>} VocabInflectionTable
-	 * @typedef {{type: string, inEnglish: Array.<string>, forms: Array.<string>, definitions: Array.<string>}} VocabWordEntry
-	 * @typedef {Array.<VocabWordEntry>} VocabWord
-	 * @typedef {{langName: string, inflections: Object.<string, VocabInflectionTable>, words: Object.<string, VocabWord>}} Vocab
-	 *
-	 * @param {Vocab} vocab
-	 * @returns {HTMLDivElement}
+	 * @param {HTMLDivElement} vocabDiv
+	 * @returns {void}
 	 */
-	function renderVocab(vocab) {
-		/**
-		 * @param {string} word
-		 * @param {number} index
-		 * @returns {HTMLDivElement}
-		 */
-		function renderWord(word, index) {
-			const wordRoot = document.createElement("div");
-
-			const wordLabel = document.createElement("strong");
-			wordLabel.append(word);
-			const indexLabel = document.createElement("i");
-			indexLabel.append("definition " + (index + 1));
-			wordRoot.appendChild(document.createElement("p")).append(wordLabel, "\u00A0", indexLabel);
-
-			const definition = vocab.words[word][index];
-			const inflection = vocab.inflections[definition.type];
-
-			const defList = wordRoot.appendChild(document.createElement("ol"));
-			for (const def of definition.definitions) {
-				defList.appendChild(document.createElement("li")).append(def);
-			}
+	function renderVocab(vocabDiv) {
+		const localeCompareSorter = (a, b) => a.localeCompare(b);
 
-			const inflectionTable = wordRoot.appendChild(document.createElement("table"));
-			for (const row of inflection) {
-				const rowElem = inflectionTable.appendChild(document.createElement("tr"));
-				for (const cell of row) {
-					const cellElem = rowElem.appendChild(document.createElement(cell.tag));
-					for (const attr of Object.keys(cell.attrs)) {
-						cellElem.setAttribute(attr, cell.attrs[attr]);
-					}
-					if ((typeof cell.text) === "string") {
-						cellElem.innerHTML = cell.text;
-					} else {
-						cellElem.innerHTML = definition.forms[cell.text.form].replace(RegExp(cell.text.regexp, "ui"), cell.text.replacement);
-					}
-				}
-			}
+		// Maps each trimmed keyword to the data-index strings of its entries.
+		const createKeywordSearch = (keywords) => [...keywords.querySelectorAll("span[data-keyword]")].reduce((obj, kwElem) => {
+			const key = kwElem.getAttribute("data-keyword").trim();
+			const value = [...kwElem.querySelectorAll("span[data-index]")].map(iElem => iElem.getAttribute("data-index"));
+			return Object.assign(obj, {[key]: (obj[key] || []).concat(value)}); // fill the null-prototype seed in place; keywords equal after trim are merged
+		}, Object.create(null));
 
-			return wordRoot;
-		}
+		const langName = vocabDiv.querySelector("span.vocab-lang-name").innerText.trim();
+		const vocabWords = [...vocabDiv.querySelectorAll("div.vocab-word")];
 
-		const localeCompareSorter = (a, b) => a.localeCompare(b);
+		const languageKeywords = vocabDiv.querySelector("span.vocab-keywords[data-lang=language]");
+		const translatedKeywords = vocabDiv.querySelector("span.vocab-keywords[data-lang=translated]");
 
-		const englishToWord = {};
-		for (const word of Object.keys(vocab.words).sort(localeCompareSorter)) {
-			const definitions = vocab.words[word];
-			const definitionsLength = definitions.length;
-			for (let i = 0; i < definitionsLength; i++) {
-				for (const keyword of definitions[i].inEnglish) {
-					const english = englishToWord[keyword] || (englishToWord[keyword] = []);
-					english.push({"word": word, "index": i});
-				}
-			}
-		}
+		const languageToWord = createKeywordSearch(languageKeywords);
+		const englishToWord = createKeywordSearch(translatedKeywords);
 
-		const vocabRoot = document.createElement("div");
-		const vocabSearchRoot = vocabRoot.appendChild(document.createElement("form"));
-		const vocabSearchResults = vocabRoot.appendChild(document.createElement("div"));
-		vocabSearchResults.appendChild(document.createElement("i")).append("Search results will appear here");
+		const vocabSearchRoot = vocabDiv.querySelector("form.vocab-search-form");
 
 		const vocabSearch = vocabSearchRoot.appendChild(document.createElement("input"));
 		vocabSearch.name = "q";
@@ -408,7 +365,7 @@
 		vocabEnglishToLang.type = "radio";
 		vocabEnglishToLang.value = "from-english";
 		vocabEnglishToLang.checked = true;
-		vocabEnglishToLangRoot.append("English to " + vocab.langName);
+		vocabEnglishToLangRoot.append("English to " + langName);
 
 		vocabSearchRoot.appendChild(document.createElement("br"));
 
@@ -417,54 +374,56 @@
 		vocabLangToEnglish.name = "target";
 		vocabLangToEnglish.type = "radio";
 		vocabLangToEnglish.value = "to-english";
-		vocabLangToEnglishRoot.append(vocab.langName + " to English");
+		vocabLangToEnglishRoot.append(langName + " to English");
 
 		const vocabSearchButton = vocabSearchRoot.appendChild(document.createElement("input"));
 		vocabSearchButton.type = "submit";
 		vocabSearchButton.value = "Search";
 
+		const vocabSearchResultNumber = vocabSearchRoot.appendChild(document.createElement("p"));
+		vocabSearchResultNumber.style.fontSize = "0.8em";
+		vocabSearchResultNumber.append("Search results will appear below");
+
 		vocabSearchRoot.addEventListener("submit", function (e) {
 			e.preventDefault();
 
 			const searchTerm = vocabSearch.value.trim();
 
-			vocabSearchResults.replaceChildren();
-
-			const searchResults = [];
+			const searchResults = Object.create(null);
 			if (vocabEnglishToLang.checked) {
 				for (const englishWord of Object.keys(englishToWord).sort(localeCompareSorter)) {
 					if (!englishWord.startsWith(searchTerm)) continue;
 
-					for (const vocabItem of englishToWord[englishWord]) {
-						if (searchResults.some(result => result.word === vocabItem.word && result.index === vocabItem.index)) continue;
-
-						searchResults.push(vocabItem);
+					for (const wordIndex of englishToWord[englishWord]) {
+						searchResults[wordIndex] = true;
 					}
 				}
 			} else {
-				for (const langWord of Object.keys(vocab.words).sort(localeCompareSorter)) {
+				for (const langWord of Object.keys(languageToWord).sort(localeCompareSorter)) {
 					if (!langWord.startsWith(searchTerm)) continue;
 
-					const numDefs = vocab.words[langWord].length;
-					for (let i = 0; i < numDefs; i++) {
-						searchResults.push({"word": langWord, "index": i});
+					for (const wordIndex of languageToWord[langWord]) {
+						searchResults[wordIndex] = true;
 					}
 				}
 			}
 
-			if (searchResults.length === 0) {
-				vocabSearchResults.appendChild(document.createElement("i")).append("No results found");
-				return;
+			const numResults = Object.keys(searchResults).length;
+			if (numResults === 1) {
+				vocabSearchResultNumber.replaceChildren("1 result found");
+			} else {
+				vocabSearchResultNumber.replaceChildren(`${numResults} results found`);
 			}
 
-			searchResults.sort((a, b) => (a.word === b.word) ? (a.index - b.index) : localeCompareSorter(a.word, b.word));
-
-			for (const searchResult of searchResults) {
-				vocabSearchResults.append(renderWord(searchResult.word, searchResult.index));
+			for (const vocabWord of vocabWords) {
+				const vocabWordIndex = vocabWord.getAttribute("data-word-index");
+				if (searchResults[vocabWordIndex]) {
+					vocabWord.classList.remove("hide");
+				} else {
+					vocabWord.classList.add("hide");
+				}
 			}
 		});
-
-		return vocabRoot;
 	}
 
 	/**
@@ -555,7 +514,7 @@
 		 * @returns {QuizOutcome}
 		 */
 		function calculateResults() {
-			const total = {};
+			const total = Object.create(null);
 			for (const result of questionAnswers) {
 				for (const resKey of Object.keys(result)) {
 					if (total[resKey] == null) {
@@ -1032,11 +991,9 @@
 		(function () {
 			// Render vocab
 
-			const vocabSpans = dom.querySelectorAll("span.vocab");
-			for (const vocabSpan of vocabSpans) {
-				const vocab = JSON.parse(vocabSpan.getAttribute("data-vocab"));
-				vocabSpan.after(renderVocab(vocab));
-				vocabSpan.remove();
+			const vocabDivs = dom.querySelectorAll("div.vocab");
+			for (const vocabDiv of vocabDivs) {
+				renderVocab(vocabDiv);
 			}
 		})();
 
diff --git a/src/main/resources/static/style.css b/src/main/resources/static/style.css
index e087122..9026c7b 100644
--- a/src/main/resources/static/style.css
+++ b/src/main/resources/static/style.css
@@ -803,3 +803,15 @@ a.copy-text[data-copying] {
 	color: var(--text-color);
 	pointer-events: none;
 }
+
+.vocab-lang-name {
+	display: none;
+}
+
+.vocab-keywords {
+	display: none;
+}
+
+.vocab-word.hide {
+	display: none;
+}