import * as fs from "fs";
import * as path from "path";
/**
 * Languages that can be cited with a `[code …]` marker, keyed by the code
 * written inside the brackets.
 *
 * Each entry holds:
 * - `abbr`: the abbreviation printed in the output;
 * - `name`: the full language name, used as the `title` of the `<abbr>`;
 * - `els` / `tags`: parallel arrays giving, for the n-th " / "-separated form
 *   of a cited word, the HTML element to wrap it in and the value of its
 *   `lang` attribute. Where several forms are listed, the `-Latn` tag comes
 *   last and is paired with `<i>`.
 */
const langs = {
  en: {
    abbr: "Eng.",
    name: "English",
    els: ["i"],
    tags: ["en"]
  },
  zh: {
    abbr: "Man.",
    name: "Mandarin Chinese",
    els: ["span", "i"],
    tags: ["zh-Hans", "zh-Latn"]
  },
  hi: {
    abbr: "H.–U.",
    name: "Hindi–Urdu",
    els: ["span", "span", "i"],
    tags: ["hi-Deva", "ur-Arab", "hi-Latn"]
  },
  es: {
    abbr: "Sp.",
    // Spelling corrected from "Castillian"; this text is shown as a tooltip.
    name: "Spanish (Castilian)",
    els: ["i"],
    tags: ["es"]
  },
  fr: {
    abbr: "Fr.",
    name: "French",
    els: ["i"],
    tags: ["fr"]
  },
  pt: {
    abbr: "Pt.",
    name: "Portuguese",
    els: ["i"],
    tags: ["pt"]
  },
  ar: {
    abbr: "Ar.",
    name: "Arabic",
    els: ["span", "i"],
    tags: ["ar-Arab", "ar-Latn"]
  },
  ms: {
    abbr: "M.–I.",
    name: "Malay–Indonesian",
    els: ["i"],
    tags: ["ms"]
  },
  ru: {
    abbr: "Rus.",
    name: "Russian",
    els: ["span", "i"],
    tags: ["ru-Cyrl", "ru-Latn"]
  },
  sw: {
    abbr: "Sw.",
    name: "Swahili",
    els: ["i"],
    tags: ["sw"]
  },
  la: {
    abbr: "Lat.",
    name: "Latin",
    els: ["i"],
    tags: ["la"]
  },
  grc: {
    abbr: "A.Gk.",
    name: "Ancient Greek",
    els: ["span", "i"],
    tags: ["grc-Grek", "grc-Latn"]
  },
  fa: {
    abbr: "Per.",
    name: "Persian",
    els: ["span", "i"],
    tags: ["fa-Arab", "fa-Latn"]
  }
};
/**
 * Converts a bracketed language marker into HTML.
 *
 * `[en]` becomes just the language abbreviation; `[zh 字 / zì]` becomes the
 * abbreviation followed by each " / "-separated form wrapped in the element
 * and `lang` tag configured for that position in `langs`.
 *
 * Only the first argument is used, so the function can be passed directly as
 * a `String.prototype.replace` callback.
 *
 * @param {string} brackets - The whole marker, brackets included. Its language
 *   code is expected to be a key of `langs`.
 * @returns {string} HTML for the marker.
 */
const markLanguage = brackets => {
  const code = brackets.match(/[a-z]+/)[0];
  const { abbr, name, els, tags } = langs[code];
  const label = `<abbr title="${name}">${abbr}</abbr>`;
  const words = brackets.match(/\[[a-z]+ (.*?)\]/);
  if (!words) {
    return label;
  }
  const forms = words[1].split(" / ").map((word, idx) => {
    // If more forms are given than the language has slots, reuse the last
    // slot instead of emitting `<undefined lang="undefined">`.
    const slot = Math.min(idx, els.length - 1);
    const el = els[slot];
    // The end tag must name its element: `</>` is not a valid HTML end tag
    // and would leave the element open.
    return `<${el} lang="${tags[slot]}">${word}</${el}>`;
  });
  return `${label} ${forms.join(" ")}`;
};
/**
 * Returns `text` with every space character turned into a hyphen.
 * Other whitespace is left untouched.
 *
 * @param {string} text
 * @returns {string}
 */
const hyphenate = text => text.split(" ").join("-");
// Part-of-speech markers (the text between the brackets) paired with the
// full name used as the `title` of the generated <abbr>.
const PART_OF_SPEECH_TITLES = [
  ["n.", "noun"],
  ["v.", "verb"],
  ["a.", "adjective"],
  ["adv.", "adverb"],
  ["p.n.", "proper noun"],
  ["prn.", "pronoun"],
  ["det.", "determiner"],
  ["prep.", "preposition"],
  ["part.", "particle"],
  ["aff.", "affix"],
  ["num.", "numeral"]
];

// Replaces every occurrence of the literal `marker` in `text`.
const replaceEvery = (text, marker, replacement) =>
  text.split(marker).join(replacement);

// Expands every part-of-speech marker into an <abbr> and removes the empty
// `[] ` marker.
const markPartsOfSpeech = text =>
  replaceEvery(
    PART_OF_SPEECH_TITLES.reduce(
      (marked, [pos, title]) =>
        replaceEvery(marked, `[${pos}]`, `<abbr title='${title}'>${pos}</abbr>`),
      text
    ),
    "[] ",
    ""
  );

// Rewrites every `[# word]` cross-reference as a link to that word's anchor.
const linkHeadwords = text =>
  text.replace(
    /\[# (.*?)\]/g,
    (whole, word) =>
      `<a href="#${hyphenate(
        word
      )}" lang="art-x-basa"><strong>${word}</strong></a>`
  );

// Marks up the etymology part of an entry: language markers, `[ditto]`,
// `[+]` and cross-references.
const markEtymon = text => {
  const withLanguages = text.replace(
    /\[(?:en|zh|hi|es|fr|pt|ar|ru|sw|ms|la|grc|fa)(?: .*?)?\]/g,
    markLanguage
  );
  const withDittos = replaceEvery(withLanguages, "[ditto]", "”");
  return linkHeadwords(replaceEvery(withDittos, "[+]", "” +"));
};

// Marks up the note part of an entry: `[see]` and cross-references.
const markNote = text => linkHeadwords(replaceEvery(text, "[see]", "▶"));

/**
 * Parses one dictionary line of the form
 * `- headword [pos] definition <- etymon -> note`, where the `<- etymon` and
 * `-> note` parts are optional, and converts its markers to HTML.
 *
 * Every occurrence of a marker is expanded, not only the first one.
 *
 * @param {string} entry - A single entry line, without leading indentation.
 * @returns {{url: string, headword: string, definition: string,
 *   etymon: (string|undefined), note: (string|undefined)}} The parsed entry;
 *   `etymon` and `note` are `undefined` when absent or empty.
 * @throws {TypeError} If `entry` does not have the expected shape.
 */
const mark = entry => {
  const match = entry.match(/^- (.*?) (\[.*?)(?: <- (.*?))?(?: -> (.*?))?$/);
  if (!match) {
    // Still a TypeError, as before, but one that identifies the bad line.
    throw new TypeError(`Malformed dictionary entry: ${JSON.stringify(entry)}`);
  }
  const [, headword, definition, etymon, note] = match;
  return {
    url: hyphenate(headword),
    headword,
    definition: markPartsOfSpeech(definition),
    etymon: !etymon ? undefined : markEtymon(etymon),
    note: !note ? undefined : markNote(note)
  };
};
/**
 * Builds the node for one entry and, recursively, for the entries nested
 * beneath it.
 *
 * A single-line `word` yields `{ headword }`. A multi-line `word` yields
 * `{ headword, children }`: the first line is the node's own entry, and the
 * remaining text, with one leading tab removed from each line, is cut before
 * every line that starts with "-" to form the children.
 *
 * @param {string} word - An entry line, optionally followed by tab-indented
 *   sub-entries.
 * @returns {{headword: object, children?: object[]}}
 */
const branch = word => {
  if (!word.includes("\n")) {
    return { headword: mark(word) };
  }
  const [firstLine] = word.match(/^.*?$/m);
  // Removing one tab per line lifts the direct children to column 0, so only
  // they (and not deeper descendants) begin with "-".
  const dedented = word.replace(/^\t/gm, "");
  // The first piece is the node's own line; everything after it is a child.
  const [, ...nested] = dedented.split(/\n(?=-)/);
  return {
    headword: mark(firstLine),
    children: nested.map(branch)
  };
};
/**
 * Splits a group of entries before every line that starts with "-" and turns
 * each piece into a node with `branch`. Tab-indented lines stay attached to
 * the piece that precedes them.
 *
 * @param {string} trunk - Text of one group of entries.
 * @returns {object[]} One node per piece.
 */
const tree = trunk => {
  const pieces = trunk.split(/\n(?=-)/);
  return pieces.map(branch);
};
/**
 * Splits the dictionary text into groups separated by blank lines and parses
 * each group with `tree`.
 *
 * Any run of two or more newlines counts as one separator, and empty groups
 * (from leading or trailing blank lines, or from empty input) are dropped
 * rather than being passed on as entries that cannot be parsed.
 *
 * @param {string} dict - Dictionary text with "\n" line endings.
 * @returns {object[][]} One array of nodes per group.
 */
const dictionary = dict =>
  dict
    .split(/\n{2,}/)
    .filter(group => group !== "")
    .map(tree);
// Text of dict.txt, read synchronously at module load, with every line ending
// (\r\n, \n\r, lone \r, lone \n) normalised to "\n".
// NOTE(review): the path is absolute and specific to one machine — confirm
// this is intended.
const enToBasa = fs
  .readFileSync(
    "/home/atossa/server/satyrsforest/hypertext/public/basa/dictionary/dict.txt",
    "utf8"
  )
  .split(/\r\n|\n\r|\r|\n/)
  .join("\n");
// NOTE(review): `output` is not declared anywhere in the visible part of this
// file — confirm it is defined elsewhere; if it is not, this export throws a
// ReferenceError when the module is loaded.
export default output;