Files
mantra.press/scripts/convert-btc-tex-to-markdown.js
2025-06-13 12:59:51 +02:00

134 lines
5.3 KiB
JavaScript

const fs = require('fs');
const path = require("path")
const { XMLParser } = require("fast-xml-parser")
/**
 * Decides whether an inline element should be followed by a single space.
 *
 * The following sibling is rendered with `produceString`; no space is added when
 * there is no sibling, when the rendered sibling (ignoring leading whitespace)
 * begins with one of `. , ? : !`, or when it begins directly with an em dash.
 *
 * @param {Object|null|undefined} nextObject - The sibling node that follows, if any.
 * @returns {string} Either " " or "".
 */
const appendOptionalSpace = (nextObject) => {
  // Nothing follows: the element ends its container, so no separator is needed.
  if (!nextObject) {
    return "";
  }

  const upcomingText = produceString(nextObject);
  const closingMarks = new Set([".", ",", "?", ":", "!"]);

  // `at(0)` yields undefined for an empty string, which is never in the set.
  const firstVisibleChar = upcomingText.trim().at(0);
  const attachesToPrevious =
    closingMarks.has(firstVisibleChar) || upcomingText.startsWith("—");

  return attachesToPrevious ? "" : " ";
};
/**
 * Recursively renders a node (or a list of sibling nodes) of the parsed XML tree
 * as Markdown.
 *
 * Nodes are expected in the shape fast-xml-parser produces with
 * `preserveOrder: true`: arrays of single-tag objects whose children live under
 * the tag name and whose attributes live under ":@" with an "@_" prefix.
 *
 * @param {Array|Object|null|undefined} xmlObject - Node, sibling list, or nothing.
 * @param {string[]} [references=[]] - Collector that footnote definitions are
 *   pushed onto (mutated in place).
 * @param {Object|null} [nextObject=null] - Sibling that follows `xmlObject`; used
 *   to decide whether an inline element is followed by a space.
 * @param {number} [level=1] - Section nesting depth of `xmlObject`.
 * @returns {string} Markdown for the node; tags without a rule render as "".
 */
const produceString = (xmlObject, references = [], nextObject = null, level = 1) => {
  // A missing node (e.g. a <figure> without a <mediaResource>) renders as nothing
  // instead of throwing inside Object.keys.
  if (xmlObject == null) {
    return "";
  }

  if (Array.isArray(xmlObject)) {
    // Each element receives its following sibling so inline rules can inspect it.
    return xmlObject
      .map((object, index, siblings) => produceString(object, references, siblings[index + 1], level))
      .join("");
  }

  // Tags that contribute no markup of their own: only their children are rendered.
  const passThroughKeys = ["header", "contentMeta", "titleGroup", "body", ":@"];
  const renderChildren = (children) => produceString(children, references, null, level);
  const attributes = xmlObject[":@"];

  return Object.keys(xmlObject).map((key) => {
    if (passThroughKeys.includes(key)) {
      return renderChildren(xmlObject[key]);
    }

    switch (key) {
      case "component":
        // The first three children of <component> are skipped.
        return renderChildren(xmlObject[key].slice(3));
      case "title":
        // NOTE(review): headings are always emitted at depth 1; `level` is tracked
        // through the recursion but not applied here — confirm whether nested
        // sections should produce deeper headings.
        return `# ${renderChildren(xmlObject[key])}`;
      case "?xmltex":
        return `${renderChildren(xmlObject[key])} `;
      case "#text":
        return xmlObject[key];
      case "section":
        // Children of a section are one level deeper. `level` must go in the
        // fourth argument; the third is the next-sibling slot.
        return `\n${produceString(xmlObject[key], references, null, level + 1)}`;
      case "i":
        return ` *${renderChildren(xmlObject[key])}*${appendOptionalSpace(nextObject)}`;
      case "p":
        return `\n${renderChildren(xmlObject[key])}\n`;
      case "exlink": {
        // The link label is the numeric value of the last two characters of the
        // final ":"-separated segment of the href.
        const chapterNumber = attributes["@_href"].split(":").at(-1).slice(-2);
        return ` [${Number(chapterNumber)}](exlink://${attributes["@_href"]})${appendOptionalSpace(nextObject)}`;
      }
      case "url":
        // NOTE(review): this emits reference-style `[text][href]`, unlike the
        // inline `(…)` form used for exlink/link — confirm this is intended.
        return ` [${renderChildren(xmlObject["url"])}][${attributes["@_href"]}] `;
      case "link":
        return ` [${attributes["@_href"]}](link://${attributes["@_href"]})${appendOptionalSpace(nextObject)}`;
      case "note": {
        // The footnote body is collected separately; only the marker stays inline.
        const footnote = `\n[^${attributes["@_xml:id"]}]: ${renderChildren(xmlObject[key])}\n`;
        references.push(footnote);
        return `[^${attributes["@_xml:id"]}]${appendOptionalSpace(nextObject)}`;
      }
      case "figure": {
        const image = xmlObject[key].find((child) => child.mediaResource);
        const caption = xmlObject[key].find((child) => child.caption);
        if (caption) {
          // The caption shares the footnote collector so notes inside it are kept.
          return `\n${renderChildren(image)}\n<figure>\n${renderChildren(caption)}\n</figure>\n`;
        }
        return `\n${renderChildren(image)}\n`;
      }
      case "mediaResource":
        return `![](${attributes["@_href"]})`;
      case "caption":
        return `<figcaption>${renderChildren(xmlObject[key])}</figcaption>`;
      default:
        return "";
    }
  }).join("");
};
// Root of the source package to convert. Every entry not listed in
// `excludeRootContent` is read as a directory containing a `text_s` folder.
const btcRootDir = "/home/sigidli/Documents/business/exonumia/bitcoin-standard/9781119473862";
const excludeRootContent = [
  "pdf",
  "Manifest",
  "metadata",
  "cover_US",
  "control",
  "summary.xls",
  "protocol.html",
  // "fmatter",
  // "bmatter"
];
// Markdown output directory, relative to the current working directory.
const resultDIR = "result";
// One parser is reused for every file. `preserveOrder` yields the ordered node
// arrays that produceString walks; attributes are kept for hrefs and ids.
const parser = new XMLParser({
  ignoreAttributes: false,
  preserveOrder: true,
});

const rootDirectories = fs.readdirSync(btcRootDir).filter(d => !excludeRootContent.includes(d)); // .filter(d => d == "ch03")

// Create the output directory once up front; `recursive` makes this a no-op
// when it already exists.
fs.mkdirSync(resultDIR, { recursive: true });

rootDirectories.forEach(directory => {
  console.log(directory);
  // TODO: image export
  const textDirectory = path.join(btcRootDir, directory, "text_s");
  fs.readdirSync(textDirectory).forEach(file => {
    const fileContent = fs.readFileSync(path.join(textDirectory, file));
    // The first six lines of each file are dropped before parsing.
    const fileStringContent = fileContent.toString().split("\n").slice(6).join("\n");
    const xmlObject = parser.parse(fileStringContent);

    // Footnote definitions gathered during rendering are appended after the text.
    const references = [];
    const markdownContent = produceString(xmlObject, references) + references.join("");

    fs.writeFileSync(
      path.join(resultDIR, `${file}.md`),
      markdownContent
    );
  });
});