More mantra things
This commit is contained in:
133
scripts/convert-btc-tex-to-markdown.js
Normal file
133
scripts/convert-btc-tex-to-markdown.js
Normal file
@@ -0,0 +1,133 @@
|
||||
const fs = require('fs');
|
||||
const path = require("path")
|
||||
const { XMLParser } = require("fast-xml-parser")
|
||||
|
||||
/**
 * Chooses the separator to place after an inline element, based on the
 * sibling node that follows it.
 *
 * The sibling is rendered with `produceString` and inspected: if its first
 * non-whitespace character is one of `. , ? : !`, or the rendered text starts
 * with an em dash, no space is wanted so the punctuation stays attached to
 * the preceding element.
 *
 * @param {*} nextObject - The node following the current one; falsy when
 *   there is no following sibling.
 * @returns {string} `" "` when a separating space is needed, otherwise `""`.
 */
const appendOptionalSpace = (nextObject) => {
  // Nothing follows, so there is nothing to separate from.
  if (!nextObject) {
    return "";
  }

  const rendered = produceString(nextObject);
  const firstVisibleChar = rendered.trim().at(0);

  // The em-dash test deliberately looks at the untrimmed text, as before.
  const attachesToPrevious =
    [".", ",", "?", ":", "!"].includes(firstVisibleChar) || rendered.startsWith("—");

  return attachesToPrevious ? "" : " ";
};
|
||||
/**
 * Recursively renders a parsed XML node tree as a Markdown string.
 *
 * The input is expected to have the shape the script obtains from
 * `XMLParser.parse` with `preserveOrder: true`: arrays of node objects whose
 * keys are a tag name (mapping to the child array), `#text` (text content)
 * and `:@` (the attribute map). Keys that are not handled below render as an
 * empty string.
 *
 * @param {*} xmlObject - A node, an array of sibling nodes, or a missing
 *   node (`null`/`undefined`), which renders as `""`.
 * @param {string[]} [references=[]] - Accumulator that receives one footnote
 *   definition per `note` element encountered; it is mutated in place.
 * @param {*} [nextObject=null] - The sibling that follows `xmlObject`, used
 *   to decide whether an inline element is followed by a space.
 * @param {number} [level=1] - Section nesting depth. It is tracked but does
 *   not currently influence the output.
 * @returns {string} The Markdown produced for `xmlObject`.
 */
const produceString = (xmlObject, references = [], nextObject = null, level = 1) => {
  // A missing node (for example a figure without a mediaResource child)
  // renders as nothing instead of crashing in Object.keys().
  if (xmlObject === null || xmlObject === undefined) {
    return "";
  }

  if (Array.isArray(xmlObject)) {
    // Each child is told which sibling follows it so inline elements can
    // decide on trailing whitespace.
    return xmlObject
      .map((child, index, siblings) => produceString(child, references, siblings[index + 1], level))
      .join("");
  }

  return Object.keys(xmlObject)
    .map((key) => {
      switch (key) {
        case "component":
          // The first three children of a component are skipped.
          return produceString(xmlObject[key].slice(3), references);

        // Pure containers: render their children unchanged. The attribute
        // map (":@") has no handled keys, so it renders as "".
        case "header":
        case "contentMeta":
        case "titleGroup":
        case "body":
        case ":@":
          return produceString(xmlObject[key], references);

        case "title":
          return `# ${produceString(xmlObject[key], references)}`;

        case "?xmltex":
          return `${produceString(xmlObject[key], references)} `;

        case "#text":
          return xmlObject[key];

        case "section":
          // `level` is the fourth parameter; the third is the next sibling.
          return `\n${produceString(xmlObject[key], references, null, level + 1)}`;

        case "i":
          return ` *${produceString(xmlObject[key], references)}*${appendOptionalSpace(nextObject)}`;

        case "p":
          return `\n${produceString(xmlObject[key], references)}\n`;

        case "exlink": {
          // The link label is the numeric value of the last two characters
          // of the final ":"-separated part of the href.
          const chapterNumber = xmlObject[":@"]["@_href"].split(":").at(-1).slice(-2);
          return ` [${Number(chapterNumber)}](exlink://${xmlObject[":@"]["@_href"]})${appendOptionalSpace(nextObject)}`;
        }

        case "url":
          // NOTE(review): this emits `[text][href]` (reference-link form)
          // while the other link branches emit `[text](href)`; confirm
          // whether that is intended before changing the output.
          return ` [${produceString(xmlObject["url"], references)}][${xmlObject[":@"]["@_href"]}] `;

        case "link":
          return ` [${xmlObject[":@"]["@_href"]}](link://${xmlObject[":@"]["@_href"]})${appendOptionalSpace(nextObject)}`;

        case "note": {
          // The note body becomes a footnote definition collected in
          // `references`; only the marker is rendered inline.
          const noteId = xmlObject[":@"]["@_xml:id"];
          const footnote = `\n[^${noteId}]: ${produceString(xmlObject[key], references)}\n`;
          references.push(footnote);
          return `[^${noteId}]${appendOptionalSpace(nextObject)}`;
        }

        case "figure": {
          const image = xmlObject[key].find((child) => child.mediaResource);
          const caption = xmlObject[key].find((child) => child.caption);

          if (caption) {
            // `references` is forwarded so footnotes inside a caption are
            // collected like any other footnote.
            return `\n${produceString(image, references)}\n<figure>\n${produceString(caption, references)}\n</figure>\n`;
          }
          return `\n${produceString(image, references)}\n`;
        }

        case "mediaResource":
          // Image export is not implemented; media resources render empty.
          return ``;

        case "caption":
          return `<figcaption>${produceString(xmlObject[key], references)}</figcaption>`;

        default:
          return "";
      }
    })
    .join("");
};
|
||||
|
||||
// Conversion driver: walks the source tree, converts every file found in
// each entry's `text_s` directory to Markdown and writes it to `result/`.

// Root of the source tree. It can be overridden with the first command-line
// argument; without one the original hard-coded location is used.
const btcRootDir = process.argv[2] || "/home/sigidli/Documents/business/exonumia/bitcoin-standard/9781119473862";

// Entries directly under the root that are not converted.
const excludeRootContent = [
  "pdf",
  "Manifest",
  "metadata",
  "cover_US",
  "control",
  "summary.xls",
  "protocol.html",
  // "fmatter",
  // "bmatter"
];

const rootDirectories = fs.readdirSync(btcRootDir).filter((entry) => !excludeRootContent.includes(entry)); // .filter(d => d == "ch03")

// One parser is enough for every file. Attributes are kept and document
// order is preserved, which is the shape produceString consumes.
const parser = new XMLParser({
  ignoreAttributes: false,
  preserveOrder: true,
});

const resultDIR = "result";

rootDirectories.forEach((directory) => {
  console.log(directory);

  // TODO: image export
  const textDirectory = path.join(btcRootDir, directory, "text_s");

  // Entries without a text_s directory are reported and skipped instead of
  // aborting the whole run.
  if (!fs.existsSync(textDirectory)) {
    console.warn(`Skipping ${directory}: no text_s directory found`);
    return;
  }

  fs.readdirSync(textDirectory).forEach((file) => {
    const fileContent = fs.readFileSync(path.join(textDirectory, file));
    // The first six lines of every source file are dropped before parsing.
    const fileStringContent = fileContent.toString().split("\n").slice(6).join("\n");

    const xmlObject = parser.parse(fileStringContent);

    // Footnote definitions collected during rendering are appended after
    // the body text.
    const references = [];
    const markdownContent = produceString(xmlObject, references) + references.join("");

    // Created on demand, so no output directory appears when nothing is
    // converted; `recursive` makes the call a no-op if it already exists.
    fs.mkdirSync(resultDIR, { recursive: true });

    // NOTE(review): output names are derived from the file name only, so
    // equally named files in different directories overwrite each other.
    fs.writeFileSync(path.join(resultDIR, `${file}.md`), markdownContent);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user