const fs = require("fs");
const path = require("path");
const { XMLParser } = require("fast-xml-parser");

// Characters that should directly follow an inline element, with no space in between.
const CLOSING_PUNCTUATION = [".", ",", "?", ":", "!"];

/**
 * Decide whether a space is needed after an inline element (italic, link, footnote marker).
 *
 * The following sibling is rendered to Markdown and inspected: if it begins with
 * closing punctuation (ignoring leading whitespace) or with an em dash, no space is added.
 *
 * @param {object|null|undefined} nextObject - The sibling node that follows the inline element.
 * @returns {string} `" "` when a separating space is needed, otherwise `""`.
 */
function appendOptionalSpace(nextObject) {
  if (!nextObject) {
    return "";
  }
  // Rendered with a throwaway references array so look-ahead never registers footnotes twice.
  const nextString = produceString(nextObject);
  if (CLOSING_PUNCTUATION.includes(nextString.trim().at(0))) {
    return "";
  }
  if (nextString.startsWith("—")) {
    return "";
  }
  return " ";
}

/**
 * Recursively convert a fast-xml-parser `preserveOrder` tree into Markdown.
 *
 * Unknown elements (and attribute keys) render as an empty string.
 *
 * @param {Array|object|null|undefined} xmlObject - A node, or an array of sibling nodes.
 * @param {string[]} [references] - Collector for footnote definitions; mutated in place.
 * @param {object|null} [nextObject] - The sibling following `xmlObject`, used for spacing decisions.
 * @param {number} [level] - Section nesting depth. It is tracked but does not influence the output.
 * @returns {string} The Markdown for the node. (A lone `#text` key yields the raw parsed value.)
 */
function produceString(xmlObject, references = [], nextObject = null, level = 1) {
  if (xmlObject == null) {
    return "";
  }

  if (Array.isArray(xmlObject)) {
    return xmlObject
      .map((child, index, siblings) => produceString(child, references, siblings[index + 1], level))
      .join("");
  }

  // With `preserveOrder`, an element's attributes live under the ":@" key next to the tag key.
  const attributes = xmlObject[":@"];

  return Object.keys(xmlObject)
    .map((key) => {
      const children = xmlObject[key];
      switch (key) {
        case "component":
          // The first three children of <component> are skipped.
          return produceString(children.slice(3), references);
        case "header":
        case "contentMeta":
        case "titleGroup":
        case "body":
        case ":@":
          return produceString(children, references);
        case "title":
          return `# ${produceString(children, references)}`;
        case "?xmltex":
          return `${produceString(children, references)} `;
        case "#text":
          return children;
        case "section":
          // `level` is the fourth parameter; the third is the next sibling.
          return `\n${produceString(children, references, null, level + 1)}`;
        case "i":
          return ` *${produceString(children, references)}*${appendOptionalSpace(nextObject)}`;
        case "p":
          return `\n${produceString(children, references)}\n`;
        case "exlink": {
          const href = attributes["@_href"];
          // The link label is the last two characters of the final ":"-separated href segment, as a number.
          const chapterNumber = href.split(":").at(-1).slice(-2);
          return ` [${Number(chapterNumber)}](exlink://${href})${appendOptionalSpace(nextObject)}`;
        }
        case "url":
          // Inline link form; `[text][href]` would be a reference link with no definition.
          return ` [${produceString(children, references)}](${attributes["@_href"]}) `;
        case "link": {
          const href = attributes["@_href"];
          return ` [${href}](link://${href})${appendOptionalSpace(nextObject)}`;
        }
        case "note": {
          const noteId = attributes["@_xml:id"];
          const footnote = `\n[^${noteId}]: ${produceString(children, references)}\n`;
          references.push(footnote);
          return `[^${noteId}]${appendOptionalSpace(nextObject)}`;
        }
        case "figure": {
          const image = children.find((child) => child.mediaResource);
          const caption = children.find((child) => child.caption);
          // A figure without a mediaResource renders with an empty image slot instead of throwing.
          const imageMarkdown = image ? produceString(image, references) : "";
          if (!caption) {
            return `\n${imageMarkdown}\n`;
          }
          // `references` is forwarded so footnotes inside captions are not lost.
          return `\n${imageMarkdown}\n\n\n${produceString(caption, references)}\n\n\n`;
        }
        case "mediaResource":
          return `![](${attributes["@_href"]})`;
        case "caption":
          return `\n${produceString(children, references)}\n`;
        default:
          return "";
      }
    })
    .join("");
}

// Source root of the book export; may be overridden by the first command-line argument.
const DEFAULT_BOOK_ROOT = "/home/sigidli/Documents/business/exonumia/bitcoin-standard/9781119473862";
const btcRootDir = process.argv[2] ?? DEFAULT_BOOK_ROOT;

// Root entries that are not converted. Add "fmatter" / "bmatter" here to skip those as well.
const excludeRootContent = [
  "pdf",
  "Manifest",
  "metadata",
  "cover_US",
  "control",
  "summary.xls",
  "protocol.html",
];

const rootDirectories = fs
  .readdirSync(btcRootDir)
  .filter((entry) => !excludeRootContent.includes(entry));

const resultDIR = "result";

// `parse` can be called repeatedly on one parser instance, so it is built once.
const parser = new XMLParser({
  ignoreAttributes: false,
  preserveOrder: true,
});

for (const directory of rootDirectories) {
  console.log(directory);

  // TODO: image export
  const textDirectory = path.join(btcRootDir, directory, "text_s");

  for (const file of fs.readdirSync(textDirectory)) {
    const fileContent = fs.readFileSync(path.join(textDirectory, file));
    // The first six lines of each file are dropped before parsing
    // (presumably the XML prolog / DOCTYPE — TODO confirm).
    const fileStringContent = fileContent.toString().split("\n").slice(6).join("\n");

    const xmlObject = parser.parse(fileStringContent);
    const references = [];
    const markdownContent = produceString(xmlObject, references) + references.join("");

    // Idempotent: creates the output directory on first use, no-op afterwards.
    fs.mkdirSync(resultDIR, { recursive: true });
    fs.writeFileSync(path.join(resultDIR, `${file}.md`), markdownContent);
  }
}