From 94d1aeb287d91f695232f1ac6543ca4a58ed92b6 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Tue, 2 Aug 2022 21:02:33 +0000 Subject: [PATCH] Fix unfurler language support --- .../src/app/services/opengraph.service.ts | 12 +-- unfurler/src/concurrency/ReusablePage.ts | 8 +- unfurler/src/index.ts | 54 +++++++------ unfurler/src/language/lang.ts | 79 +++++++++++++++++++ 4 files changed, 121 insertions(+), 32 deletions(-) create mode 100644 unfurler/src/language/lang.ts diff --git a/frontend/src/app/services/opengraph.service.ts b/frontend/src/app/services/opengraph.service.ts index 12c74efcb..58de73325 100644 --- a/frontend/src/app/services/opengraph.service.ts +++ b/frontend/src/app/services/opengraph.service.ts @@ -76,7 +76,7 @@ export class OpenGraphService { this.metaService.updateTag({ property: 'og:preview:loading', content: 'loading'}); } - // signal that an event has resolved + // mark an event as resolved // if all registered events have resolved, signal we are ready for a screenshot waitOver(event) { if (this.previewLoadingEvents[event]) { @@ -100,9 +100,11 @@ export class OpenGraphService { } loadPage(path) { - this.resetLoading(); - this.ngZone.run(() => { - this.router.navigateByUrl(path); - }) + if (path !== this.router.url) { + this.resetLoading(); + this.ngZone.run(() => { + this.router.navigateByUrl(path); + }) + } } } diff --git a/unfurler/src/concurrency/ReusablePage.ts b/unfurler/src/concurrency/ReusablePage.ts index 98cdadc4d..c8f40b2c8 100644 --- a/unfurler/src/concurrency/ReusablePage.ts +++ b/unfurler/src/concurrency/ReusablePage.ts @@ -5,12 +5,13 @@ import { timeoutExecute } from 'puppeteer-cluster/dist/util'; import config from '../config'; const mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''); -const BROWSER_TIMEOUT = 5000; +const BROWSER_TIMEOUT = 8000; // maximum lifetime of a single page session const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000; interface repairablePage extends puppeteer.Page { repairRequested?: boolean; + language?: string | null; } export default class ReusablePage extends ConcurrencyImplementation { @@ -68,6 +69,7 @@ export default class ReusablePage extends ConcurrencyImplementation { protected async createResources(): Promise { if (!this.currentPage) { this.currentPage = await (this.browser as puppeteer.Browser).newPage(); + this.currentPage.language = null; this.pageCreatedAt = Date.now(); const defaultUrl = mempoolHost + '/preview/block/1'; this.currentPage.on('pageerror', (err) => { @@ -85,10 +87,6 @@ export default class ReusablePage extends ConcurrencyImplementation { return { jobInstance: async () => { - if (this.repairRequested || this.currentPage?.repairRequested) { - await this.repair(); - } - await timeoutExecute(BROWSER_TIMEOUT, (async () => { resources = await this.createResources(); })()); diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 089c9a280..54db5fa97 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -4,6 +4,7 @@ import * as http from 'http'; import config from './config'; import { Cluster } from 'puppeteer-cluster'; import ReusablePage from './concurrency/ReusablePage'; +import { parseLanguageUrl } from './language/lang'; const puppeteerConfig = require('../puppeteer.config.json'); if (config.PUPPETEER.EXEC_PATH) { @@ -46,6 +47,8 @@ class Server { this.server.listen(config.SERVER.HTTP_PORT, () => { console.log(`Mempool Unfurl Server is running on port ${config.SERVER.HTTP_PORT}`); }); + + this.initClusterPages(); } async stopServer() { @@ -63,9 +66,24 @@ class Server { this.app.get('*', (req, res) => { return this.renderHTML(req, res) }) } + async initClusterPages() { + for (let i = 0; i < config.PUPPETEER.CLUSTER_SIZE; i++) { + this.cluster?.execute({ action: 'init' }); + } + } + async clusterTask({ page, data: { url, path, action } }) { + if (action === 'init') { + return; + } try { - if (action === 'screenshot' || action === 'html') { + const urlParts = parseLanguageUrl(path); + if (page.language !== urlParts.lang) { + // switch language + page.language = urlParts.lang; + const localizedUrl = urlParts.lang ? `${this.mempoolHost}/${urlParts.lang}${urlParts.path}` : `${this.mempoolHost}${urlParts.path}` ; + await page.goto(localizedUrl, { waitUntil: "load" }); + } else { const loaded = await page.evaluate(async (path) => { if (window['ogService']) { window['ogService'].loadPage(path); @@ -73,26 +91,21 @@ class Server { } else { return false; } - }, path) - + }, urlParts.path); if (!loaded) { throw new Error('failed to access open graph service'); } + } - if (action === 'screenshot') { - const waitForReady = await page.$('meta[property="og:preview:loading"]'); - const alreadyReady = await page.$('meta[property="og:preview:ready"]'); - if (waitForReady != null && alreadyReady == null) { - await page.waitForSelector('meta[property="og:preview:ready"]', { timeout: 8000 }); - } - return page.screenshot(); - } else if (action === 'html') { - const alreadyReady = await page.$('meta[property="og:meta:ready"]'); - if (alreadyReady == null) { - await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: 8000 }); - } - return page.content(); + if (action === 'screenshot') { + const waitForReady = await page.$('meta[property="og:preview:loading"]'); + if (waitForReady != null) { + await page.waitForSelector('meta[property="og:preview:ready"]', { timeout: 3000 }); } + return page.screenshot(); + } else if (action === 'html') { + await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: 3000 }); + return page.content(); } } catch (e) { console.log(`failed to render page for ${action}`, e instanceof Error ? e.message : e); @@ -102,13 +115,13 @@ class Server { async renderPreview(req, res) { try { - // strip default language code for compatibility - const path = req.params[0].replace('/en/', '/'); + const path = req.params[0] const img = await this.cluster?.execute({ url: this.mempoolHost + path, path: path, action: 'screenshot' }); if (!img) { throw new Error('failed to render preview image'); } + res.contentType('image/png'); res.send(img); } catch (e) { @@ -127,10 +140,7 @@ class Server { } try { - // strip default language code for compatibility - const path = req.params[0].replace('/en/', '/'); - - let html = await this.cluster?.execute({ url: this.mempoolHost + req.params[0], path: req.params[0], action: 'html' }); + let html = await this.cluster?.execute({ url: this.mempoolHost + path, path: path, action: 'html' }); if (!html) { throw new Error('failed to render preview image'); } diff --git a/unfurler/src/language/lang.ts b/unfurler/src/language/lang.ts new file mode 100644 index 000000000..610e68312 --- /dev/null +++ b/unfurler/src/language/lang.ts @@ -0,0 +1,79 @@ +export interface Language { + code: string; + name: string; +} + +const languageList: Language[] = [ + { code: 'ar', name: 'العربية' }, // Arabic + { code: 'bg', name: 'Български' }, // Bulgarian + { code: 'bs', name: 'Bosanski' }, // Bosnian + { code: 'ca', name: 'Català' }, // Catalan + { code: 'cs', name: 'Čeština' }, // Czech + { code: 'da', name: 'Dansk' }, // Danish + { code: 'de', name: 'Deutsch' }, // German + { code: 'et', name: 'Eesti' }, // Estonian + { code: 'el', name: 'Ελληνικά' }, // Greek + { code: 'en', name: 'English' }, // English + { code: 'es', name: 'Español' }, // Spanish + { code: 'eo', name: 'Esperanto' }, // Esperanto + { code: 'eu', name: 'Euskara' }, // Basque + { code: 'fa', name: 'فارسی' }, // Persian + { code: 'fr', name: 'Français' }, // French + { code: 'gl', name: 'Galego' }, // Galician + { code: 'ko', name: '한국어' }, // Korean + { code: 'hr', name: 'Hrvatski' }, // Croatian + { code: 'id', name: 'Bahasa Indonesia' },// Indonesian + { code: 'hi', name: 'हिन्दी' }, // Hindi + { code: 'it', name: 'Italiano' }, // Italian + { code: 'he', name: 'עברית' }, // Hebrew + { code: 'ka', name: 'ქართული' }, // Georgian + { code: 'lv', name: 'Latviešu' }, // Latvian + { code: 'lt', name: 'Lietuvių' }, // Lithuanian + { code: 'hu', name: 'Magyar' }, // Hungarian + { code: 'mk', name: 'Македонски' }, // Macedonian + { code: 'ms', name: 'Bahasa Melayu' }, // Malay + { code: 'nl', name: 'Nederlands' }, // Dutch + { code: 'ja', name: '日本語' }, // Japanese + { code: 'nb', name: 'Norsk' }, // Norwegian Bokmål + { code: 'nn', name: 'Norsk Nynorsk' }, // Norwegian Nynorsk + { code: 'pl', name: 'Polski' }, // Polish + { code: 'pt', name: 'Português' }, // Portuguese + { code: 'pt-BR', name: 'Português (Brazil)' }, // Portuguese (Brazil) + { code: 'ro', name: 'Română' }, // Romanian + { code: 'ru', name: 'Русский' }, // Russian + { code: 'sk', name: 'Slovenčina' }, // Slovak + { code: 'sl', name: 'Slovenščina' }, // Slovenian + { code: 'sr', name: 'Српски / srpski' }, // Serbian + { code: 'sh', name: 'Srpskohrvatski / српскохрватски' },// Serbo-Croatian + { code: 'fi', name: 'Suomi' }, // Finnish + { code: 'sv', name: 'Svenska' }, // Swedish + { code: 'th', name: 'ไทย' }, // Thai + { code: 'tr', name: 'Türkçe' }, // Turkish + { code: 'uk', name: 'Українська' }, // Ukrainian + { code: 'vi', name: 'Tiếng Việt' }, // Vietnamese + { code: 'zh', name: '中文' }, // Chinese +]; + +const languageDict = {}; +languageList.forEach(lang => { + languageDict[lang.code] = lang +}); +export const languages = languageDict; + +// expects path to start with a leading '/' +export function parseLanguageUrl(path) { + const parts = path.split('/'); + let lang; + let rest; + if (languages[parts[1]]) { + lang = parts[1]; + rest = '/' + parts.slice(2).join('/'); + } else { + lang = null; + rest = path; + } + if (lang === 'en') { + lang = null; + } + return { lang, path: rest }; +}