From df8b6cd53cb478b2049b820eae5fcf2cc13ed005 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Sat, 19 Aug 2023 18:40:11 +0900 Subject: [PATCH 1/6] Fix unfurler resource proxying --- unfurler/src/index.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 69882579f..967d60117 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -259,9 +259,14 @@ class Server { return; } else { if (this.secureHost) { - https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); - } else { - http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); + https.get(config.SERVER.HOST + ':4200' + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + res.writeHead(got.statusCode, got.headers); + return got.pipe(res); + }); } else { + http.get(config.SERVER.HOST + ':4200' + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + res.writeHead(got.statusCode, got.headers); + return got.pipe(res); + }); } return; } From 3bda5537d7c65311df671c7d2ac7dd72e19720c1 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Sat, 19 Aug 2023 18:40:27 +0900 Subject: [PATCH 2/6] More verbose unfurler logs --- unfurler/src/index.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 967d60117..2c54a5575 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -258,12 +258,14 @@ class Server { res.status(404).send(); return; } else { + logger.info('proxying resource "' + req.url + '"'); if (this.secureHost) { - https.get(config.SERVER.HOST + ':4200' + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { res.writeHead(got.statusCode, got.headers); return got.pipe(res); - }); } else { - http.get(config.SERVER.HOST + ':4200' + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + }); + } else { + http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { res.writeHead(got.statusCode, got.headers); return got.pipe(res); }); @@ -272,6 +274,8 @@ class Server { } } + logger.info((unfurl ? 'unfurling ' : 'slurping "') + req.url + '"'); + let result = ''; try { if (unfurl) { From 126e87a74611b0b97ed294dea989fcabaac58ab2 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Sat, 19 Aug 2023 19:35:52 +0900 Subject: [PATCH 3/6] More more verbose unfurler logs --- unfurler/src/index.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 2c54a5575..6516cb18a 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -228,7 +228,10 @@ class Server { // don't bother unless the route is definitely renderable if (rawPath.includes('/preview/') && matchedRoute.render) { + logger.info('rendering "' + req.url + '"'); img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot' }); + } else { + logger.info('rendering not enabled for page "' + req.url + '"'); } if (!img) { From 7f2a459575b1ac1ec5089669a3b117bf46ffb683 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Sat, 19 Aug 2023 21:05:28 +0900 Subject: [PATCH 4/6] Fix SSR puppeteer page initialization --- unfurler/src/concurrency/ReusablePage.ts | 2 +- unfurler/src/concurrency/ReusableSSRPage.ts | 44 +++++++++------------ unfurler/src/index.ts | 2 +- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/unfurler/src/concurrency/ReusablePage.ts b/unfurler/src/concurrency/ReusablePage.ts index 4b272afb0..55ae22d03 100644 --- a/unfurler/src/concurrency/ReusablePage.ts +++ b/unfurler/src/concurrency/ReusablePage.ts @@ -11,7 +11,7 @@ const BROWSER_TIMEOUT = 8000; const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000; const maxConcurrency = config.PUPPETEER.CLUSTER_SIZE; -interface RepairablePage extends puppeteer.Page { +export interface RepairablePage extends puppeteer.Page { repairRequested?: boolean; language?: string | null; createdAt?: number; diff --git a/unfurler/src/concurrency/ReusableSSRPage.ts b/unfurler/src/concurrency/ReusableSSRPage.ts index c68514a16..03afb6c03 100644 --- a/unfurler/src/concurrency/ReusableSSRPage.ts +++ b/unfurler/src/concurrency/ReusableSSRPage.ts @@ -2,19 +2,11 @@ import * as puppeteer from 'puppeteer'; import { timeoutExecute } from 'puppeteer-cluster/dist/util'; import logger from '../logger'; import config from '../config'; -import ReusablePage from './ReusablePage'; +import ReusablePage, { RepairablePage } from './ReusablePage'; const mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''); const mockImageBuffer = Buffer.from("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=", 'base64'); -interface RepairablePage extends puppeteer.Page { - repairRequested?: boolean; - language?: string | null; - createdAt?: number; - free?: boolean; - index?: number; -} - export default class ReusableSSRPage extends ReusablePage { public constructor(options: puppeteer.LaunchOptions, puppeteer: any) { @@ -33,25 +25,25 @@ export default class ReusableSSRPage extends ReusablePage { page.on('pageerror', (err) => { console.log(err); - // page.repairRequested = true; + page.repairRequested = true; }); await page.setRequestInterception(true); - page.on('request', req => { - if (req.isInterceptResolutionHandled()) { - return req.continue(); - } - if (req.resourceType() === 'image') { - return req.respond({ - contentType: 'image/png', - headers: {"Access-Control-Allow-Origin": "*"}, - body: mockImageBuffer - }); - } else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) { - return req.abort(); - } else { - return req.continue(); - } - }); + page.on('request', req => { + if (req.isInterceptResolutionHandled()) { + return req.continue(); + } + if (req.resourceType() === 'image') { + return req.respond({ + contentType: 'image/png', + headers: {"Access-Control-Allow-Origin": "*"}, + body: mockImageBuffer + }); + } else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) { + return req.abort(); + } else { + return req.continue(); + } + }); try { await page.goto(defaultUrl, { waitUntil: "networkidle0" }); await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }); diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 6516cb18a..71adba257 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -5,7 +5,7 @@ import * as https from 'https'; import config from './config'; import { Cluster } from 'puppeteer-cluster'; import ReusablePage from './concurrency/ReusablePage'; -import ReusableSSRPage from './concurrency/ReusablePage'; +import ReusableSSRPage from './concurrency/ReusableSSRPage'; import { parseLanguageUrl } from './language/lang'; import { matchRoute } from './routes'; import nodejsPath from 'path'; From ed9d31686ed1ce6c6209bab7d32ec8758c471955 Mon Sep 17 00:00:00 2001 From: Mononaut Date: Sat, 19 Aug 2023 21:07:10 +0900 Subject: [PATCH 5/6] Add cluster/tab to unfurler logs --- unfurler/src/concurrency/ReusablePage.ts | 7 ++++-- unfurler/src/concurrency/ReusableSSRPage.ts | 1 + unfurler/src/index.ts | 26 ++++++++++----------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/unfurler/src/concurrency/ReusablePage.ts b/unfurler/src/concurrency/ReusablePage.ts index 55ae22d03..f6724b18c 100644 --- a/unfurler/src/concurrency/ReusablePage.ts +++ b/unfurler/src/concurrency/ReusablePage.ts @@ -17,6 +17,7 @@ export interface RepairablePage extends puppeteer.Page { createdAt?: number; free?: boolean; index?: number; + clusterGroup?: string; } interface ResourceData { @@ -76,7 +77,7 @@ export default class ReusablePage extends ConcurrencyImplementation { for (let i = 0; i < maxConcurrency; i++) { const newPage = await this.initPage(); newPage.index = this.pages.length; - logger.info(`initialized page ${newPage.index}`); + logger.info(`initialized page ${newPage.clusterGroup}:${newPage.index}`); this.pages.push(newPage); } } @@ -87,6 +88,7 @@ export default class ReusablePage extends ConcurrencyImplementation { protected async initPage(): Promise { const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage; + page.clusterGroup = 'unfurler'; page.language = null; page.createdAt = Date.now(); let defaultUrl @@ -129,6 +131,7 @@ export default class ReusablePage extends ConcurrencyImplementation { protected async repairPage(page) { // create a new page + logger.debug(`Repairing page ${page.clusterGroup}:${page.index}`); const newPage = await this.initPage(); newPage.free = true; // replace the old page @@ -138,7 +141,7 @@ export default class ReusablePage extends ConcurrencyImplementation { try { await page.goto('about:blank', {timeout: 200}); // prevents memory leak (maybe?) } catch (e) { - logger.err('unexpected page repair error'); + logger.err(`unexpected page repair error ${page.clusterGroup}:${page.index}`); } await page.close(); return newPage; diff --git a/unfurler/src/concurrency/ReusableSSRPage.ts b/unfurler/src/concurrency/ReusableSSRPage.ts index 03afb6c03..24e9a0f2a 100644 --- a/unfurler/src/concurrency/ReusableSSRPage.ts +++ b/unfurler/src/concurrency/ReusableSSRPage.ts @@ -19,6 +19,7 @@ export default class ReusableSSRPage extends ReusablePage { protected async initPage(): Promise { const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage; + page.clusterGroup = 'slurper'; page.language = null; page.createdAt = Date.now(); const defaultUrl = mempoolHost + '/about'; diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 71adba257..a81dcbf7a 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -120,8 +120,9 @@ class Server { this.app.get('*', (req, res) => { return this.renderHTML(req, res, false) }) } - async clusterTask({ page, data: { url, path, action } }) { + async clusterTask({ page, data: { url, path, action, reqUrl } }) { try { + logger.info(`rendering "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`); const urlParts = parseLanguageUrl(path); if (page.language !== urlParts.lang) { // switch language @@ -156,20 +157,21 @@ class Server { }); return screenshot; } else if (success === false) { - logger.warn(`failed to render ${path} for ${action} due to client-side error, e.g. requested an invalid txid`); + logger.warn(`failed to render ${reqUrl} for ${action} due to client-side error, e.g. requested an invalid txid`); page.repairRequested = true; } else { - logger.warn(`failed to render ${path} for ${action} due to puppeteer timeout`); + logger.warn(`failed to render ${reqUrl} for ${action} due to puppeteer timeout`); page.repairRequested = true; } } catch (e) { - logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); + logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); page.repairRequested = true; } } - async ssrClusterTask({ page, data: { url, path, action } }) { + async ssrClusterTask({ page, data: { url, path, action, reqUrl } }) { try { + logger.info(`slurping "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`); const urlParts = parseLanguageUrl(path); if (page.language !== urlParts.lang) { // switch language @@ -207,7 +209,7 @@ class Server { let html = await page.content(); return html; } else { - logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); + logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); page.repairRequested = true; } } @@ -228,8 +230,7 @@ class Server { // don't bother unless the route is definitely renderable if (rawPath.includes('/preview/') && matchedRoute.render) { - logger.info('rendering "' + req.url + '"'); - img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot' }); + img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot', reqUrl: req.url }); } else { logger.info('rendering not enabled for page "' + req.url + '"'); } @@ -277,14 +278,13 @@ class Server { } } - logger.info((unfurl ? 'unfurling ' : 'slurping "') + req.url + '"'); - let result = ''; try { if (unfurl) { + logger.info('unfurling "' + req.url + '"'); result = await this.renderUnfurlMeta(rawPath); } else { - result = await this.renderSEOPage(rawPath); + result = await this.renderSEOPage(rawPath, req.url); } if (result && result.length) { if (result === '404') { @@ -338,8 +338,8 @@ class Server { `; } - async renderSEOPage(rawPath: string): Promise { - let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' }); + async renderSEOPage(rawPath: string, reqUrl: string): Promise { + let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr', reqUrl }); // remove javascript to prevent double hydration if (html && html.length) { html = html.replaceAll(//g, ""); From 65847547b9f5cb56ca8ff8d8d9e85bab7659a2f1 Mon Sep 17 00:00:00 2001 From: wiz Date: Sat, 19 Aug 2023 23:23:28 +0900 Subject: [PATCH 6/6] ops: Tweak nginx cache for slurper --- production/install | 3 +-- production/nginx/http-proxy-cache.conf | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/production/install b/production/install index 47a779e57..67e2f7d78 100755 --- a/production/install +++ b/production/install @@ -530,6 +530,7 @@ osCertbotDryRun() zfsCreateFilesystems() { zfs create -o "mountpoint=/backup" "${ZPOOL}/backup" + zfs create -o "mountpoint=/var/cache/nginx" "${ZPOOL}/cache" zfs create -o "mountpoint=${ELEMENTS_HOME}" "${ZPOOL}/elements" zfs create -o "mountpoint=${BITCOIN_HOME}" "${ZPOOL}/bitcoin" @@ -1852,8 +1853,6 @@ chown "${MEMPOOL_USER}:${MEMPOOL_GROUP}" "${MEMPOOL_MYSQL_CREDENTIALS}" echo "[*] Adding Nginx configuration" osSudo "${ROOT_USER}" install -c -o "${ROOT_USER}" -g "${ROOT_GROUP}" -m 644 "${MEMPOOL_HOME}/${MEMPOOL_REPO_NAME}/production/nginx/nginx.conf" "${NGINX_CONFIGURATION}" -mkdir -p /var/cache/nginx/services /var/cache/nginx/api -chown "${NGINX_USER}:${NGINX_GROUP}" /var/cache/nginx/services /var/cache/nginx/api ln -s "${MEMPOOL_HOME}/mempool" "${NGINX_ETC_FOLDER}/mempool" osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_USER__!${NGINX_USER}!" "${NGINX_CONFIGURATION}" osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_ETC_FOLDER__!${NGINX_ETC_FOLDER}!" "${NGINX_CONFIGURATION}" diff --git a/production/nginx/http-proxy-cache.conf b/production/nginx/http-proxy-cache.conf index 4d0e6614e..92d769b81 100644 --- a/production/nginx/http-proxy-cache.conf +++ b/production/nginx/http-proxy-cache.conf @@ -1,7 +1,7 @@ # proxy cache -proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=600s max_size=200m; -types_hash_max_size 2048; +proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=365d max_size=2000m; +proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=365d max_size=2000m; +proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=365d max_size=5000m; +proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=365d max_size=100m; +proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=365d max_size=100m; +types_hash_max_size 4096;