diff --git a/production/install b/production/install index 47a779e57..67e2f7d78 100755 --- a/production/install +++ b/production/install @@ -530,6 +530,7 @@ osCertbotDryRun() zfsCreateFilesystems() { zfs create -o "mountpoint=/backup" "${ZPOOL}/backup" + zfs create -o "mountpoint=/var/cache/nginx" "${ZPOOL}/cache" zfs create -o "mountpoint=${ELEMENTS_HOME}" "${ZPOOL}/elements" zfs create -o "mountpoint=${BITCOIN_HOME}" "${ZPOOL}/bitcoin" @@ -1852,8 +1853,6 @@ chown "${MEMPOOL_USER}:${MEMPOOL_GROUP}" "${MEMPOOL_MYSQL_CREDENTIALS}" echo "[*] Adding Nginx configuration" osSudo "${ROOT_USER}" install -c -o "${ROOT_USER}" -g "${ROOT_GROUP}" -m 644 "${MEMPOOL_HOME}/${MEMPOOL_REPO_NAME}/production/nginx/nginx.conf" "${NGINX_CONFIGURATION}" -mkdir -p /var/cache/nginx/services /var/cache/nginx/api -chown "${NGINX_USER}:${NGINX_GROUP}" /var/cache/nginx/services /var/cache/nginx/api ln -s "${MEMPOOL_HOME}/mempool" "${NGINX_ETC_FOLDER}/mempool" osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_USER__!${NGINX_USER}!" "${NGINX_CONFIGURATION}" osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_ETC_FOLDER__!${NGINX_ETC_FOLDER}!" "${NGINX_CONFIGURATION}" diff --git a/production/nginx/http-proxy-cache.conf b/production/nginx/http-proxy-cache.conf index 4d0e6614e..92d769b81 100644 --- a/production/nginx/http-proxy-cache.conf +++ b/production/nginx/http-proxy-cache.conf @@ -1,7 +1,7 @@ # proxy cache -proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=600s max_size=200m; -proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=600s max_size=200m; -types_hash_max_size 2048; +proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=365d max_size=2000m; +proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=365d max_size=2000m; +proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=365d max_size=5000m; +proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=365d max_size=100m; +proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=365d max_size=100m; +types_hash_max_size 4096; diff --git a/unfurler/src/concurrency/ReusablePage.ts b/unfurler/src/concurrency/ReusablePage.ts index 4b272afb0..f6724b18c 100644 --- a/unfurler/src/concurrency/ReusablePage.ts +++ b/unfurler/src/concurrency/ReusablePage.ts @@ -11,12 +11,13 @@ const BROWSER_TIMEOUT = 8000; const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000; const maxConcurrency = config.PUPPETEER.CLUSTER_SIZE; -interface RepairablePage extends puppeteer.Page { +export interface RepairablePage extends puppeteer.Page { repairRequested?: boolean; language?: string | null; createdAt?: number; free?: boolean; index?: number; + clusterGroup?: string; } interface ResourceData { @@ -76,7 +77,7 @@ export default class ReusablePage extends ConcurrencyImplementation { for (let i = 0; i < maxConcurrency; i++) { const newPage = await this.initPage(); newPage.index = this.pages.length; - logger.info(`initialized page ${newPage.index}`); + logger.info(`initialized page ${newPage.clusterGroup}:${newPage.index}`); this.pages.push(newPage); } } @@ -87,6 +88,7 @@ export default class ReusablePage extends ConcurrencyImplementation { protected async initPage(): Promise { const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage; + page.clusterGroup = 'unfurler'; page.language = null; page.createdAt = Date.now(); let defaultUrl @@ -129,6 +131,7 @@ export default class ReusablePage extends ConcurrencyImplementation { protected async repairPage(page) { // create a new page + logger.debug(`Repairing page ${page.clusterGroup}:${page.index}`); const newPage = await this.initPage(); newPage.free = true; // replace the old page @@ -138,7 +141,7 @@ export default class ReusablePage extends ConcurrencyImplementation { try { await page.goto('about:blank', {timeout: 200}); // prevents memory leak (maybe?) } catch (e) { - logger.err('unexpected page repair error'); + logger.err(`unexpected page repair error ${page.clusterGroup}:${page.index}`); } await page.close(); return newPage; diff --git a/unfurler/src/concurrency/ReusableSSRPage.ts b/unfurler/src/concurrency/ReusableSSRPage.ts index c68514a16..24e9a0f2a 100644 --- a/unfurler/src/concurrency/ReusableSSRPage.ts +++ b/unfurler/src/concurrency/ReusableSSRPage.ts @@ -2,19 +2,11 @@ import * as puppeteer from 'puppeteer'; import { timeoutExecute } from 'puppeteer-cluster/dist/util'; import logger from '../logger'; import config from '../config'; -import ReusablePage from './ReusablePage'; +import ReusablePage, { RepairablePage } from './ReusablePage'; const mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''); const mockImageBuffer = Buffer.from("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=", 'base64'); -interface RepairablePage extends puppeteer.Page { - repairRequested?: boolean; - language?: string | null; - createdAt?: number; - free?: boolean; - index?: number; -} - export default class ReusableSSRPage extends ReusablePage { public constructor(options: puppeteer.LaunchOptions, puppeteer: any) { @@ -27,31 +19,32 @@ export default class ReusableSSRPage extends ReusablePage { protected async initPage(): Promise { const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage; + page.clusterGroup = 'slurper'; page.language = null; page.createdAt = Date.now(); const defaultUrl = mempoolHost + '/about'; page.on('pageerror', (err) => { console.log(err); - // page.repairRequested = true; + page.repairRequested = true; }); await page.setRequestInterception(true); - page.on('request', req => { - if (req.isInterceptResolutionHandled()) { - return req.continue(); - } - if (req.resourceType() === 'image') { - return req.respond({ - contentType: 'image/png', - headers: {"Access-Control-Allow-Origin": "*"}, - body: mockImageBuffer - }); - } else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) { - return req.abort(); - } else { - return req.continue(); - } - }); + page.on('request', req => { + if (req.isInterceptResolutionHandled()) { + return req.continue(); + } + if (req.resourceType() === 'image') { + return req.respond({ + contentType: 'image/png', + headers: {"Access-Control-Allow-Origin": "*"}, + body: mockImageBuffer + }); + } else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) { + return req.abort(); + } else { + return req.continue(); + } + }); try { await page.goto(defaultUrl, { waitUntil: "networkidle0" }); await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }); diff --git a/unfurler/src/index.ts b/unfurler/src/index.ts index 69882579f..a81dcbf7a 100644 --- a/unfurler/src/index.ts +++ b/unfurler/src/index.ts @@ -5,7 +5,7 @@ import * as https from 'https'; import config from './config'; import { Cluster } from 'puppeteer-cluster'; import ReusablePage from './concurrency/ReusablePage'; -import ReusableSSRPage from './concurrency/ReusablePage'; +import ReusableSSRPage from './concurrency/ReusableSSRPage'; import { parseLanguageUrl } from './language/lang'; import { matchRoute } from './routes'; import nodejsPath from 'path'; @@ -120,8 +120,9 @@ class Server { this.app.get('*', (req, res) => { return this.renderHTML(req, res, false) }) } - async clusterTask({ page, data: { url, path, action } }) { + async clusterTask({ page, data: { url, path, action, reqUrl } }) { try { + logger.info(`rendering "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`); const urlParts = parseLanguageUrl(path); if (page.language !== urlParts.lang) { // switch language @@ -156,20 +157,21 @@ class Server { }); return screenshot; } else if (success === false) { - logger.warn(`failed to render ${path} for ${action} due to client-side error, e.g. requested an invalid txid`); + logger.warn(`failed to render ${reqUrl} for ${action} due to client-side error, e.g. requested an invalid txid`); page.repairRequested = true; } else { - logger.warn(`failed to render ${path} for ${action} due to puppeteer timeout`); + logger.warn(`failed to render ${reqUrl} for ${action} due to puppeteer timeout`); page.repairRequested = true; } } catch (e) { - logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); + logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); page.repairRequested = true; } } - async ssrClusterTask({ page, data: { url, path, action } }) { + async ssrClusterTask({ page, data: { url, path, action, reqUrl } }) { try { + logger.info(`slurping "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`); const urlParts = parseLanguageUrl(path); if (page.language !== urlParts.lang) { // switch language @@ -207,7 +209,7 @@ class Server { let html = await page.content(); return html; } else { - logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); + logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`)); page.repairRequested = true; } } @@ -228,7 +230,9 @@ class Server { // don't bother unless the route is definitely renderable if (rawPath.includes('/preview/') && matchedRoute.render) { - img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot' }); + img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot', reqUrl: req.url }); + } else { + logger.info('rendering not enabled for page "' + req.url + '"'); } if (!img) { @@ -258,10 +262,17 @@ class Server { res.status(404).send(); return; } else { + logger.info('proxying resource "' + req.url + '"'); if (this.secureHost) { - https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); + https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + res.writeHead(got.statusCode, got.headers); + return got.pipe(res); + }); } else { - http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); + http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => { + res.writeHead(got.statusCode, got.headers); + return got.pipe(res); + }); } return; } @@ -270,9 +281,10 @@ class Server { let result = ''; try { if (unfurl) { + logger.info('unfurling "' + req.url + '"'); result = await this.renderUnfurlMeta(rawPath); } else { - result = await this.renderSEOPage(rawPath); + result = await this.renderSEOPage(rawPath, req.url); } if (result && result.length) { if (result === '404') { @@ -326,8 +338,8 @@ class Server { `; } - async renderSEOPage(rawPath: string): Promise { - let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' }); + async renderSEOPage(rawPath: string, reqUrl: string): Promise { + let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr', reqUrl }); // remove javascript to prevent double hydration if (html && html.length) { html = html.replaceAll(//g, "");