Merge pull request #4187 from mempool/mononaut/unfurler-debugging

Unfurler debugging
This commit is contained in:
wiz 2023-08-19 23:23:53 +09:00 committed by GitHub
commit 15bd7bc068
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 57 additions and 50 deletions

View File

@ -530,6 +530,7 @@ osCertbotDryRun()
zfsCreateFilesystems()
{
zfs create -o "mountpoint=/backup" "${ZPOOL}/backup"
zfs create -o "mountpoint=/var/cache/nginx" "${ZPOOL}/cache"
zfs create -o "mountpoint=${ELEMENTS_HOME}" "${ZPOOL}/elements"
zfs create -o "mountpoint=${BITCOIN_HOME}" "${ZPOOL}/bitcoin"
@ -1852,8 +1853,6 @@ chown "${MEMPOOL_USER}:${MEMPOOL_GROUP}" "${MEMPOOL_MYSQL_CREDENTIALS}"
echo "[*] Adding Nginx configuration"
osSudo "${ROOT_USER}" install -c -o "${ROOT_USER}" -g "${ROOT_GROUP}" -m 644 "${MEMPOOL_HOME}/${MEMPOOL_REPO_NAME}/production/nginx/nginx.conf" "${NGINX_CONFIGURATION}"
mkdir -p /var/cache/nginx/services /var/cache/nginx/api
chown "${NGINX_USER}:${NGINX_GROUP}" /var/cache/nginx/services /var/cache/nginx/api
ln -s "${MEMPOOL_HOME}/mempool" "${NGINX_ETC_FOLDER}/mempool"
osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_USER__!${NGINX_USER}!" "${NGINX_CONFIGURATION}"
osSudo "${ROOT_USER}" sed -i.orig "s!__NGINX_ETC_FOLDER__!${NGINX_ETC_FOLDER}!" "${NGINX_CONFIGURATION}"

View File

@ -1,7 +1,7 @@
# proxy cache
proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=600s max_size=200m;
proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=600s max_size=200m;
proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=600s max_size=200m;
proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=600s max_size=200m;
proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=600s max_size=200m;
types_hash_max_size 2048;
proxy_cache_path /var/cache/nginx/api keys_zone=api:20m levels=1:2 inactive=365d max_size=2000m;
proxy_cache_path /var/cache/nginx/unfurler keys_zone=unfurler:20m levels=1:2 inactive=365d max_size=2000m;
proxy_cache_path /var/cache/nginx/slurper keys_zone=slurper:20m levels=1:2 inactive=365d max_size=5000m;
proxy_cache_path /var/cache/nginx/services keys_zone=services:20m levels=1:2 inactive=365d max_size=100m;
proxy_cache_path /var/cache/nginx/markets keys_zone=markets:20m levels=1:2 inactive=365d max_size=100m;
types_hash_max_size 4096;

View File

@ -11,12 +11,13 @@ const BROWSER_TIMEOUT = 8000;
const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000;
const maxConcurrency = config.PUPPETEER.CLUSTER_SIZE;
interface RepairablePage extends puppeteer.Page {
export interface RepairablePage extends puppeteer.Page {
repairRequested?: boolean;
language?: string | null;
createdAt?: number;
free?: boolean;
index?: number;
clusterGroup?: string;
}
interface ResourceData {
@ -76,7 +77,7 @@ export default class ReusablePage extends ConcurrencyImplementation {
for (let i = 0; i < maxConcurrency; i++) {
const newPage = await this.initPage();
newPage.index = this.pages.length;
logger.info(`initialized page ${newPage.index}`);
logger.info(`initialized page ${newPage.clusterGroup}:${newPage.index}`);
this.pages.push(newPage);
}
}
@ -87,6 +88,7 @@ export default class ReusablePage extends ConcurrencyImplementation {
protected async initPage(): Promise<RepairablePage> {
const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
page.clusterGroup = 'unfurler';
page.language = null;
page.createdAt = Date.now();
let defaultUrl
@ -129,6 +131,7 @@ export default class ReusablePage extends ConcurrencyImplementation {
protected async repairPage(page) {
// create a new page
logger.debug(`Repairing page ${page.clusterGroup}:${page.index}`);
const newPage = await this.initPage();
newPage.free = true;
// replace the old page
@ -138,7 +141,7 @@ export default class ReusablePage extends ConcurrencyImplementation {
try {
await page.goto('about:blank', {timeout: 200}); // prevents memory leak (maybe?)
} catch (e) {
logger.err('unexpected page repair error');
logger.err(`unexpected page repair error ${page.clusterGroup}:${page.index}`);
}
await page.close();
return newPage;

View File

@ -2,19 +2,11 @@ import * as puppeteer from 'puppeteer';
import { timeoutExecute } from 'puppeteer-cluster/dist/util';
import logger from '../logger';
import config from '../config';
import ReusablePage from './ReusablePage';
import ReusablePage, { RepairablePage } from './ReusablePage';
const mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : '');
const mockImageBuffer = Buffer.from("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=", 'base64');
interface RepairablePage extends puppeteer.Page {
repairRequested?: boolean;
language?: string | null;
createdAt?: number;
free?: boolean;
index?: number;
}
export default class ReusableSSRPage extends ReusablePage {
public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
@ -27,31 +19,32 @@ export default class ReusableSSRPage extends ReusablePage {
protected async initPage(): Promise<RepairablePage> {
const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
page.clusterGroup = 'slurper';
page.language = null;
page.createdAt = Date.now();
const defaultUrl = mempoolHost + '/about';
page.on('pageerror', (err) => {
console.log(err);
// page.repairRequested = true;
page.repairRequested = true;
});
await page.setRequestInterception(true);
page.on('request', req => {
if (req.isInterceptResolutionHandled()) {
return req.continue();
}
if (req.resourceType() === 'image') {
return req.respond({
contentType: 'image/png',
headers: {"Access-Control-Allow-Origin": "*"},
body: mockImageBuffer
});
} else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) {
return req.abort();
} else {
return req.continue();
}
});
page.on('request', req => {
if (req.isInterceptResolutionHandled()) {
return req.continue();
}
if (req.resourceType() === 'image') {
return req.respond({
contentType: 'image/png',
headers: {"Access-Control-Allow-Origin": "*"},
body: mockImageBuffer
});
} else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) {
return req.abort();
} else {
return req.continue();
}
});
try {
await page.goto(defaultUrl, { waitUntil: "networkidle0" });
await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 });

View File

@ -5,7 +5,7 @@ import * as https from 'https';
import config from './config';
import { Cluster } from 'puppeteer-cluster';
import ReusablePage from './concurrency/ReusablePage';
import ReusableSSRPage from './concurrency/ReusablePage';
import ReusableSSRPage from './concurrency/ReusableSSRPage';
import { parseLanguageUrl } from './language/lang';
import { matchRoute } from './routes';
import nodejsPath from 'path';
@ -120,8 +120,9 @@ class Server {
this.app.get('*', (req, res) => { return this.renderHTML(req, res, false) })
}
async clusterTask({ page, data: { url, path, action } }) {
async clusterTask({ page, data: { url, path, action, reqUrl } }) {
try {
logger.info(`rendering "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`);
const urlParts = parseLanguageUrl(path);
if (page.language !== urlParts.lang) {
// switch language
@ -156,20 +157,21 @@ class Server {
});
return screenshot;
} else if (success === false) {
logger.warn(`failed to render ${path} for ${action} due to client-side error, e.g. requested an invalid txid`);
logger.warn(`failed to render ${reqUrl} for ${action} due to client-side error, e.g. requested an invalid txid`);
page.repairRequested = true;
} else {
logger.warn(`failed to render ${path} for ${action} due to puppeteer timeout`);
logger.warn(`failed to render ${reqUrl} for ${action} due to puppeteer timeout`);
page.repairRequested = true;
}
} catch (e) {
logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
page.repairRequested = true;
}
}
async ssrClusterTask({ page, data: { url, path, action } }) {
async ssrClusterTask({ page, data: { url, path, action, reqUrl } }) {
try {
logger.info(`slurping "${reqUrl}" on tab ${page.clusterGroup}:${page.index}`);
const urlParts = parseLanguageUrl(path);
if (page.language !== urlParts.lang) {
// switch language
@ -207,7 +209,7 @@ class Server {
let html = await page.content();
return html;
} else {
logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
logger.err(`failed to render ${reqUrl} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
page.repairRequested = true;
}
}
@ -228,7 +230,9 @@ class Server {
// don't bother unless the route is definitely renderable
if (rawPath.includes('/preview/') && matchedRoute.render) {
img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot' });
img = await this.cluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'screenshot', reqUrl: req.url });
} else {
logger.info('rendering not enabled for page "' + req.url + '"');
}
if (!img) {
@ -258,10 +262,17 @@ class Server {
res.status(404).send();
return;
} else {
logger.info('proxying resource "' + req.url + '"');
if (this.secureHost) {
https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => {
res.writeHead(got.statusCode, got.headers);
return got.pipe(res);
});
} else {
http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => {
res.writeHead(got.statusCode, got.headers);
return got.pipe(res);
});
}
return;
}
@ -270,9 +281,10 @@ class Server {
let result = '';
try {
if (unfurl) {
logger.info('unfurling "' + req.url + '"');
result = await this.renderUnfurlMeta(rawPath);
} else {
result = await this.renderSEOPage(rawPath);
result = await this.renderSEOPage(rawPath, req.url);
}
if (result && result.length) {
if (result === '404') {
@ -326,8 +338,8 @@ class Server {
</html>`;
}
async renderSEOPage(rawPath: string): Promise<string> {
let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' });
async renderSEOPage(rawPath: string, reqUrl: string): Promise<string> {
let html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr', reqUrl });
// remove javascript to prevent double hydration
if (html && html.length) {
html = html.replaceAll(/<script.*<\/script>/g, "");