Speed up unfurls by reusing puppeteer sessions

This commit is contained in:
Mononaut
2022-08-02 00:37:54 +00:00
parent 0257c83873
commit fa51a1272a
10 changed files with 400 additions and 89 deletions

View File

@@ -0,0 +1,119 @@
import * as puppeteer from 'puppeteer';
import ConcurrencyImplementation, { ResourceData } from 'puppeteer-cluster/dist/concurrency/ConcurrencyImplementation';
import { timeoutExecute } from 'puppeteer-cluster/dist/util';
import config from '../config';
// Base address of the mempool frontend, used as the prefix of the default preview URL.
// A ":<port>" suffix is appended only when a port is configured.
const mempoolHost = `${config.MEMPOOL.HTTP_HOST}${config.MEMPOOL.HTTP_PORT ? `:${config.MEMPOOL.HTTP_PORT}` : ''}`;
// Time limit in milliseconds handed to timeoutExecute when acquiring the page for a job.
const BROWSER_TIMEOUT = 5 * 1000;
// Maximum lifetime of a single page session, in milliseconds.
// MAX_PAGE_AGE is multiplied by 1000 (i.e. treated as seconds); 24 hours is used when it is unset or 0.
const maxAgeMs = 1000 * (config.PUPPETEER.MAX_PAGE_AGE ? config.PUPPETEER.MAX_PAGE_AGE : 24 * 60 * 60);
// A puppeteer page that may additionally carry a per-page "needs repair" flag.
interface repairablePage extends puppeteer.Page {
// When truthy, ReusablePage runs a repair before handing the page to the next job
// and again when a job instance is closed. Optional: absent on a freshly created page.
repairRequested?: boolean;
}
/**
 * Concurrency implementation for puppeteer-cluster that serves every job from
 * one long-lived page instead of opening a fresh page per job.
 *
 * The page is created lazily, pre-loaded with a default preview URL and then
 * handed out to each job. The browser is relaunched ("repaired") when a repair
 * has been requested, when the page emitted a `pageerror`, or when the page is
 * older than `maxAgeMs`. A repair only starts once no job instance is open;
 * callers that arrive earlier are parked until the next repair attempt ends.
 */
export default class ReusablePage extends ConcurrencyImplementation {
  protected browser: puppeteer.Browser | null = null;
  protected currentPage: repairablePage | null = null;
  // timestamp (ms since epoch) at which `currentPage` was opened
  protected pageCreatedAt = 0;
  // true while a browser relaunch is in progress
  private repairing = false;
  // set when something asked for a relaunch; cleared after a successful one
  private repairRequested = false;
  // number of job instances handed out and not yet closed
  private openInstances = 0;
  // resolvers of callers waiting for the current/next repair attempt to end
  private waitingForRepairResolvers: (() => void)[] = [];

  public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
    super(options, puppeteer);
  }

  /**
   * Relaunches the browser and recreates the shared page.
   *
   * If job instances are still open, or another repair is already running,
   * this call does not repair anything itself: it resolves once the next
   * repair attempt has ended (successfully or not).
   *
   * @throws Error('Unable to restart chrome.') when the browser cannot be relaunched.
   */
  private async repair(): Promise<void> {
    if (this.openInstances !== 0 || this.repairing) {
      // already repairing or there are still pages open? wait for start/finish
      await new Promise<void>(resolve => this.waitingForRepairResolvers.push(resolve));
      return;
    }

    this.repairing = true;
    console.log('Starting repair');

    try {
      try {
        // will probably fail, but just in case the repair was not necessary
        await this.browser?.close();
      } catch (e) {
        console.log('Unable to close browser.');
      }

      try {
        this.browser = await this.puppeteer.launch(this.options) as puppeteer.Browser;
      } catch (err) {
        // keep the request pending so that the next job retries the relaunch
        this.repairRequested = true;
        throw new Error('Unable to restart chrome.');
      }

      this.currentPage = null;
      this.repairRequested = false;
    } finally {
      // Always leave the "repairing" state and release the waiters, even when the
      // relaunch failed; otherwise every later repair() call would wait forever.
      this.repairing = false;
      const waiting = this.waitingForRepairResolvers;
      this.waitingForRepairResolvers = [];
      waiting.forEach(resolve => resolve());
    }

    await this.createResources();
  }

  /** Launches the browser with the options given to the constructor. */
  public async init() {
    this.browser = await this.puppeteer.launch(this.options);
  }

  /** Closes the browser, if one has been launched. */
  public async close() {
    await this.browser?.close();
  }

  /**
   * Returns the shared page, creating it first when there is none.
   *
   * A new page is navigated to the default preview URL before it is returned,
   * and any `pageerror` it emits flags the browser for repair.
   *
   * @throws Error when the browser has not been launched, or when opening or
   *   navigating the page fails.
   */
  protected async createResources(): Promise<ResourceData> {
    if (!this.currentPage) {
      if (!this.browser) {
        throw new Error('Browser has not been launched.');
      }
      this.currentPage = await this.browser.newPage();
      this.pageCreatedAt = Date.now();
      const defaultUrl = mempoolHost + '/preview/block/1';
      this.currentPage.on('pageerror', () => {
        this.repairRequested = true;
      });
      await this.currentPage.goto(defaultUrl, { waitUntil: "load" });
    }
    return {
      page: this.currentPage
    }
  }

  /**
   * Builds the worker-facing handle: `jobInstance` hands out the shared page,
   * `close` is a no-op and `repair` forces a browser relaunch.
   */
  public async workerInstance() {
    let resources: ResourceData;

    return {
      jobInstance: async () => {
        if (this.repairRequested || this.currentPage?.repairRequested) {
          await this.repair();
        }

        // acquiring the page must not take longer than BROWSER_TIMEOUT
        await timeoutExecute(BROWSER_TIMEOUT, (async () => {
          resources = await this.createResources();
        })());
        this.openInstances += 1;

        return {
          resources,

          close: async () => {
            this.openInstances -= 1; // decrement first in case of error
            // repair on request, on page error, or once the page exceeded its maximum age
            if (this.repairRequested || this.currentPage?.repairRequested || (Date.now() - this.pageCreatedAt > maxAgeMs)) {
              await this.repair();
            }
          },
        };
      },

      close: async () => {},

      repair: async () => {
        console.log('Repair requested');
        this.repairRequested = true;
        await this.repair();
      },
    };
  }
}

View File

@@ -12,6 +12,7 @@ interface IConfig {
PUPPETEER: {
CLUSTER_SIZE: number;
EXEC_PATH?: string;
MAX_PAGE_AGE?: number;
};
}

View File

@@ -3,6 +3,7 @@ import { Application, Request, Response, NextFunction } from 'express';
import * as http from 'http';
import config from './config';
import { Cluster } from 'puppeteer-cluster';
import ReusablePage from './concurrency/ReusablePage';
const puppeteerConfig = require('../puppeteer.config.json');
if (config.PUPPETEER.EXEC_PATH) {
@@ -32,7 +33,7 @@ class Server {
;
this.cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_CONTEXT,
concurrency: ReusablePage,
maxConcurrency: config.PUPPETEER.CLUSTER_SIZE,
puppeteerOptions: puppeteerConfig,
});
@@ -52,47 +53,40 @@ class Server {
this.app.get('*', (req, res) => { return this.renderHTML(req, res) })
}
async clusterTask({ page, data: { url, action } }) {
await page.goto(url, { waitUntil: "networkidle0" });
switch (action) {
case 'screenshot': {
await page.evaluate(async () => {
// wait for all images to finish loading
const imgs = Array.from(document.querySelectorAll("img"));
await Promise.all([
document.fonts.ready,
...imgs.map((img) => {
if (img.complete) {
if (img.naturalHeight !== 0) return;
throw new Error("Image failed to load");
}
return new Promise((resolve, reject) => {
img.addEventListener("load", resolve);
img.addEventListener("error", reject);
});
}),
]);
});
const waitForReady = await page.$('meta[property="og:loading"]');
const alreadyReady = await page.$('meta[property="og:ready"]');
if (waitForReady != null && alreadyReady == null) {
try {
await page.waitForSelector('meta[property="og:ready]"', { timeout: 10000 });
} catch (e) {
// probably timed out
async clusterTask({ page, data: { url, path, action } }) {
try {
if (action === 'screenshot' || action === 'html') {
const loaded = await page.evaluate(async (path) => {
if (window['ogService']) {
window['ogService'].loadPage(path);
return true;
} else {
return false;
}
}, path)
if (!loaded) {
throw new Error('failed to access open graph service');
}
return page.screenshot();
} break;
default: {
try {
await page.waitForSelector('meta[property="og:title"]', { timeout: 10000 })
const tag = await page.$('meta[property="og:title"]');
} catch (e) {
// probably timed out
if (action === 'screenshot') {
const waitForReady = await page.$('meta[property="og:preview:loading"]');
const alreadyReady = await page.$('meta[property="og:preview:ready"]');
if (waitForReady != null && alreadyReady == null) {
await page.waitForSelector('meta[property="og:preview:ready"]', { timeout: 8000 });
}
return page.screenshot();
} else if (action === 'html') {
const alreadyReady = await page.$('meta[property="og:meta:ready"]');
if (alreadyReady == null) {
await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: 8000 });
}
return page.content();
}
return page.content();
}
} catch (e) {
console.log(`failed to render page for ${action}`, e instanceof Error ? e.message : e);
page.repairRequested = true;
}
}
@@ -100,8 +94,11 @@ class Server {
try {
// strip default language code for compatibility
const path = req.params[0].replace('/en/', '/');
const img = await this.cluster?.execute({ url: this.mempoolHost + path, action: 'screenshot' });
const img = await this.cluster?.execute({ url: this.mempoolHost + path, path: path, action: 'screenshot' });
if (!img) {
throw new Error('failed to render preview image');
}
res.contentType('image/png');
res.send(img);
} catch (e) {
@@ -120,9 +117,14 @@ class Server {
}
try {
let html = await this.cluster?.execute({ url: this.mempoolHost + req.params[0], action: 'html' });
// strip default language code for compatibility
const path = req.params[0].replace('/en/', '/');
res.send(html)
let html = await this.cluster?.execute({ url: this.mempoolHost + req.params[0], path: req.params[0], action: 'html' });
if (!html) {
throw new Error('failed to render preview image');
}
res.send(html);
} catch (e) {
console.log(e);
res.status(500).send(e instanceof Error ? e.message : e);