Speed up unfurls by reusing puppeteer sessions
This commit is contained in:
119
unfurler/src/concurrency/ReusablePage.ts
Normal file
119
unfurler/src/concurrency/ReusablePage.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
import * as puppeteer from 'puppeteer';
|
||||
import ConcurrencyImplementation, { ResourceData } from 'puppeteer-cluster/dist/concurrency/ConcurrencyImplementation';
|
||||
import { timeoutExecute } from 'puppeteer-cluster/dist/util';
|
||||
|
||||
import config from '../config';
|
||||
// Base address of the mempool frontend: HTTP_HOST, followed by ":<HTTP_PORT>"
// only when a port is configured.
const mempoolHost = `${config.MEMPOOL.HTTP_HOST}${config.MEMPOOL.HTTP_PORT ? `:${config.MEMPOOL.HTTP_PORT}` : ''}`;

// Time limit (ms) passed to timeoutExecute when acquiring the page for a job.
const BROWSER_TIMEOUT = 5 * 1000;

// Maximum lifetime of a single page session, in milliseconds.
// PUPPETEER.MAX_PAGE_AGE is multiplied by 1000 (i.e. treated as seconds); when it
// is unset or 0 the fallback is 86400 seconds (24 hours).
const maxAgeMs = 1000 * (config.PUPPETEER.MAX_PAGE_AGE || 86400);
|
||||
|
||||
/**
 * A puppeteer page that can additionally carry an optional `repairRequested`
 * flag. ReusablePage reads this flag before and after each job to decide
 * whether the browser has to be restarted.
 */
type repairablePage = puppeteer.Page & {
  repairRequested?: boolean;
};
|
||||
|
||||
/**
 * Concurrency implementation for puppeteer-cluster that serves jobs from one
 * long-lived browser page instead of opening a fresh page per job.
 *
 * The page is opened lazily and pre-loaded with a default preview URL. The
 * whole browser is restarted ("repaired") when
 *  - the page emits a `pageerror`,
 *  - someone sets `repairRequested` on the page object,
 *  - the worker-level `repair()` hook is called, or
 *  - the page is older than `maxAgeMs` when a job finishes.
 * A restart only starts once no job instance is open; callers that arrive
 * earlier are parked until a restart attempt has finished.
 */
export default class ReusablePage extends ConcurrencyImplementation {

  protected browser: puppeteer.Browser | null = null;
  // the shared page; only set once its initial navigation has succeeded
  protected currentPage: repairablePage | null = null;
  // Date.now() at the moment currentPage was published
  protected pageCreatedAt: number = 0;
  // true while a browser restart is in progress
  private repairing: boolean = false;
  // sticky flag: the next job (or the next finished job) must restart the browser
  private repairRequested: boolean = false;
  // number of job instances handed out and not yet closed
  private openInstances: number = 0;
  // resolvers of callers parked in repair() until a restart attempt finishes
  private waitingForRepairResolvers: (() => void)[] = [];
  // in-flight page load, shared so concurrent callers do not each open a page
  private pendingPage: Promise<repairablePage> | null = null;

  public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
    super(options, puppeteer);
  }

  /**
   * Restarts the browser and warms up a fresh page.
   *
   * If jobs are still open or a restart is already running, this call only
   * waits until a restart attempt has finished and then returns.
   *
   * @throws Error('Unable to restart chrome.') when the new browser cannot be
   *   launched. The repair state is reset in that case as well, so parked
   *   callers are released and a later call can retry instead of hanging.
   */
  private async repair(): Promise<void> {
    if (this.openInstances !== 0 || this.repairing) {
      // already repairing or there are still pages open? wait for start/finish
      await new Promise<void>(resolve => this.waitingForRepairResolvers.push(resolve));
      return;
    }

    this.repairing = true;
    // keep the request sticky until a new browser is actually running, so a
    // failed restart is retried by the next job
    this.repairRequested = true;
    console.log('Starting repair');

    try {
      try {
        // will probably fail, but just in case the repair was not necessary
        await (<puppeteer.Browser>this.browser).close();
      } catch (e) {
        console.log('Unable to close browser.');
      }

      // everything that belonged to the old browser is unusable from here on
      this.currentPage = null;
      this.pendingPage = null;

      try {
        this.browser = await this.puppeteer.launch(this.options) as puppeteer.Browser;
      } catch (err) {
        throw new Error('Unable to restart chrome.');
      }
      this.repairRequested = false;
    } finally {
      // always leave the repairing state and release parked callers, even when
      // the restart failed - otherwise every later repair() would wait forever
      this.repairing = false;
      const parked = this.waitingForRepairResolvers;
      this.waitingForRepairResolvers = [];
      parked.forEach(resolve => resolve());
    }

    // warm up the shared page; released callers join this same load
    await this.createResources();
  }

  /** Launches the browser with the options passed to the constructor. */
  public async init() {
    this.browser = await this.puppeteer.launch(this.options);
  }

  /** Closes the browser, if one was launched. */
  public async close() {
    if (this.browser) {
      await this.browser.close();
    }
  }

  /**
   * Opens a new page in the current browser and navigates it to the default
   * preview URL. A page whose navigation fails is closed again and the error
   * is rethrown, so a half-loaded page is never handed out.
   */
  private async openPage(): Promise<repairablePage> {
    const page: repairablePage = await (this.browser as puppeteer.Browser).newPage();
    const defaultUrl = mempoolHost + '/preview/block/1';
    try {
      // any uncaught error inside the page marks the session for repair
      page.on('pageerror', () => {
        this.repairRequested = true;
      });
      await page.goto(defaultUrl, { waitUntil: "load" });
    } catch (err) {
      try {
        await page.close();
      } catch (e) {
        // page or browser is already gone - nothing left to clean up
      }
      throw err;
    }
    return page;
  }

  /**
   * Returns the shared page, opening and pre-loading it on first use.
   * Concurrent callers share a single in-flight load; a failed load is not
   * cached, so the next caller starts a new attempt.
   */
  protected async createResources(): Promise<ResourceData> {
    if (this.currentPage) {
      return { page: this.currentPage };
    }

    let pending = this.pendingPage;
    if (!pending) {
      const opening = this.openPage();
      pending = opening;
      this.pendingPage = opening;
      const settle = (page: repairablePage | null): void => {
        if (this.pendingPage !== opening) {
          // superseded by a repair in the meantime - do not publish a stale page
          return;
        }
        this.pendingPage = null;
        if (page) {
          this.currentPage = page;
          this.pageCreatedAt = Date.now();
        }
      };
      // registered before anyone awaits `opening`, so the page is published
      // before the first waiter resumes
      void opening.then(settle, () => settle(null));
    }

    return { page: await pending };
  }

  /**
   * Creates the per-worker handle: `jobInstance` acquires the shared page for
   * one job, `close` is a no-op and `repair` forces a browser restart.
   */
  public async workerInstance() {
    let resources: ResourceData;

    return {
      jobInstance: async () => {
        // a previous job or a page error asked for a restart
        if (this.repairRequested || this.currentPage?.repairRequested) {
          await this.repair();
        }

        await timeoutExecute(BROWSER_TIMEOUT, (async () => {
          resources = await this.createResources();
        })());
        this.openInstances += 1;

        return {
          resources,

          close: async () => {
            this.openInstances -= 1; // decrement first in case of error

            const expired = Date.now() - this.pageCreatedAt > maxAgeMs;
            if (this.repairRequested || this.currentPage?.repairRequested || expired) {
              await this.repair();
            }
          },
        };
      },

      close: async () => {},

      repair: async () => {
        console.log('Repair requested');
        this.repairRequested = true;
        await this.repair();
      },
    };
  }
}
|
||||
@@ -12,6 +12,7 @@ interface IConfig {
|
||||
PUPPETEER: {
|
||||
CLUSTER_SIZE: number;
|
||||
EXEC_PATH?: string;
|
||||
MAX_PAGE_AGE?: number;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import { Application, Request, Response, NextFunction } from 'express';
|
||||
import * as http from 'http';
|
||||
import config from './config';
|
||||
import { Cluster } from 'puppeteer-cluster';
|
||||
import ReusablePage from './concurrency/ReusablePage';
|
||||
const puppeteerConfig = require('../puppeteer.config.json');
|
||||
|
||||
if (config.PUPPETEER.EXEC_PATH) {
|
||||
@@ -32,7 +33,7 @@ class Server {
|
||||
;
|
||||
|
||||
this.cluster = await Cluster.launch({
|
||||
concurrency: Cluster.CONCURRENCY_CONTEXT,
|
||||
concurrency: ReusablePage,
|
||||
maxConcurrency: config.PUPPETEER.CLUSTER_SIZE,
|
||||
puppeteerOptions: puppeteerConfig,
|
||||
});
|
||||
@@ -52,47 +53,40 @@ class Server {
|
||||
this.app.get('*', (req, res) => { return this.renderHTML(req, res) })
|
||||
}
|
||||
|
||||
async clusterTask({ page, data: { url, action } }) {
|
||||
await page.goto(url, { waitUntil: "networkidle0" });
|
||||
switch (action) {
|
||||
case 'screenshot': {
|
||||
await page.evaluate(async () => {
|
||||
// wait for all images to finish loading
|
||||
const imgs = Array.from(document.querySelectorAll("img"));
|
||||
await Promise.all([
|
||||
document.fonts.ready,
|
||||
...imgs.map((img) => {
|
||||
if (img.complete) {
|
||||
if (img.naturalHeight !== 0) return;
|
||||
throw new Error("Image failed to load");
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
img.addEventListener("load", resolve);
|
||||
img.addEventListener("error", reject);
|
||||
});
|
||||
}),
|
||||
]);
|
||||
});
|
||||
const waitForReady = await page.$('meta[property="og:loading"]');
|
||||
const alreadyReady = await page.$('meta[property="og:ready"]');
|
||||
if (waitForReady != null && alreadyReady == null) {
|
||||
try {
|
||||
await page.waitForSelector('meta[property="og:ready]"', { timeout: 10000 });
|
||||
} catch (e) {
|
||||
// probably timed out
|
||||
async clusterTask({ page, data: { url, path, action } }) {
|
||||
try {
|
||||
if (action === 'screenshot' || action === 'html') {
|
||||
const loaded = await page.evaluate(async (path) => {
|
||||
if (window['ogService']) {
|
||||
window['ogService'].loadPage(path);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}, path)
|
||||
|
||||
if (!loaded) {
|
||||
throw new Error('failed to access open graph service');
|
||||
}
|
||||
return page.screenshot();
|
||||
} break;
|
||||
default: {
|
||||
try {
|
||||
await page.waitForSelector('meta[property="og:title"]', { timeout: 10000 })
|
||||
const tag = await page.$('meta[property="og:title"]');
|
||||
} catch (e) {
|
||||
// probably timed out
|
||||
|
||||
if (action === 'screenshot') {
|
||||
const waitForReady = await page.$('meta[property="og:preview:loading"]');
|
||||
const alreadyReady = await page.$('meta[property="og:preview:ready"]');
|
||||
if (waitForReady != null && alreadyReady == null) {
|
||||
await page.waitForSelector('meta[property="og:preview:ready"]', { timeout: 8000 });
|
||||
}
|
||||
return page.screenshot();
|
||||
} else if (action === 'html') {
|
||||
const alreadyReady = await page.$('meta[property="og:meta:ready"]');
|
||||
if (alreadyReady == null) {
|
||||
await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: 8000 });
|
||||
}
|
||||
return page.content();
|
||||
}
|
||||
return page.content();
|
||||
}
|
||||
} catch (e) {
|
||||
console.log(`failed to render page for ${action}`, e instanceof Error ? e.message : e);
|
||||
page.repairRequested = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,8 +94,11 @@ class Server {
|
||||
try {
|
||||
// strip default language code for compatibility
|
||||
const path = req.params[0].replace('/en/', '/');
|
||||
const img = await this.cluster?.execute({ url: this.mempoolHost + path, action: 'screenshot' });
|
||||
const img = await this.cluster?.execute({ url: this.mempoolHost + path, path: path, action: 'screenshot' });
|
||||
|
||||
if (!img) {
|
||||
throw new Error('failed to render preview image');
|
||||
}
|
||||
res.contentType('image/png');
|
||||
res.send(img);
|
||||
} catch (e) {
|
||||
@@ -120,9 +117,14 @@ class Server {
|
||||
}
|
||||
|
||||
try {
|
||||
let html = await this.cluster?.execute({ url: this.mempoolHost + req.params[0], action: 'html' });
|
||||
// strip default language code for compatibility
|
||||
const path = req.params[0].replace('/en/', '/');
|
||||
|
||||
res.send(html)
|
||||
let html = await this.cluster?.execute({ url: this.mempoolHost + req.params[0], path: req.params[0], action: 'html' });
|
||||
if (!html) {
|
||||
throw new Error('failed to render preview image');
|
||||
}
|
||||
res.send(html);
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
res.status(500).send(e instanceof Error ? e.message : e);
|
||||
|
||||
Reference in New Issue
Block a user