extend unfurler to dynamically render search crawler requests
This commit is contained in:
parent
56789532ed
commit
2f27d9279d
@ -7,7 +7,7 @@ import { StateService } from './state.service';
|
||||
})
|
||||
export class SeoService {
|
||||
network = '';
|
||||
baseTitle = 'mempool';
|
||||
baseTitle = 'Mempool';
|
||||
|
||||
constructor(
|
||||
private titleService: Title,
|
||||
|
65
unfurler/src/concurrency/ReusableSSRPage.ts
Normal file
65
unfurler/src/concurrency/ReusableSSRPage.ts
Normal file
@ -0,0 +1,65 @@
|
||||
import * as puppeteer from 'puppeteer';
|
||||
import { timeoutExecute } from 'puppeteer-cluster/dist/util';
|
||||
import logger from '../logger';
|
||||
import config from '../config';
|
||||
import ReusablePage from './ReusablePage';
|
||||
// Origin of the mempool frontend: the configured host, followed by ":<port>"
// only when a port is configured.
const mempoolPortSuffix = config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : '';
const mempoolHost = config.MEMPOOL.HTTP_HOST + mempoolPortSuffix;
|
||||
|
||||
// Base64 of a 1x1 PNG; served in place of every real image request so pages
// render without downloading image assets.
const mockImageBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=";
const mockImageBuffer = Buffer.from(mockImageBase64, 'base64');
|
||||
|
||||
/**
 * A puppeteer page decorated with the bookkeeping fields this module reads
 * and writes while pooling pages.
 */
interface RepairablePage extends puppeteer.Page {
  /** Timestamp (`Date.now()`) recorded when the page was created. */
  createdAt?: number;
  /** Language the page last navigated to; `null` until a language is loaded. */
  language?: string | null;
  /** Set to `true` once the page has finished initialization. */
  free?: boolean;
  /** Set to `true` when loading or rendering failed and the page should be repaired. */
  repairRequested?: boolean;
  /** NOTE(review): presumably the page's slot in the pool - not used in this file, confirm against ReusablePage. */
  index?: number;
}
|
||||
|
||||
/**
 * ReusablePage variant whose pages are prepared for server-side rendering:
 * images are replaced by a 1x1 placeholder, every resource type other than
 * document/script/xhr/fetch is aborted, and the frontend is preloaded so later
 * tasks can reuse the already-booted app.
 */
export default class ReusableSSRPage extends ReusablePage {

  public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
    super(options, puppeteer);
  }

  /** Closes the underlying browser instance. */
  public async close() {
    await (this.browser as puppeteer.Browser).close();
  }

  /**
   * Opens a new page, installs the request filter and preloads the frontend.
   *
   * @returns the new page, marked `free`. If the frontend failed to load, the
   *          page is still returned but has `repairRequested` set.
   */
  protected async initPage(): Promise<RepairablePage> {
    const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
    page.language = null;
    page.createdAt = Date.now();
    const defaultUrl = mempoolHost + '/about';

    page.on('pageerror', (err) => {
      console.log(err);
      // page.repairRequested = true;
    });
    await page.setRequestInterception(true);
    page.on('request', req => {
      if (req.isInterceptResolutionHandled()) {
        // Another handler already resolved this request. Resolving it a second
        // time (continue/respond/abort) is an error, so just leave it alone.
        return;
      }
      if (req.resourceType() === 'image') {
        // serve the placeholder instead of fetching the real image
        return req.respond({
          contentType: 'image/png',
          headers: {"Access-Control-Allow-Origin": "*"},
          body: mockImageBuffer
        });
      } else if (!['document', 'script', 'xhr', 'fetch'].includes(req.resourceType())) {
        // anything else (stylesheets, fonts, media, ...) is not requested at all
        return req.abort();
      } else {
        return req.continue();
      }
    });
    try {
      await page.goto(defaultUrl, { waitUntil: "networkidle0" });
      // the frontend adds this meta tag once it is ready
      await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 });
    } catch (e) {
      logger.err(`failed to load frontend during ssr page initialization: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
    }
    page.free = true;
    return page;
  }
}
|
@ -5,9 +5,11 @@ import * as https from 'https';
|
||||
import config from './config';
|
||||
import { Cluster } from 'puppeteer-cluster';
|
||||
import ReusablePage from './concurrency/ReusablePage';
|
||||
import ReusableSSRPage from './concurrency/ReusableSSRPage';
|
||||
import { parseLanguageUrl } from './language/lang';
|
||||
import { matchRoute } from './routes';
|
||||
import logger from './logger';
|
||||
import { TimeoutError } from "puppeteer";
|
||||
const puppeteerConfig = require('../puppeteer.config.json');
|
||||
|
||||
if (config.PUPPETEER.EXEC_PATH) {
|
||||
@ -20,13 +22,16 @@ class Server {
|
||||
private server: http.Server | undefined;
|
||||
private app: Application;
|
||||
cluster?: Cluster;
|
||||
ssrCluster?: Cluster;
|
||||
mempoolHost: string;
|
||||
mempoolUrl: URL;
|
||||
network: string;
|
||||
secureHost = true;
|
||||
|
||||
constructor() {
|
||||
this.app = express();
|
||||
this.mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : '');
|
||||
this.mempoolUrl = new URL(this.mempoolHost);
|
||||
this.secureHost = config.SERVER.HOST.startsWith('https');
|
||||
this.network = config.MEMPOOL.NETWORK || 'bitcoin';
|
||||
this.startServer();
|
||||
@ -49,6 +54,12 @@ class Server {
|
||||
puppeteerOptions: puppeteerConfig,
|
||||
});
|
||||
await this.cluster?.task(async (args) => { return this.clusterTask(args) });
|
||||
this.ssrCluster = await Cluster.launch({
|
||||
concurrency: ReusableSSRPage,
|
||||
maxConcurrency: config.PUPPETEER.CLUSTER_SIZE,
|
||||
puppeteerOptions: puppeteerConfig,
|
||||
});
|
||||
await this.ssrCluster?.task(async (args) => { return this.ssrClusterTask(args) });
|
||||
}
|
||||
|
||||
this.setUpRoutes();
|
||||
@ -65,6 +76,10 @@ class Server {
|
||||
await this.cluster.idle();
|
||||
await this.cluster.close();
|
||||
}
|
||||
if (this.ssrCluster) {
|
||||
await this.ssrCluster.idle();
|
||||
await this.ssrCluster.close();
|
||||
}
|
||||
if (this.server) {
|
||||
await this.server.close();
|
||||
}
|
||||
@ -102,8 +117,8 @@ class Server {
|
||||
}
|
||||
|
||||
// wait for preview component to initialize
|
||||
await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 })
|
||||
let success;
|
||||
await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 })
|
||||
success = await Promise.race([
|
||||
page.waitForSelector('meta[property="og:preview:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => true),
|
||||
page.waitForSelector('meta[property="og:preview:fail"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => false)
|
||||
@ -124,6 +139,44 @@ class Server {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cluster task that renders `path` inside a reusable page and returns the
 * page's HTML.
 *
 * When the page already has the requested language loaded, the in-page
 * `ogService` is asked to load the path; otherwise the page navigates to the
 * localized URL. The HTML is captured once the network has gone idle.
 *
 * @returns the page HTML. On a puppeteer `TimeoutError` the page's current
 *          content is returned anyway; on any other error the failure is
 *          logged, the page is flagged for repair and `undefined` is returned.
 */
async ssrClusterTask({ page, data: { url, path, action } }) {
  try {
    const { lang, path: pagePath } = parseLanguageUrl(path);

    if (page.language === lang) {
      // same language: reuse the loaded app and let it route internally
      const serviceFound = await page.evaluate(async (targetPath) => {
        if (!window['ogService']) {
          return false;
        }
        window['ogService'].loadPage(targetPath);
        return true;
      }, pagePath);
      if (!serviceFound) {
        throw new Error('failed to access open graph service');
      }
    } else {
      // switch language
      page.language = lang;
      const langPrefix = lang ? `/${lang}` : '';
      await page.goto(`${this.mempoolHost}${langPrefix}${pagePath}`, { waitUntil: "load" });
    }

    await page.waitForNetworkIdle({
      timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000,
    });
    return await page.content();
  } catch (e) {
    if (!(e instanceof TimeoutError)) {
      logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
      return;
    }
    // timed out: return the page content as it currently stands
    return await page.content();
  }
}
|
||||
|
||||
/**
 * Route handler that answers every request with HTTP 500 and a fixed
 * "preview rendering disabled" body.
 */
async renderDisabled(req, res) {
  const response = res.status(500);
  response.send("preview rendering disabled");
}
|
||||
@ -163,11 +216,44 @@ class Server {
|
||||
// drop requests for static files
|
||||
const rawPath = req.params[0];
|
||||
const match = rawPath.match(/\.[\w]+$/);
|
||||
if (match?.length && match[0] !== '.html') {
|
||||
res.status(404).send();
|
||||
return;
|
||||
if (match?.length && match[0] !== '.html'
|
||||
|| rawPath.startsWith('/api/v1/donations/images')
|
||||
|| rawPath.startsWith('/api/v1/contributors/images')
|
||||
|| rawPath.startsWith('/api/v1/translators/images')
|
||||
|| rawPath.startsWith('/resources/profile')
|
||||
) {
|
||||
if (req.headers['user-agent'] === 'googlebot') {
|
||||
if (this.secureHost) {
|
||||
https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
|
||||
} else {
|
||||
http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
res.status(404).send();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let result = '';
|
||||
try {
|
||||
if (req.headers['user-agent'] === 'googlebot') {
|
||||
result = await this.renderSEOPage(rawPath);
|
||||
} else {
|
||||
result = await this.renderUnfurlMeta(rawPath);
|
||||
}
|
||||
if (result && result.length) {
|
||||
res.send(result);
|
||||
} else {
|
||||
res.status(500).send();
|
||||
}
|
||||
} catch (e) {
|
||||
logger.err(e instanceof Error ? e.message : `${e} ${req.params[0]}`);
|
||||
res.status(500).send(e instanceof Error ? e.message : e);
|
||||
}
|
||||
}
|
||||
|
||||
async renderUnfurlMeta(rawPath: string): Promise<string> {
|
||||
const { lang, path } = parseLanguageUrl(rawPath);
|
||||
const matchedRoute = matchRoute(this.network, path);
|
||||
let ogImageUrl = config.SERVER.HOST + (matchedRoute.staticImg || matchedRoute.fallbackImg);
|
||||
@ -178,7 +264,7 @@ class Server {
|
||||
ogTitle = `${this.network ? capitalize(this.network) + ' ' : ''}${matchedRoute.networkMode !== 'mainnet' ? capitalize(matchedRoute.networkMode) + ' ' : ''}${matchedRoute.title}`;
|
||||
}
|
||||
|
||||
res.send(`
|
||||
return `
|
||||
<!doctype html>
|
||||
<html lang="en-US" dir="ltr">
|
||||
<head>
|
||||
@ -199,7 +285,16 @@ class Server {
|
||||
<meta property="twitter:domain" content="mempool.space">
|
||||
<body></body>
|
||||
</html>
|
||||
`);
|
||||
`;
|
||||
}
|
||||
|
||||
/**
 * Renders `rawPath` through the SSR cluster and returns the resulting HTML
 * with all script elements removed.
 *
 * @param rawPath request path, appended to the mempool host to form the URL
 * @returns the script-free HTML, or an empty string when the SSR cluster is
 *          unavailable or produced no output
 */
async renderSEOPage(rawPath: string): Promise<string> {
  const html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' });
  if (typeof html !== 'string' || !html.length) {
    return '';
  }
  // remove javascript to prevent double hydration
  // Lazy and newline-aware: each <script>...</script> element is removed on its
  // own, so markup between two script tags on one line is kept and inline
  // scripts spanning several lines are stripped too.
  return html.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user