extend unfurler to dynamically render search crawler requests
This commit is contained in:
parent
56789532ed
commit
2f27d9279d
@ -7,7 +7,7 @@ import { StateService } from './state.service';
|
|||||||
})
|
})
|
||||||
export class SeoService {
|
export class SeoService {
|
||||||
network = '';
|
network = '';
|
||||||
baseTitle = 'mempool';
|
baseTitle = 'Mempool';
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private titleService: Title,
|
private titleService: Title,
|
||||||
|
65
unfurler/src/concurrency/ReusableSSRPage.ts
Normal file
65
unfurler/src/concurrency/ReusableSSRPage.ts
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import * as puppeteer from 'puppeteer';
|
||||||
|
import { timeoutExecute } from 'puppeteer-cluster/dist/util';
|
||||||
|
import logger from '../logger';
|
||||||
|
import config from '../config';
|
||||||
|
import ReusablePage from './ReusablePage';
|
||||||
|
// Address of the mempool frontend: the configured HTTP host, followed by
// ":<port>" only when a port is configured.
const mempoolHost = `${config.MEMPOOL.HTTP_HOST}${config.MEMPOOL.HTTP_PORT ? `:${config.MEMPOOL.HTTP_PORT}` : ''}`;

// Decoded 1x1 PNG that is served in place of every intercepted image request.
const mockImageBuffer = Buffer.from(
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=",
  'base64',
);
|
||||||
|
|
||||||
|
/**
 * A puppeteer page carrying the extra bookkeeping fields used by the
 * reusable-page concurrency implementations.
 */
interface RepairablePage extends puppeteer.Page {
  /** Position of the page within its pool — NOTE(review): not assigned in this file, confirm against ReusablePage. */
  index?: number;
  /** Timestamp (`Date.now()`) recorded when the page was created. */
  createdAt?: number;
  /** Language the page currently has loaded; `null` until a localized URL has been opened. */
  language?: string | null;
  /** Set to `true` once initialization has finished — presumably marks the page as available; confirm against ReusablePage. */
  free?: boolean;
  /** Set to `true` when the page failed to load or render and should be repaired. */
  repairRequested?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Concurrency implementation that prepares browser pages for server-side
 * rendering of the frontend.
 *
 * Each page is pre-loaded with the frontend's `/about` route. Image requests
 * are answered with a placeholder PNG and every resource type other than
 * documents, scripts, XHR and fetch is aborted.
 */
export default class ReusableSSRPage extends ReusablePage {

  public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
    super(options, puppeteer);
  }

  /** Closes the underlying browser instance. */
  public async close(): Promise<void> {
    await (this.browser as puppeteer.Browser).close();
  }

  /**
   * Opens a new browser page, installs request interception and loads the
   * frontend so the page is ready to render routes.
   *
   * @returns the new page; `repairRequested` is set on it when the initial
   *          frontend load failed or timed out.
   */
  protected async initPage(): Promise<RepairablePage> {
    const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
    page.language = null;
    page.createdAt = Date.now();
    const defaultUrl = mempoolHost + '/about';

    page.on('pageerror', (err) => {
      console.log(err);
      // page.repairRequested = true;
    });

    await page.setRequestInterception(true);
    page.on('request', req => {
      if (req.isInterceptResolutionHandled()) {
        // Another handler already resolved this request; resolving it a
        // second time (continue/respond/abort) is rejected by puppeteer.
        return;
      }
      const resourceType = req.resourceType();
      if (resourceType === 'image') {
        // Serve the placeholder instead of fetching the real image.
        return req.respond({
          contentType: 'image/png',
          headers: {"Access-Control-Allow-Origin": "*"},
          body: mockImageBuffer
        });
      }
      if (!['document', 'script', 'xhr', 'fetch'].includes(resourceType)) {
        // Resource types outside this list are not loaded at all.
        return req.abort();
      }
      return req.continue();
    });

    try {
      await page.goto(defaultUrl, { waitUntil: "networkidle0" });
      // NOTE(review): this meta tag is presumably added by the frontend once its meta tags are in place — confirm.
      await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 });
    } catch (e) {
      logger.err(`failed to load frontend during ssr page initialization: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
    }

    page.free = true;
    return page;
  }
}
|
@ -5,9 +5,11 @@ import * as https from 'https';
|
|||||||
import config from './config';
|
import config from './config';
|
||||||
import { Cluster } from 'puppeteer-cluster';
|
import { Cluster } from 'puppeteer-cluster';
|
||||||
import ReusablePage from './concurrency/ReusablePage';
|
import ReusablePage from './concurrency/ReusablePage';
|
||||||
|
import ReusableSSRPage from './concurrency/ReusableSSRPage';
|
||||||
import { parseLanguageUrl } from './language/lang';
|
import { parseLanguageUrl } from './language/lang';
|
||||||
import { matchRoute } from './routes';
|
import { matchRoute } from './routes';
|
||||||
import logger from './logger';
|
import logger from './logger';
|
||||||
|
import { TimeoutError } from "puppeteer";
|
||||||
const puppeteerConfig = require('../puppeteer.config.json');
|
const puppeteerConfig = require('../puppeteer.config.json');
|
||||||
|
|
||||||
if (config.PUPPETEER.EXEC_PATH) {
|
if (config.PUPPETEER.EXEC_PATH) {
|
||||||
@ -20,13 +22,16 @@ class Server {
|
|||||||
private server: http.Server | undefined;
|
private server: http.Server | undefined;
|
||||||
private app: Application;
|
private app: Application;
|
||||||
cluster?: Cluster;
|
cluster?: Cluster;
|
||||||
|
ssrCluster?: Cluster;
|
||||||
mempoolHost: string;
|
mempoolHost: string;
|
||||||
|
mempoolUrl: URL;
|
||||||
network: string;
|
network: string;
|
||||||
secureHost = true;
|
secureHost = true;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.app = express();
|
this.app = express();
|
||||||
this.mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : '');
|
this.mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : '');
|
||||||
|
this.mempoolUrl = new URL(this.mempoolHost);
|
||||||
this.secureHost = config.SERVER.HOST.startsWith('https');
|
this.secureHost = config.SERVER.HOST.startsWith('https');
|
||||||
this.network = config.MEMPOOL.NETWORK || 'bitcoin';
|
this.network = config.MEMPOOL.NETWORK || 'bitcoin';
|
||||||
this.startServer();
|
this.startServer();
|
||||||
@ -49,6 +54,12 @@ class Server {
|
|||||||
puppeteerOptions: puppeteerConfig,
|
puppeteerOptions: puppeteerConfig,
|
||||||
});
|
});
|
||||||
await this.cluster?.task(async (args) => { return this.clusterTask(args) });
|
await this.cluster?.task(async (args) => { return this.clusterTask(args) });
|
||||||
|
this.ssrCluster = await Cluster.launch({
|
||||||
|
concurrency: ReusableSSRPage,
|
||||||
|
maxConcurrency: config.PUPPETEER.CLUSTER_SIZE,
|
||||||
|
puppeteerOptions: puppeteerConfig,
|
||||||
|
});
|
||||||
|
await this.ssrCluster?.task(async (args) => { return this.ssrClusterTask(args) });
|
||||||
}
|
}
|
||||||
|
|
||||||
this.setUpRoutes();
|
this.setUpRoutes();
|
||||||
@ -65,6 +76,10 @@ class Server {
|
|||||||
await this.cluster.idle();
|
await this.cluster.idle();
|
||||||
await this.cluster.close();
|
await this.cluster.close();
|
||||||
}
|
}
|
||||||
|
if (this.ssrCluster) {
|
||||||
|
await this.ssrCluster.idle();
|
||||||
|
await this.ssrCluster.close();
|
||||||
|
}
|
||||||
if (this.server) {
|
if (this.server) {
|
||||||
await this.server.close();
|
await this.server.close();
|
||||||
}
|
}
|
||||||
@ -102,8 +117,8 @@ class Server {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// wait for preview component to initialize
|
// wait for preview component to initialize
|
||||||
await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 })
|
|
||||||
let success;
|
let success;
|
||||||
|
await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 })
|
||||||
success = await Promise.race([
|
success = await Promise.race([
|
||||||
page.waitForSelector('meta[property="og:preview:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => true),
|
page.waitForSelector('meta[property="og:preview:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => true),
|
||||||
page.waitForSelector('meta[property="og:preview:fail"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => false)
|
page.waitForSelector('meta[property="og:preview:fail"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => false)
|
||||||
@ -124,6 +139,44 @@ class Server {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cluster task that renders a frontend route and returns the page HTML.
 *
 * If the page is already on the requested language, the route is loaded
 * in-app through `window.ogService.loadPage`; otherwise the page navigates
 * to the localized URL. The task then waits for the network to go idle.
 *
 * @returns the page HTML (also when a wait times out), or `undefined` after
 *          any other failure, in which case the page is flagged for repair.
 */
async ssrClusterTask({ page, data: { url, path, action } }) {
  try {
    const { lang, path: routePath } = parseLanguageUrl(path);

    if (page.language === lang) {
      // Same language: ask the running app to load the route.
      const ogServiceFound = await page.evaluate(async (targetPath) => {
        const ogService = window['ogService'];
        if (!ogService) {
          return false;
        }
        ogService.loadPage(targetPath);
        return true;
      }, routePath);

      if (!ogServiceFound) {
        throw new Error('failed to access open graph service');
      }
    } else {
      // switch language
      page.language = lang;
      const languagePrefix = lang ? `/${lang}` : '';
      await page.goto(`${this.mempoolHost}${languagePrefix}${routePath}`, { waitUntil: "load" });
    }

    await page.waitForNetworkIdle({ timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 });
    return await page.content();
  } catch (e) {
    if (!(e instanceof TimeoutError)) {
      logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
      return;
    }
    // A timeout still returns whatever the page holds at this point.
    return await page.content();
  }
}
|
||||||
|
|
||||||
async renderDisabled(req, res) {
|
async renderDisabled(req, res) {
|
||||||
res.status(500).send("preview rendering disabled");
|
res.status(500).send("preview rendering disabled");
|
||||||
}
|
}
|
||||||
@ -163,11 +216,44 @@ class Server {
|
|||||||
// drop requests for static files
|
// drop requests for static files
|
||||||
const rawPath = req.params[0];
|
const rawPath = req.params[0];
|
||||||
const match = rawPath.match(/\.[\w]+$/);
|
const match = rawPath.match(/\.[\w]+$/);
|
||||||
if (match?.length && match[0] !== '.html') {
|
if (match?.length && match[0] !== '.html'
|
||||||
|
|| rawPath.startsWith('/api/v1/donations/images')
|
||||||
|
|| rawPath.startsWith('/api/v1/contributors/images')
|
||||||
|
|| rawPath.startsWith('/api/v1/translators/images')
|
||||||
|
|| rawPath.startsWith('/resources/profile')
|
||||||
|
) {
|
||||||
|
if (req.headers['user-agent'] === 'googlebot') {
|
||||||
|
if (this.secureHost) {
|
||||||
|
https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
|
||||||
|
} else {
|
||||||
|
http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
res.status(404).send();
|
res.status(404).send();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let result = '';
|
||||||
|
try {
|
||||||
|
if (req.headers['user-agent'] === 'googlebot') {
|
||||||
|
result = await this.renderSEOPage(rawPath);
|
||||||
|
} else {
|
||||||
|
result = await this.renderUnfurlMeta(rawPath);
|
||||||
|
}
|
||||||
|
if (result && result.length) {
|
||||||
|
res.send(result);
|
||||||
|
} else {
|
||||||
|
res.status(500).send();
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
logger.err(e instanceof Error ? e.message : `${e} ${req.params[0]}`);
|
||||||
|
res.status(500).send(e instanceof Error ? e.message : e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async renderUnfurlMeta(rawPath: string): Promise<string> {
|
||||||
const { lang, path } = parseLanguageUrl(rawPath);
|
const { lang, path } = parseLanguageUrl(rawPath);
|
||||||
const matchedRoute = matchRoute(this.network, path);
|
const matchedRoute = matchRoute(this.network, path);
|
||||||
let ogImageUrl = config.SERVER.HOST + (matchedRoute.staticImg || matchedRoute.fallbackImg);
|
let ogImageUrl = config.SERVER.HOST + (matchedRoute.staticImg || matchedRoute.fallbackImg);
|
||||||
@ -178,7 +264,7 @@ class Server {
|
|||||||
ogTitle = `${this.network ? capitalize(this.network) + ' ' : ''}${matchedRoute.networkMode !== 'mainnet' ? capitalize(matchedRoute.networkMode) + ' ' : ''}${matchedRoute.title}`;
|
ogTitle = `${this.network ? capitalize(this.network) + ' ' : ''}${matchedRoute.networkMode !== 'mainnet' ? capitalize(matchedRoute.networkMode) + ' ' : ''}${matchedRoute.title}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
res.send(`
|
return `
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html lang="en-US" dir="ltr">
|
<html lang="en-US" dir="ltr">
|
||||||
<head>
|
<head>
|
||||||
@ -199,7 +285,16 @@ class Server {
|
|||||||
<meta property="twitter:domain" content="mempool.space">
|
<meta property="twitter:domain" content="mempool.space">
|
||||||
<body></body>
|
<body></body>
|
||||||
</html>
|
</html>
|
||||||
`);
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Renders a route through the SSR cluster and returns its HTML with all
 * script elements removed.
 *
 * @param rawPath request path, passed to the cluster task as-is
 * @returns the rendered HTML without scripts, or an empty string when the
 *          SSR cluster is unavailable or produced no output
 */
async renderSEOPage(rawPath: string): Promise<string> {
  const html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' });
  if (typeof html !== 'string' || !html.length) {
    return '';
  }
  // remove javascript to prevent double hydration
  // Lazy and newline-aware, so each script element is removed on its own:
  // multi-line scripts are matched and markup between two scripts is kept.
  return html.replace(/<script\b[\s\S]*?<\/script\s*>/gi, '');
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user