extend unfurler to dynamically render search crawler requests
This commit is contained in:
		
							parent
							
								
									56789532ed
								
							
						
					
					
						commit
						2f27d9279d
					
				| @ -7,7 +7,7 @@ import { StateService } from './state.service'; | |||||||
| }) | }) | ||||||
| export class SeoService { | export class SeoService { | ||||||
|   network = ''; |   network = ''; | ||||||
|   baseTitle = 'mempool'; |   baseTitle = 'Mempool'; | ||||||
| 
 | 
 | ||||||
|   constructor( |   constructor( | ||||||
|     private titleService: Title, |     private titleService: Title, | ||||||
|  | |||||||
							
								
								
									
										65
									
								
								unfurler/src/concurrency/ReusableSSRPage.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								unfurler/src/concurrency/ReusableSSRPage.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | |||||||
|  | import * as puppeteer from 'puppeteer'; | ||||||
|  | import { timeoutExecute } from 'puppeteer-cluster/dist/util'; | ||||||
|  | import logger from '../logger'; | ||||||
|  | import config from '../config'; | ||||||
|  | import ReusablePage from './ReusablePage'; | ||||||
// Base URL of the mempool frontend: the configured host, followed by ":<port>" when a port is configured.
const mempoolHost = `${config.MEMPOOL.HTTP_HOST}${config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''}`;

// Placeholder PNG (1x1 pixel) served in place of every intercepted image request,
// so pages can finish loading without fetching real image assets.
const mockImageBuffer = Buffer.from(
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=",
  'base64',
);
|  | 
 | ||||||
/**
 * A puppeteer page carrying the extra bookkeeping fields that the reusable-page
 * concurrency code attaches to it. Every field is optional because a freshly
 * created page has none of them until it is initialised.
 */
interface RepairablePage extends puppeteer.Page {
  /** Not assigned anywhere in this file; presumably a slot index managed by the base class (TODO confirm). */
  index?: number;
  /** Timestamp (ms since epoch, from `Date.now()`) recorded when the page was created. */
  createdAt?: number;
  /** Language the page is currently showing; `null` until a localized URL has been loaded. */
  language?: string | null;
  /** Set to `true` once initialisation has finished. */
  free?: boolean;
  /** Set to `true` when loading or rendering failed and the page should be repaired. */
  repairRequested?: boolean;
}
|  | 
 | ||||||
/**
 * Concurrency implementation whose pages are pre-loaded with the mempool
 * frontend so that full HTML documents can be rendered for search crawlers.
 *
 * It builds on `ReusablePage` and only customises how the browser is closed
 * and how each page is initialised.
 */
export default class ReusableSSRPage extends ReusablePage {

  public constructor(options: puppeteer.LaunchOptions, puppeteer: any) {
    super(options, puppeteer);
  }

  /** Closes the underlying browser instance. */
  public async close() {
    await (this.browser as puppeteer.Browser).close();
  }

  /**
   * Opens a new page, installs request interception and loads the frontend's
   * `/about` route so the application is ready before the first task runs.
   *
   * Interception rules:
   *  - image requests are answered locally with a placeholder PNG,
   *  - anything other than document / script / xhr / fetch is aborted,
   *  - everything else continues to the network.
   *
   * @returns the initialised page; `repairRequested` is set on it when the
   *          frontend failed to load within the render timeout.
   */
  protected async initPage(): Promise<RepairablePage> {
    const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
    page.language = null;
    page.createdAt = Date.now();
    const defaultUrl = mempoolHost + '/about';

    page.on('pageerror', (err) => {
      // Log through the shared logger; a script error alone does not trigger a repair.
      logger.err(`uncaught error in ssr page: ` + (err instanceof Error ? err.message : `${err}`));
    });

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Another handler already resolved this request. Resolving it a second
      // time (continue/abort/respond) would reject with "Request is already handled!".
      if (req.isInterceptResolutionHandled()) {
        return;
      }
      const resourceType = req.resourceType();
      if (resourceType === 'image') {
        return req.respond({
          contentType: 'image/png',
          headers: {"Access-Control-Allow-Origin": "*"},
          body: mockImageBuffer
        });
      }
      if (!['document', 'script', 'xhr', 'fetch'].includes(resourceType)) {
        return req.abort();
      }
      return req.continue();
    });

    try {
      await page.goto(defaultUrl, { waitUntil: "networkidle0" });
      await page.waitForSelector('meta[property="og:meta:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 });
    } catch (e) {
      logger.err(`failed to load frontend during ssr page initialization: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
    }
    page.free = true;
    return page;
  }
}
| @ -5,9 +5,11 @@ import * as https from 'https'; | |||||||
import config from './config';
import { Cluster } from 'puppeteer-cluster';
import ReusablePage from './concurrency/ReusablePage';
// The SSR concurrency class lives in its own module. Importing it from
// './concurrency/ReusablePage' would bind this name to the preview
// implementation and the SSR cluster would never use ReusableSSRPage.
import ReusableSSRPage from './concurrency/ReusableSSRPage';
import { parseLanguageUrl } from './language/lang';
import { matchRoute } from './routes';
import logger from './logger';
import { TimeoutError } from "puppeteer";
const puppeteerConfig = require('../puppeteer.config.json');
| 
 | 
 | ||||||
| if (config.PUPPETEER.EXEC_PATH) { | if (config.PUPPETEER.EXEC_PATH) { | ||||||
| @ -20,13 +22,16 @@ class Server { | |||||||
|   private server: http.Server | undefined; |   private server: http.Server | undefined; | ||||||
|   private app: Application; |   private app: Application; | ||||||
|   cluster?: Cluster; |   cluster?: Cluster; | ||||||
|  |   ssrCluster?: Cluster; | ||||||
|   mempoolHost: string; |   mempoolHost: string; | ||||||
|  |   mempoolUrl: URL; | ||||||
|   network: string; |   network: string; | ||||||
|   secureHost = true; |   secureHost = true; | ||||||
| 
 | 
 | ||||||
|   constructor() { |   constructor() { | ||||||
|     this.app = express(); |     this.app = express(); | ||||||
|     this.mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''); |     this.mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' + config.MEMPOOL.HTTP_PORT : ''); | ||||||
|  |     this.mempoolUrl = new URL(this.mempoolHost); | ||||||
|     this.secureHost = config.SERVER.HOST.startsWith('https'); |     this.secureHost = config.SERVER.HOST.startsWith('https'); | ||||||
|     this.network = config.MEMPOOL.NETWORK || 'bitcoin'; |     this.network = config.MEMPOOL.NETWORK || 'bitcoin'; | ||||||
|     this.startServer(); |     this.startServer(); | ||||||
| @ -49,6 +54,12 @@ class Server { | |||||||
|           puppeteerOptions: puppeteerConfig, |           puppeteerOptions: puppeteerConfig, | ||||||
|       }); |       }); | ||||||
|       await this.cluster?.task(async (args) => { return this.clusterTask(args) }); |       await this.cluster?.task(async (args) => { return this.clusterTask(args) }); | ||||||
|  |       this.ssrCluster = await Cluster.launch({ | ||||||
|  |         concurrency: ReusableSSRPage, | ||||||
|  |         maxConcurrency: config.PUPPETEER.CLUSTER_SIZE, | ||||||
|  |         puppeteerOptions: puppeteerConfig, | ||||||
|  |       }); | ||||||
|  |       await this.ssrCluster?.task(async (args) => { return this.ssrClusterTask(args) }); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     this.setUpRoutes(); |     this.setUpRoutes(); | ||||||
| @ -65,6 +76,10 @@ class Server { | |||||||
|       await this.cluster.idle(); |       await this.cluster.idle(); | ||||||
|       await this.cluster.close(); |       await this.cluster.close(); | ||||||
|     } |     } | ||||||
|  |     if (this.ssrCluster) { | ||||||
|  |       await this.ssrCluster.idle(); | ||||||
|  |       await this.ssrCluster.close(); | ||||||
|  |     } | ||||||
|     if (this.server) { |     if (this.server) { | ||||||
|       await this.server.close(); |       await this.server.close(); | ||||||
|     } |     } | ||||||
| @ -102,8 +117,8 @@ class Server { | |||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       // wait for preview component to initialize
 |       // wait for preview component to initialize
 | ||||||
|       await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }) |  | ||||||
|       let success; |       let success; | ||||||
|  |       await page.waitForSelector('meta[property="og:preview:loading"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }) | ||||||
|       success = await Promise.race([ |       success = await Promise.race([ | ||||||
|         page.waitForSelector('meta[property="og:preview:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => true), |         page.waitForSelector('meta[property="og:preview:ready"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => true), | ||||||
|         page.waitForSelector('meta[property="og:preview:fail"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => false) |         page.waitForSelector('meta[property="og:preview:fail"]', { timeout: config.PUPPETEER.RENDER_TIMEOUT || 3000 }).then(() => false) | ||||||
| @ -124,6 +139,44 @@ class Server { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
/**
 * Cluster task that renders a frontend route and returns the page's HTML.
 *
 * When the requested language differs from the one the page currently shows,
 * the localized URL is loaded with a full navigation. Otherwise the page is
 * asked to route in place through `window.ogService.loadPage`.
 *
 * @param page   reusable page supplied by the cluster
 * @param url    absolute URL of the request (not used by this task)
 * @param path   raw request path, possibly prefixed with a language code
 * @param action label used in the failure log message
 * @returns the serialized HTML; also returned when waiting timed out, since
 *          whatever has rendered so far is still served. Resolves to
 *          `undefined` after any other failure, in which case the page is
 *          flagged for repair.
 */
async ssrClusterTask({ page, data: { url, path, action } }) {
  try {
    const { lang, path: routePath } = parseLanguageUrl(path);

    if (page.language === lang) {
      // same language: navigate inside the already-loaded app
      const routed = await page.evaluate(async (targetPath) => {
        const ogService = window['ogService'];
        if (!ogService) {
          return false;
        }
        ogService.loadPage(targetPath);
        return true;
      }, routePath);
      if (!routed) {
        throw new Error('failed to access open graph service');
      }
    } else {
      // switch language
      page.language = lang;
      const languagePrefix = lang ? `/${lang}` : '';
      const localizedUrl = `${this.mempoolHost}${languagePrefix}${routePath}`;
      await page.goto(localizedUrl, { waitUntil: "load" });
    }

    const renderTimeout = config.PUPPETEER.RENDER_TIMEOUT || 3000;
    await page.waitForNetworkIdle({ timeout: renderTimeout });
    return await page.content();
  } catch (e) {
    if (!(e instanceof TimeoutError)) {
      logger.err(`failed to render ${path} for ${action}: ` + (e instanceof Error ? e.message : `${e}`));
      page.repairRequested = true;
      return;
    }
    // timed out: serve whatever has been rendered so far
    return await page.content();
  }
}
|  | 
 | ||||||
/**
 * Handler that always answers with HTTP 500 and a fixed message saying that
 * preview rendering is disabled. The request itself is not inspected.
 */
async renderDisabled(req, res) {
  const response = res.status(500);
  response.send("preview rendering disabled");
}
| @ -163,11 +216,44 @@ class Server { | |||||||
|     // drop requests for static files
 |     // drop requests for static files
 | ||||||
|     const rawPath = req.params[0]; |     const rawPath = req.params[0]; | ||||||
|     const match = rawPath.match(/\.[\w]+$/); |     const match = rawPath.match(/\.[\w]+$/); | ||||||
|     if (match?.length && match[0] !== '.html') { |     if (match?.length && match[0] !== '.html' | ||||||
|       res.status(404).send(); |       || rawPath.startsWith('/api/v1/donations/images') | ||||||
|       return; |       || rawPath.startsWith('/api/v1/contributors/images') | ||||||
|  |       || rawPath.startsWith('/api/v1/translators/images') | ||||||
|  |       || rawPath.startsWith('/resources/profile') | ||||||
|  |     ) { | ||||||
|  |       if (req.headers['user-agent'] === 'googlebot') { | ||||||
|  |         if (this.secureHost) { | ||||||
|  |           https.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); | ||||||
|  |         } else { | ||||||
|  |           http.get(config.SERVER.HOST + rawPath, { headers: { 'user-agent': 'mempoolunfurl' }}, (got) => got.pipe(res)); | ||||||
|  |         } | ||||||
|  |         return; | ||||||
|  |       } else { | ||||||
|  |         res.status(404).send(); | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     let result = ''; | ||||||
|  |     try { | ||||||
|  |       if (req.headers['user-agent'] === 'googlebot') { | ||||||
|  |         result = await this.renderSEOPage(rawPath); | ||||||
|  |       } else { | ||||||
|  |         result = await this.renderUnfurlMeta(rawPath); | ||||||
|  |       } | ||||||
|  |       if (result && result.length) { | ||||||
|  |         res.send(result); | ||||||
|  |       } else { | ||||||
|  |         res.status(500).send(); | ||||||
|  |       } | ||||||
|  |     } catch (e) { | ||||||
|  |       logger.err(e instanceof Error ? e.message : `${e} ${req.params[0]}`); | ||||||
|  |       res.status(500).send(e instanceof Error ? e.message : e); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   async renderUnfurlMeta(rawPath: string): Promise<string> { | ||||||
|     const { lang, path } = parseLanguageUrl(rawPath); |     const { lang, path } = parseLanguageUrl(rawPath); | ||||||
|     const matchedRoute = matchRoute(this.network, path); |     const matchedRoute = matchRoute(this.network, path); | ||||||
|     let ogImageUrl = config.SERVER.HOST + (matchedRoute.staticImg || matchedRoute.fallbackImg); |     let ogImageUrl = config.SERVER.HOST + (matchedRoute.staticImg || matchedRoute.fallbackImg); | ||||||
| @ -178,7 +264,7 @@ class Server { | |||||||
|       ogTitle = `${this.network ? capitalize(this.network) + ' ' : ''}${matchedRoute.networkMode !== 'mainnet' ? capitalize(matchedRoute.networkMode) + ' ' : ''}${matchedRoute.title}`; |       ogTitle = `${this.network ? capitalize(this.network) + ' ' : ''}${matchedRoute.networkMode !== 'mainnet' ? capitalize(matchedRoute.networkMode) + ' ' : ''}${matchedRoute.title}`; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     res.send(` |     return ` | ||||||
|       <!doctype html> |       <!doctype html> | ||||||
|       <html lang="en-US" dir="ltr"> |       <html lang="en-US" dir="ltr"> | ||||||
|       <head> |       <head> | ||||||
| @ -199,7 +285,16 @@ class Server { | |||||||
|         <meta property="twitter:domain" content="mempool.space"> |         <meta property="twitter:domain" content="mempool.space"> | ||||||
|       <body></body> |       <body></body> | ||||||
|       </html> |       </html> | ||||||
|     `);
 |     `;
 | ||||||
|  |   } | ||||||
|  | 
 | ||||||
/**
 * Renders `rawPath` through the SSR cluster and returns the resulting HTML
 * with all `<script>` elements removed.
 *
 * @param rawPath request path, forwarded to the cluster task as `path` and
 *                appended to the mempool host to form `url`
 * @returns the script-free HTML, or an empty string when no SSR cluster is
 *          available or the task produced no markup
 */
async renderSEOPage(rawPath: string): Promise<string> {
  const html = await this.ssrCluster?.execute({ url: this.mempoolHost + rawPath, path: rawPath, action: 'ssr' });
  if (typeof html !== 'string' || !html.length) {
    return '';
  }
  // remove javascript to prevent double hydration
  // The match is lazy and spans newlines so that each script element is
  // removed on its own. A greedy `.*` would also swallow any markup sitting
  // between two script tags on the same line and would miss multi-line scripts.
  return html.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
}
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user