Merge pull request #4653 from mempool/mononaut/healthier-checks
Staggered esplora fallback health checks
This commit is contained in:
		
						commit
						49553b7bae
					
				@ -4,6 +4,7 @@ import http from 'http';
 | 
			
		||||
import { AbstractBitcoinApi } from './bitcoin-api-abstract-factory';
 | 
			
		||||
import { IEsploraApi } from './esplora-api.interface';
 | 
			
		||||
import logger from '../../logger';
 | 
			
		||||
import { Common } from '../common';
 | 
			
		||||
 | 
			
		||||
interface FailoverHost {
 | 
			
		||||
  host: string,
 | 
			
		||||
@ -15,11 +16,13 @@ interface FailoverHost {
 | 
			
		||||
  outOfSync?: boolean,
 | 
			
		||||
  unreachable?: boolean,
 | 
			
		||||
  preferred?: boolean,
 | 
			
		||||
  checked: boolean,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
class FailoverRouter {
 | 
			
		||||
  activeHost: FailoverHost;
 | 
			
		||||
  fallbackHost: FailoverHost;
 | 
			
		||||
  maxHeight: number = 0;
 | 
			
		||||
  hosts: FailoverHost[];
 | 
			
		||||
  multihost: boolean;
 | 
			
		||||
  pollInterval: number = 60000;
 | 
			
		||||
@ -34,6 +37,7 @@ class FailoverRouter {
 | 
			
		||||
    this.hosts = (config.ESPLORA.FALLBACK || []).map(domain => {
 | 
			
		||||
      return {
 | 
			
		||||
        host: domain,
 | 
			
		||||
        checked: false,
 | 
			
		||||
        rtts: [],
 | 
			
		||||
        rtt: Infinity,
 | 
			
		||||
        failures: 0,
 | 
			
		||||
@ -46,6 +50,7 @@ class FailoverRouter {
 | 
			
		||||
      failures: 0,
 | 
			
		||||
      socket: !!config.ESPLORA.UNIX_SOCKET_PATH,
 | 
			
		||||
      preferred: true,
 | 
			
		||||
      checked: false,
 | 
			
		||||
    };
 | 
			
		||||
    this.fallbackHost = this.activeHost;
 | 
			
		||||
    this.hosts.unshift(this.activeHost);
 | 
			
		||||
@ -74,66 +79,87 @@ class FailoverRouter {
 | 
			
		||||
      clearTimeout(this.pollTimer);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const results = await Promise.allSettled(this.hosts.map(async (host) => {
 | 
			
		||||
      if (host.socket) {
 | 
			
		||||
        return this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT });
 | 
			
		||||
      } else {
 | 
			
		||||
        return this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT });
 | 
			
		||||
      }
 | 
			
		||||
    }));
 | 
			
		||||
    const maxHeight = results.reduce((max, result) => Math.max(max, result.status === 'fulfilled' ? result.value?.data || 0 : 0), 0);
 | 
			
		||||
    const start = Date.now();
 | 
			
		||||
 | 
			
		||||
    // update rtts & sync status
 | 
			
		||||
    for (let i = 0; i < results.length; i++) {
 | 
			
		||||
      const host = this.hosts[i];
 | 
			
		||||
      const result = results[i].status === 'fulfilled' ? (results[i] as PromiseFulfilledResult<AxiosResponse<number, any>>).value : null;
 | 
			
		||||
      if (result) {
 | 
			
		||||
        const height = result.data;
 | 
			
		||||
        const rtt = result.config['meta'].rtt;
 | 
			
		||||
        host.rtts.unshift(rtt);
 | 
			
		||||
        host.rtts.slice(0, 5);
 | 
			
		||||
        host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
 | 
			
		||||
        host.latestHeight = height;
 | 
			
		||||
        if (height == null || isNaN(height) || (maxHeight - height > 2)) {
 | 
			
		||||
          host.outOfSync = true;
 | 
			
		||||
    for (const host of this.hosts) {
 | 
			
		||||
      try {
 | 
			
		||||
        const result = await (host.socket
 | 
			
		||||
          ? this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT })
 | 
			
		||||
          : this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT })
 | 
			
		||||
        );
 | 
			
		||||
        if (result) {
 | 
			
		||||
          const height = result.data;
 | 
			
		||||
          this.maxHeight = Math.max(height, this.maxHeight);
 | 
			
		||||
          const rtt = result.config['meta'].rtt;
 | 
			
		||||
          host.rtts.unshift(rtt);
 | 
			
		||||
          host.rtts.slice(0, 5);
 | 
			
		||||
          host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
 | 
			
		||||
          host.latestHeight = height;
 | 
			
		||||
          if (height == null || isNaN(height) || (this.maxHeight - height > 2)) {
 | 
			
		||||
            host.outOfSync = true;
 | 
			
		||||
          } else {
 | 
			
		||||
            host.outOfSync = false;
 | 
			
		||||
          }
 | 
			
		||||
          host.unreachable = false;
 | 
			
		||||
        } else {
 | 
			
		||||
          host.outOfSync = false;
 | 
			
		||||
          host.outOfSync = true;
 | 
			
		||||
          host.unreachable = true;
 | 
			
		||||
          host.rtts = [];
 | 
			
		||||
          host.rtt = Infinity;
 | 
			
		||||
        }
 | 
			
		||||
        host.unreachable = false;
 | 
			
		||||
      } else {
 | 
			
		||||
      } catch (e) {
 | 
			
		||||
        host.outOfSync = true;
 | 
			
		||||
        host.unreachable = true;
 | 
			
		||||
        host.rtts = [];
 | 
			
		||||
        host.rtt = Infinity;
 | 
			
		||||
      }
 | 
			
		||||
      host.checked = true;
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      // switch if the current host is out of sync or significantly slower than the next best alternative
 | 
			
		||||
      const rankOrder = this.sortHosts();
 | 
			
		||||
      // switch if the current host is out of sync or significantly slower than the next best alternative
 | 
			
		||||
      if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== rankOrder[0] && rankOrder[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (rankOrder[0].rtt * 2) + 50)) {
 | 
			
		||||
        if (this.activeHost.unreachable) {
 | 
			
		||||
          logger.warn(`🚨🚨🚨 Unable to reach ${this.activeHost.host}, failing over to next best alternative 🚨🚨🚨`);
 | 
			
		||||
        } else if (this.activeHost.outOfSync) {
 | 
			
		||||
          logger.warn(`🚨🚨🚨 ${this.activeHost.host} has fallen behind, failing over to next best alternative 🚨🚨🚨`);
 | 
			
		||||
        } else {
 | 
			
		||||
          logger.debug(`🛠️ ${this.activeHost.host} is no longer the best esplora host 🛠️`);
 | 
			
		||||
        }
 | 
			
		||||
        this.electHost();
 | 
			
		||||
      }
 | 
			
		||||
      await Common.sleep$(50);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this.sortHosts();
 | 
			
		||||
    const rankOrder = this.updateFallback();
 | 
			
		||||
    logger.debug(`Tomahawk ranking:\n${rankOrder.map((host, index) => this.formatRanking(index, host, this.activeHost, this.maxHeight)).join('\n')}`);
 | 
			
		||||
 | 
			
		||||
    logger.debug(`Tomahawk ranking:\n${this.hosts.map((host, index) => this.formatRanking(index, host, this.activeHost, maxHeight)).join('\n')}`);
 | 
			
		||||
    const elapsed = Date.now() - start;
 | 
			
		||||
 | 
			
		||||
    // switch if the current host is out of sync or significantly slower than the next best alternative
 | 
			
		||||
    if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== this.hosts[0] && this.hosts[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (this.hosts[0].rtt * 2) + 50)) {
 | 
			
		||||
      if (this.activeHost.unreachable) {
 | 
			
		||||
        logger.warn(`🚨🚨🚨 Unable to reach ${this.activeHost.host}, failing over to next best alternative 🚨🚨🚨`);
 | 
			
		||||
      } else if (this.activeHost.outOfSync) {
 | 
			
		||||
        logger.warn(`🚨🚨🚨 ${this.activeHost.host} has fallen behind, failing over to next best alternative 🚨🚨🚨`);
 | 
			
		||||
      } else {
 | 
			
		||||
        logger.debug(`🛠️ ${this.activeHost.host} is no longer the best esplora host 🛠️`);
 | 
			
		||||
      }
 | 
			
		||||
      this.electHost();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this.pollTimer = setTimeout(() => { this.pollHosts(); }, this.pollInterval);
 | 
			
		||||
    this.pollTimer = setTimeout(() => { this.pollHosts(); }, Math.max(1, this.pollInterval - elapsed));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private formatRanking(index: number, host: FailoverHost, active: FailoverHost, maxHeight: number): string {
 | 
			
		||||
    const heightStatus = host.outOfSync ? '🚫' : (host.latestHeight && host.latestHeight < maxHeight ? '🟧' : '✅');
 | 
			
		||||
    return `${host === active ? '⭐️' : '  '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : '    -  '} ${host.unreachable ? '🔥' : '✅'} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? '⭐️' : '  '}`;
 | 
			
		||||
    const heightStatus = !host.checked ? '⏳' : (host.outOfSync ? '🚫' : (host.latestHeight && host.latestHeight < maxHeight ? '🟧' : '✅'));
 | 
			
		||||
    return `${host === active ? '⭐️' : '  '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : '    -  '} ${!host.checked ? '⏳' : (host.unreachable ? '🔥' : '✅')} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? '⭐️' : '  '}`;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private updateFallback(): FailoverHost[] {
 | 
			
		||||
    const rankOrder = this.sortHosts();
 | 
			
		||||
    if (rankOrder.length > 1 && rankOrder[0] === this.activeHost) {
 | 
			
		||||
      this.fallbackHost = rankOrder[1];
 | 
			
		||||
    } else {
 | 
			
		||||
      this.fallbackHost = rankOrder[0];
 | 
			
		||||
    }
 | 
			
		||||
    return rankOrder;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // sort hosts by connection quality, and update default fallback
 | 
			
		||||
  private sortHosts(): void {
 | 
			
		||||
  private sortHosts(): FailoverHost[] {
 | 
			
		||||
    // sort by connection quality
 | 
			
		||||
    this.hosts.sort((a, b) => {
 | 
			
		||||
    return this.hosts.slice().sort((a, b) => {
 | 
			
		||||
      if ((a.unreachable || a.outOfSync) === (b.unreachable || b.outOfSync)) {
 | 
			
		||||
        if  (a.preferred === b.preferred) {
 | 
			
		||||
          // lower rtt is best
 | 
			
		||||
@ -145,19 +171,14 @@ class FailoverRouter {
 | 
			
		||||
        return (a.unreachable || a.outOfSync) ? 1 : -1;
 | 
			
		||||
      }
 | 
			
		||||
    });
 | 
			
		||||
    if (this.hosts.length > 1 && this.hosts[0] === this.activeHost) {
 | 
			
		||||
      this.fallbackHost = this.hosts[1];
 | 
			
		||||
    } else {
 | 
			
		||||
      this.fallbackHost = this.hosts[0];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // depose the active host and choose the next best replacement
 | 
			
		||||
  private electHost(): void {
 | 
			
		||||
    this.activeHost.outOfSync = true;
 | 
			
		||||
    this.activeHost.failures = 0;
 | 
			
		||||
    this.sortHosts();
 | 
			
		||||
    this.activeHost = this.hosts[0];
 | 
			
		||||
    const rankOrder = this.sortHosts();
 | 
			
		||||
    this.activeHost = rankOrder[0];
 | 
			
		||||
    logger.warn(`Switching esplora host to ${this.activeHost.host}`);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user