Selenium II: dispatch-oriented browser automation


The failure mode of linear scripts

Part I gave us a reliable way to claim tasks, detect page states, and click elements safely. But the workflow still had a critical gap: the actual submission happens on a secondary page, and the primary page needs to know exactly when that submission is done. A naive approach would poll the secondary page’s DOM, but the submit button disappears before the server has actually accepted the request. Waiting for the button to vanish means you think you are done when the network request is still in flight.

This article covers the second iteration: treating Selenium like a runtime. The script owns a tab queue, injects request-tracking JavaScript into every new page, intercepts click events at the browser level, and coordinates between two pages through a shared event loop rather than a linear script.

Tab queuing for the secondary page

The secondary page (a partner portal) opens in its own tab. Instead of managing a single tab, the script maintains a queue of window handles. When a task needs a fresh portal tab, a new tab is opened and its handle is appended to the queue; when the task is done, the oldest handle is removed from the queue and its tab is closed. This prevents the “stale window handle” errors that happen when you reuse a tab whose page has navigated away.

# Queue of partner-portal tabs, identified by their window handles.
#
# Handles are appended by #queue! and removed oldest-first by #pop; the oldest
# handle (see #handle!) is the one the rest of the workflow operates on.
class PortalQueue
  include Controller::Portal

  PORTAL_URL = 'https://partner.example.com/tasks'

  # @return [Array<String>] queued window handles, oldest first
  attr_accessor :handles

  def initialize()
    @handles = []
    # Compared and updated by the dispatch loop (await_submission) so the same
    # queue request is not served twice.
    @last_queue_request_id = nil
  end

  # Opens a new portal tab, records its handle, and puts focus back on the
  # window that was active before the call (when that window still exists).
  #
  # @param requester_page_id [String, nil] accepted from callers; not used here
  # @param requester_url [String, nil] forwarded to +seed_url+ to choose the tab's URL
  # @return [String] handle of the newly opened tab
  # @raise [StandardError] any failure is logged and then re-raised
  def queue!(requester_page_id: nil, requester_url: nil)
    modes = Extension.get_modes
    active_handle = current_handle_valid? ? handle! : ($driver.window_handle rescue nil)
    handle = queue_portal_tab(seed_url(nil, requester_url), modes)

    @handles << handle unless @handles.include?(handle)
    if active_handle && active_handle != handle &&
       safe_window_handles.include?(active_handle)
      $driver.switch_to.window(active_handle)
    end
    Session.log('PORTAL', '+', "Queued portal tab #{handle}")
    handle
  rescue => e
    Session.log('PORTAL', '!', "Failed to queue tab: #{e.message}", error: e)
    raise
  end

  # Removes the oldest handle from the queue and closes its tab if it is still
  # open. Afterwards the driver is pointed at a live window again: closing a
  # window does not move WebDriver's focus, so without this every following
  # driver call would fail with NoSuchWindowError.
  #
  # @return [nil] callers in this file do not use the return value
  def pop()
    return nil if @handles.empty?
    handle = @handles.shift

    unless safe_window_handles.include?(handle)
      Session.log('PORTAL', '-', "Discarded stale handle #{handle}")
      return nil
    end

    # Remember where we were so the caller's focus survives the close.
    previous = $driver.window_handle rescue nil
    begin
      $driver.switch_to.window(handle)
      $driver.close()
    rescue Selenium::WebDriver::Error::NoSuchWindowError
      # The tab vanished between the check above and the close; nothing to do.
    end
    refocus_after_close(previous, handle)
    nil
  end

  # @return [String, nil] the oldest queued handle, or nil when the queue is empty
  def handle!
    return nil if @handles.empty?
    @handles.first
  end

  private

  # Opens a tab after the last open window, installs the companion preload on
  # it, navigates it to +url+ and returns its handle. Focus is returned to the
  # window that was current on entry when that window is still open.
  def queue_portal_tab(url, modes)
    current = $driver.window_handle rescue nil
    last = safe_window_handles.last
    $driver.switch_to.window(last) if last && current && last != current

    $driver.switch_to.new_window(:tab)
    handle = $driver.window_handle
    # Register the preload before navigating so it applies to the first document.
    Extension.install_persistent!(modes)
    $driver.navigate.to(url)
    Extension.clear_queue_request!()
    handle
  ensure
    if current && safe_window_handles.include?(current)
      $driver.switch_to.window(current)
    end
  end

  # Chooses a live window to focus after +closed+ has been closed: the window
  # that was active before (+previous+), else the next queued portal tab that
  # is still open, else any open window. Does nothing when no window is left.
  def refocus_after_close(previous, closed)
    open_handles = safe_window_handles
    target =
      if previous && previous != closed && open_handles.include?(previous)
        previous
      else
        @handles.find { |queued| open_handles.include?(queued) } || open_handles.first
      end
    $driver.switch_to.window(target) if target
  end
end

Track submissions at the network layer

The portal page submits via both XMLHttpRequest and fetch. The script cannot reliably observe completion from the DOM alone because the submit button is hidden optimistically. Instead, the script injects a small JavaScript tracker that instruments XHR and fetch to count pending requests and mark them complete when the response arrives.

# JavaScript installed once per document (guarded by window.__submitTracker).
# It wraps XMLHttpRequest and fetch so that POST requests whose URL looks like
# a submission, and whose body has no `fileId` field, are counted while in
# flight and marked finished when they settle. It also counts submit-like
# clicks and form submits as "intent". The Ruby side reads the fields
# counter / lastCompletedId / pendingCount / intentCount from the object.
INJECT_SUBMIT_TRACKER_JS = <<~JS
if (!window.__submitTracker) {
  // Last path segment of the page URL; a request URL containing it counts as a submission.
  const contributionId = window.location.pathname.split('/').pop();
  window.__submitTracker = {
    counter: 0,          // ids handed out so far
    lastCompletedId: 0,  // highest id whose request has settled
    pendingCount: 0,     // tracked requests still in flight
    intentCount: 0,      // submit-like clicks and form submits seen
    lastIntentAt: 0,     // Date.now() of the most recent intent
    requests: {},        // id -> {id, method, url, t0, t1}
    // Keep only the 50 highest-numbered request records.
    _trimOld: function() {
      const keep = 50;
      const keys = Object.keys(this.requests).sort((a,b)=>b-a);
      for (let i = keep; i < keys.length; i++) delete this.requests[keys[i]];
    },
    // Accepts strings and URL objects alike by comparing the string form.
    _isSubmitUrl: function(url) {
      const text = url == null ? '' : String(url);
      return text !== '' && (text.includes('submit') ||
        (!!contributionId && text.includes(contributionId)));
    },
    _parseBody: function(body) {
      if (body == null) return null;
      if (typeof body === 'string') {
        try { return JSON.parse(body); } catch (e) { return null; }
      }
      if (typeof URLSearchParams !== 'undefined' && body instanceof URLSearchParams) {
        return Object.fromEntries(body.entries());
      }
      if (typeof FormData !== 'undefined' && body instanceof FormData) {
        const entries = {};
        body.forEach(function(value, key) { entries[key] = value; });
        return entries;
      }
      if (typeof body === 'object') return body;
      return null;
    },
    _hasFileIdPayload: function(body) {
      const parsed = this._parseBody(body);
      return !!(parsed && typeof parsed === 'object' &&
        Object.prototype.hasOwnProperty.call(parsed, 'fileId'));
    },
    _markIntent: function() {
      this.intentCount += 1;
      this.lastIntentAt = Date.now();
    },
    _isPost: function(method) {
      return !!method && String(method).toUpperCase() === 'POST';
    },
    // Register a tracked request and return its id.
    _begin: function(method, url) {
      const id = ++this.counter;
      this.pendingCount++;
      this.requests[id] = {id, method, url, t0: Date.now()};
      return id;
    },
    // Mark a tracked request as settled. The record may already have been
    // trimmed, so the timestamp write is guarded.
    _finish: function(id) {
      if (this.requests[id]) this.requests[id].t1 = Date.now();
      this.lastCompletedId = Math.max(this.lastCompletedId, id);
      this.pendingCount = Math.max(0, this.pendingCount - 1);
      this._trimOld();
    }
  };

  (function(tracker){
    const submitLikeTarget = function(target) {
      const button = target && target.closest ?
        target.closest('button, [role="button"], input[type="submit"]') : null;
      if (!button) return false;
      const text = String(
        button.innerText || button.textContent ||
        button.value || button.getAttribute('aria-label') || ''
      ).trim().toLowerCase();
      return text.includes('submit') || text.includes('confirm submission');
    };

    document.addEventListener('click', function(event) {
      try {
        if (submitLikeTarget(event.target)) tracker._markIntent();
      } catch (e) {}
    }, true);

    window.addEventListener('submit', function() {
      try { tracker._markIntent(); } catch (e) {}
    }, true);

    const origOpen = XMLHttpRequest.prototype.open;
    const origSend = XMLHttpRequest.prototype.send;
    XMLHttpRequest.prototype.open = function(method, url) {
      this._method = method; this._url = url;
      return origOpen.apply(this, arguments);
    };
    XMLHttpRequest.prototype.send = function(body) {
      try {
        if (tracker._isPost(this._method) &&
            tracker._isSubmitUrl(this._url) && !tracker._hasFileIdPayload(body)) {
          const id = tracker._begin(this._method, this._url);
          // loadend fires after load, error, abort and timeout alike, exactly once per send.
          this.addEventListener('loadend', function() {
            try { tracker._finish(id); } catch (e) {}
          }, {once: true});
        }
      } catch(e) {}
      return origSend.apply(this, arguments);
    };

    const origFetch = window.fetch;
    window.fetch = async function(input, init) {
      let id = null;
      // Tracking must never break the page's own request, hence the try/catch.
      try {
        const isRequest = typeof Request !== 'undefined' && input instanceof Request;
        // A Request object carries its own method when init does not override it.
        const method = (init && init.method) || (isRequest && input.method) || 'GET';
        const url = isRequest ? input.url : String(input);
        if (tracker._isPost(method) && tracker._isSubmitUrl(url)) {
          let body = init && Object.prototype.hasOwnProperty.call(init, 'body') ? init.body : undefined;
          if (body === undefined && isRequest) {
            try { body = await input.clone().text(); } catch (e) {}
          }
          if (!tracker._hasFileIdPayload(body)) id = tracker._begin(method, url);
        }
      } catch (e) {}

      const pending = origFetch.apply(this, arguments);
      if (id === null) return pending;
      // finally() runs once whether the request resolves or rejects and passes the outcome through.
      return pending.finally(function() {
        try { tracker._finish(id); } catch (e) {}
      });
    };
  })(window.__submitTracker);
}
JS

# Makes sure the submit tracker is present in the current document.
#
# Probes for window.__submitTracker; a failing probe counts as "not present".
# When the tracker is missing the injection script is executed.
#
# @return [Boolean] true when the tracker was injected by this call,
#   false when it was already there
def ensure_hooks!
  already_installed =
    begin
      $driver.execute_script("return !!window.__submitTracker;")
    rescue StandardError
      false
    end
  return false if already_installed

  $driver.execute_script(INJECT_SUBMIT_TRACKER_JS)
  true
end

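The tracker instruments both XMLHttpRequest and fetch, but it only tracks POST requests whose URL looks like a submission (it contains “submit” or the contribution ID taken from the page path) and whose body has no fileId field. For those requests it counts how many are still pending and records start and end timestamps. It also listens for click and submit events on the page to detect intent (the user clicked a submit-like button) independently of whether the network request has actually fired yet.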

Dual-channel finalization check

With the JS tracker in place, the script can now ask two independent questions: is the UI showing a finalized state, and has the network layer finished all its requests? Only when both agree can the script safely switch back to the primary page and continue.

# Combines the UI state with the injected network tracker to decide whether
# the submission has gone through.
#
# The UI counts as finalized when at most one submission button is found and
# none of the found buttons is enabled (a button that raises while being
# inspected is treated as not enabled).
#
# @return [Symbol, nil]
#   :pending  - UI finalized but tracked requests are still in flight
#   :complete - UI finalized and every tracked request has settled
#   :partial  - UI finalized and submit intent was seen, but no verdict from
#               the tracker (missing, unreadable, or no tracked request yet)
#   nil       - none of the above
def has_finalized?
  # Take one DOM snapshot so both UI checks look at the same set of buttons.
  buttons = submission_elements!
  ui = false
  if buttons.length <= 1
    ui = begin
      buttons.none? { |btn| btn.enabled? }
    rescue StandardError
      true
    end
  end
  intent = false

  begin
    t = $driver.execute_script("return window.__submitTracker || null")
    if t
      intent = t['intentCount'].to_i > 0
      return :pending if ui && t['pendingCount'].to_i > 0
      return :complete if ui && t['counter'].to_i > 0 &&
        t['pendingCount'].to_i == 0 &&
        t['lastCompletedId'].to_i == t['counter'].to_i
    end
  rescue StandardError
    # Best effort: an unreadable tracker falls through to the UI/intent verdict.
  end

  ui && intent ? :partial : nil
end

# XPath for <button> elements matched via contains(text(), 'Submit').
SUBMIT        = "//button[contains(text(), 'Submit')]".freeze
# XPath for <button> elements matched via contains(text(), 'Confirm Submission').
CONFIRM       = "//button[contains(text(), 'Confirm Submission')]".freeze

# Collects the submission buttons currently on the page.
#
# Looks up the SUBMIT matches followed by the CONFIRM matches. Only when both
# lookups come back empty does it fall back to buttons that contain an SVG
# whose class includes "check".
#
# @return [Array] the matching elements (possibly empty)
def submission_elements!
  labelled = [SUBMIT, CONFIRM].flat_map do |locator|
    $driver.find_elements(xpath: locator)
  end

  if labelled.any?
    labelled
  else
    $driver.find_elements(xpath: "//button[.//svg[contains(@class, 'check')]]")
  end
end

The companion extension

The script coordinates with a small companion browser extension that exposes modes (auto-submit, auto-skip, audio notifications, etc.) through dataset attributes on the documentElement. The Ruby side polls these attributes and reacts to queue requests and pop commands without ever touching the extension’s internal DOM.

# Ruby-side bridge to the companion browser extension. Communication goes
# through dataset attributes on the page's documentElement, read via
# $driver.execute_script.
module Extension
  READY_DATASET_KEY  = 'companionReady'
  MODES_DATASET_KEY  = 'companionModes'
  QUEUE_DATASET_KEY  = 'queueRequested'
  POP_DATASET_KEY    = 'popRequested'

  MODES = %w[auto_submit auto_skip auto_acknowledge audio_notify
             sync_heartbeat unlock stereo_check].freeze

  module_function

  # One predicate per mode (auto_submit?, auto_skip?, ...), each delegating to
  # mode_enabled?, which is defined outside this excerpt.
  MODES.each do |mode|
    define_method("#{mode}?") { mode_enabled?(mode) }
  end

  # @return [Boolean] true when the page's companionReady dataset flag is '1';
  #   false otherwise, including when the script cannot be executed
  def ready?
    $driver.execute_script(<<~JS)
      return !!(document.documentElement &&
        document.documentElement.dataset &&
        document.documentElement.dataset.#{READY_DATASET_KEY} === '1');
    JS
  rescue
    false
  end

  # Reads the pending request, if any, that the companion has flagged on the
  # current page. A queue request takes precedence over a pop request.
  #
  # The browser script returns plain strings; they are translated here
  # because Ruby symbol literals are not valid JavaScript.
  #
  # @return [Hash, Symbol, nil]
  #   { type: :queue, page_id: String|nil, url: String } for a queue request,
  #   :pop for a pop request,
  #   nil when nothing is requested or the page cannot be queried
  def state!
    raw = $driver.execute_script(<<~JS)
      const data = (document.documentElement && document.documentElement.dataset) || {};
      if (data.#{QUEUE_DATASET_KEY} === '1') {
        return { type: 'queue', pageId: data.companionPageId || null,
                 url: window.location.href };
      }
      if (data.#{POP_DATASET_KEY} === '1') return 'pop';
      return null;
    JS

    case raw
    when Hash
      { type: :queue, page_id: raw['pageId'], url: raw['url'] }
    when 'pop'
      :pop
    end
  rescue
    nil
  end

  # Registers the preload script for the current window through the CDP
  # command Page.addScriptToEvaluateOnNewDocument. The result is cached per
  # window handle, so a window is registered only once.
  #
  # @param modes [Object, nil] passed to preload_source (defined outside this excerpt)
  # @return [Object] the script identifier reported by the browser
  def install_persistent!(modes = nil)
    @persistent_scripts ||= {}
    handle = $driver.window_handle
    return @persistent_scripts[handle] if @persistent_scripts[handle]

    result = $driver.execute_cdp(
      'Page.addScriptToEvaluateOnNewDocument',
      source: preload_source(modes))
    # The result may be a Hash keyed by string or symbol, or the bare identifier.
    script_id = result.is_a?(Hash) ? (result['identifier'] || result[:identifier]) : result
    @persistent_scripts[handle] = script_id
    Session.log('EXTENSION', '~', "Registered preload for #{handle}")
    script_id
  end

  # Ensures the companion is ready on the current page: registers the preload,
  # and if the page is not ready yet, bootstraps it and waits up to one second
  # (polling every 0.05 s) for the ready flag.
  #
  # @return [true]
  def ensure_ready!
    modes = get_modes
    install_persistent!(modes)
    return true if ready?
    bootstrap!(modes)
    Selenium::WebDriver::Wait.new(timeout: 1, interval: 0.05).until { ready? }
    true
  end
end

The dispatch loop: await_submission

The heart of the system is a single event loop on the portal tab. Every 100ms it checks: is the extension ready? Does the companion want a new tab queued or an old one popped? Has a new contribution appeared? Is the contribution finalized? Should we auto-submit after the handling time expires?

# Dispatch loop run against the active portal tab. Roughly every 0.1 s it:
#   1. recovers when the current portal handle is no longer valid,
#   2. makes sure the companion extension is ready,
#   3. serves queue / pop requests raised by the companion,
#   4. registers a contribution when submission buttons are present and either
#      the hooks were just injected or the contribution id changed,
#   5. once a contribution is registered, checks for finalization, idle-skip
#      and auto-submit.
#
# @param start [Time] reference time for the auto-submit deadline
# @return [Object, nil] nil when no portal tab is left or the idle skip fires
#   with more than one tab queued; otherwise the result of await_finalize
def await_submission(start)
  $driver.switch_to.window(handle!)

  mul = bell_rand()
  handling = 0
  idle_counter = 0
  prev_contribution_id = nil

  loop do
    sleep 0.1

    unless current_handle_valid?
      pop()
      return nil if @handles.empty?
      $driver.switch_to.window(handle!)
    end

    Extension.ensure_ready!()

    # Bind the request to a local: the branches below need its fields, and the
    # local `state` further down is not assigned yet at this point.
    request = Extension.state!
    case request
    when Hash
      request_id = request[:request_id] || [request[:page_id], request[:url]].join('|')
      if request[:type] == :queue && request_id != @last_queue_request_id
        queue!(requester_page_id: request[:page_id], requester_url: request[:url])
        @last_queue_request_id = request_id
      elsif request[:type] == :queue
        # Same request seen again: it was already served, so just clear the flag.
        Extension.clear_queue_request!()
      end
    when :pop
      pop()
      # The tab this loop was driving is gone; move to the next queued tab or
      # give up, exactly as for an invalid handle above.
      return nil if @handles.empty?
      $driver.switch_to.window(handle!)
    end

    elements = submission_elements!
    if elements.any? && (ensure_hooks!() || contribution_id! != prev_contribution_id)
      prev_contribution_id = contribution_id!
      Session.log('PORTAL', '+', "Registered contribution #{contribution_id!}")

      if Extension.sync_heartbeat?
        Extension.block_heartbeats!()
        Extension.clear_heartbeat_events!()
      end

      # Handling time is in minutes (it is multiplied by 60 for the deadline below).
      handling = Session.max_handling_time(contribution_id!) * mul
      minutes = handling.floor
      seconds = ((handling - minutes) * 60).round
      Session.log('PORTAL', '~', "Suggested handling: #{format("%02d:%02d", minutes, seconds)}")
    end

    # Nothing registered yet, so there is nothing to finalize or submit.
    next if handling == 0
    state = has_finalized?
    break if state == :complete

    if Extension.auto_skip? && contribution_id!.include?('tasks')
      idle_counter += 1
      if idle_counter >= 300   # 30 seconds at 0.1s loop
        Session.log('PORTAL', '-', 'Tasks page idle, skipping...')
        return nil if @handles.length > 1
      end
    else
      idle_counter = 0
    end

    handling = apply_handling_display!(handling)
    if Extension.auto_submit? && (Time.now - start) >= handling * 60
      Session.log('PORTAL', '~', 'Auto-submitting...')
      submit!()
      break
    end
  end

  await_finalize(Extension.auto_skip?)
end

Intercepting navigation clicks with JavaScript

When the user (or script) clicks a navigation button on the primary page, the default behavior immediately navigates away. The script needs to know which button was clicked so it can decide whether to continue to the next task or exit. Instead of racing the browser, it overrides the click handlers on both buttons and writes a signal to a global variable that Ruby can poll.

# Waits until the GO_HOME button exists, replaces the click behaviour of the
# GO_HOME and NEXT_TASK buttons with handlers that only record which one was
# clicked, then polls every 0.1 s for that signal.
#
# A "GO_HOME" signal terminates the process via exit; any other signal ends
# the wait.
#
# NOTE(review): the injected script calls xp(), which is not defined in this
# snippet - presumably supplied by safe_execute_script or the page; confirm.
#
# @return [nil]
def await_navigation!
  WAIT.until { safe_find_elements(xpath: GO_HOME).any? }

  safe_execute_script(<<~JS)
    // The handlers below cancel the navigation, so this document (and a signal
    // recorded by an earlier call) can still be around. Start from a clean slate.
    window.__navSignal = null;

    const go = xp("#{GO_HOME}");
    const next = xp("#{NEXT_TASK}");

    function override(btn, signal) {
      if (!btn) return;
      const handler = function(e) {
        e.preventDefault();
        e.stopImmediatePropagation();
        window.__navSignal = signal;
      };
      btn.addEventListener("click", handler, true);
      btn.onclick = handler;
    }

    override(go, "GO_HOME");
    override(next, "NEXT_TASK");
  JS

  loop do
    sleep 0.1
    sig = safe_execute_script("return window.__navSignal || null;")
    next unless sig
    exit() if sig == "GO_HOME"
    break
  end
end

Putting it together: the Tracker

The Tracker class ties everything together. It owns the portal queue, runs the dispatch loop, reads the elapsed time from the primary page, and submits the completed task back to the dashboard.

# Drives one task on the primary page: hands off to the portal queue, waits for
# a contribution, then walks the primary page through its states until the
# navigation step is reached.
class Tracker
  include Controller::Tracker

  attr_accessor :uuid, :status, :contribution_id, :project

  # Reads the initial status from the current URL.
  #
  # With an '?error=' marker in the URL, the text after it becomes the status.
  # Otherwise the UUID match from the URL is stored, the status is set to
  # 'In progress', and the exit button is waited for.
  #
  # NOTE(review): @uuid holds the MatchData returned by String#match (or nil
  # when the URL has no UUID), not a String - confirm callers expect that.
  #
  # @param project [Object] must respond to portal_queue and handle
  def initialize(project)
    @project = project
    @portal = @project.portal_queue

    if $driver.current_url.include?('?error=')
      @status = $driver.current_url.split('?error=').last
    else
      @uuid = $driver.current_url.match(
        /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/)
      @status = 'In progress'
      wait(true, xpath: EXIT_BUTTON)
    end
  end

  # Runs the whole hand-off: starts the task, waits on the portal until a
  # contribution id is available, then reacts to the primary page's states
  # until the :nav state, at which point the portal tab is closed.
  #
  # @return [nil]
  def await_submission()
    @portal.queue!() if @portal.handles.empty?
    $driver.switch_to.window(@project.handle)
    begin_task!(true)

    Session.log('TRACKER', '~', 'Hand-off to portal', sound_path: SUCCESS_SOUND)
    start = Time.now
    @contribution_id = @portal.await_submission(start)

    while @contribution_id.nil?
      @portal.pop()
      if @portal.handles.empty?
        # The portal loop starts by switching to the first queued handle, so it
        # needs a tab to work with. Queue one from a window known to be open.
        $driver.switch_to.window(@project.handle)
        @portal.queue!()
      end
      Session.log('TRACKER', '~', 'No contribution, retrying...')
      @contribution_id = @portal.await_submission(start)
    end

    Session.log('TRACKER', '~', 'Autofilling...')
    $driver.switch_to.window(@project.handle)

    loop do
      case state!
      when :external, :timer, :dialog, :error
        begin_task!()
      when :continue
        continue!()
      when :confirm
        oriented_click(xpath: CONFIRM_TIME)
      when :task
        submit_task!(@contribution_id)
      when :submit
        if submit_tracker!()
          elapsed = elapsed_time!
          Session.add_elapsed_minutes(elapsed)
          @status = 'Completed'
          Session.log('TRACKER', '+', "Completed #{@uuid}")
        end
      when :nav
        Session.log('TRACKER', '~', 'Awaiting navigation...')
        # With further portal tabs queued there is no need to wait for a
        # navigation click; otherwise block until one arrives.
        await_navigation!() unless @portal.handles.length > 1
        @portal.pop()
        return
      end
    end
  end
end

What this iteration proved