"use strict"; /** * Copyright 2017 Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.Frame = exports.FrameManager = exports.FrameManagerEmittedEvents = void 0; const EventEmitter_js_1 = require("./EventEmitter.js"); const assert_js_1 = require("./assert.js"); const helper_js_1 = require("./helper.js"); const ExecutionContext_js_1 = require("./ExecutionContext.js"); const LifecycleWatcher_js_1 = require("./LifecycleWatcher.js"); const DOMWorld_js_1 = require("./DOMWorld.js"); const NetworkManager_js_1 = require("./NetworkManager.js"); const Connection_js_1 = require("./Connection.js"); const UTILITY_WORLD_NAME = '__puppeteer_utility_world__'; const xPathPattern = /^\(\/\/[^\)]+\)|^\/\//; /** * We use symbols to prevent external parties listening to these events. * They are internal to Puppeteer. * * @internal */ exports.FrameManagerEmittedEvents = { FrameAttached: Symbol('FrameManager.FrameAttached'), FrameNavigated: Symbol('FrameManager.FrameNavigated'), FrameDetached: Symbol('FrameManager.FrameDetached'), LifecycleEvent: Symbol('FrameManager.LifecycleEvent'), FrameNavigatedWithinDocument: Symbol('FrameManager.FrameNavigatedWithinDocument'), ExecutionContextCreated: Symbol('FrameManager.ExecutionContextCreated'), ExecutionContextDestroyed: Symbol('FrameManager.ExecutionContextDestroyed'), }; /** * @internal */ class FrameManager extends EventEmitter_js_1.EventEmitter { constructor(client, page, ignoreHTTPSErrors, timeoutSettings) { super(); this._frames = new Map(); this._contextIdToContext = new Map(); this._isolatedWorlds = new Set(); this._client = client; this._page = page; this._networkManager = new NetworkManager_js_1.NetworkManager(client, ignoreHTTPSErrors, this); this._timeoutSettings = timeoutSettings; this.setupEventListeners(this._client); } setupEventListeners(session) { session.on('Page.frameAttached', (event) => { this._onFrameAttached(session, event.frameId, event.parentFrameId); }); session.on('Page.frameNavigated', (event) => { this._onFrameNavigated(event.frame); }); session.on('Page.navigatedWithinDocument', (event) => { this._onFrameNavigatedWithinDocument(event.frameId, event.url); }); session.on('Page.frameDetached', (event) => { this._onFrameDetached(event.frameId, event.reason); }); session.on('Page.frameStoppedLoading', (event) => { this._onFrameStoppedLoading(event.frameId); }); session.on('Runtime.executionContextCreated', (event) => { this._onExecutionContextCreated(event.context, session); }); session.on('Runtime.executionContextDestroyed', (event) => { this._onExecutionContextDestroyed(event.executionContextId, session); }); session.on('Runtime.executionContextsCleared', () => { this._onExecutionContextsCleared(session); }); session.on('Page.lifecycleEvent', (event) => { this._onLifecycleEvent(event); }); session.on('Target.attachedToTarget', async (event) => { this._onAttachedToTarget(event); }); session.on('Target.detachedFromTarget', async (event) => { this._onDetachedFromTarget(event); }); } async initialize(client = this._client) { try { const result = await Promise.all([ client.send('Page.enable'), client.send('Page.getFrameTree'), ]); const { frameTree } = result[1]; this._handleFrameTree(client, frameTree); await Promise.all([ client.send('Page.setLifecycleEventsEnabled', { enabled: true }), client .send('Runtime.enable') .then(() => this._ensureIsolatedWorld(client, UTILITY_WORLD_NAME)), // TODO: Network manager is not aware of OOP iframes yet. client === this._client ? this._networkManager.initialize() : Promise.resolve(), ]); } catch (error) { // The target might have been closed before the initialization finished. if (error.message.includes('Target closed') || error.message.includes('Session closed')) { return; } throw error; } } networkManager() { return this._networkManager; } async navigateFrame(frame, url, options = {}) { assertNoLegacyNavigationOptions(options); const { referer = this._networkManager.extraHTTPHeaders()['referer'], waitUntil = ['load'], timeout = this._timeoutSettings.navigationTimeout(), } = options; const watcher = new LifecycleWatcher_js_1.LifecycleWatcher(this, frame, waitUntil, timeout); let ensureNewDocumentNavigation = false; let error = await Promise.race([ navigate(this._client, url, referer, frame._id), watcher.timeoutOrTerminationPromise(), ]); if (!error) { error = await Promise.race([ watcher.timeoutOrTerminationPromise(), ensureNewDocumentNavigation ? watcher.newDocumentNavigationPromise() : watcher.sameDocumentNavigationPromise(), ]); } watcher.dispose(); if (error) throw error; return watcher.navigationResponse(); async function navigate(client, url, referrer, frameId) { try { const response = await client.send('Page.navigate', { url, referrer, frameId, }); ensureNewDocumentNavigation = !!response.loaderId; return response.errorText ? new Error(`${response.errorText} at ${url}`) : null; } catch (error) { return error; } } } async waitForFrameNavigation(frame, options = {}) { assertNoLegacyNavigationOptions(options); const { waitUntil = ['load'], timeout = this._timeoutSettings.navigationTimeout(), } = options; const watcher = new LifecycleWatcher_js_1.LifecycleWatcher(this, frame, waitUntil, timeout); const error = await Promise.race([ watcher.timeoutOrTerminationPromise(), watcher.sameDocumentNavigationPromise(), watcher.newDocumentNavigationPromise(), ]); watcher.dispose(); if (error) throw error; return watcher.navigationResponse(); } async _onAttachedToTarget(event) { if (event.targetInfo.type !== 'iframe') { return; } const frame = this._frames.get(event.targetInfo.targetId); const session = Connection_js_1.Connection.fromSession(this._client).session(event.sessionId); frame._updateClient(session); this.setupEventListeners(session); await this.initialize(session); } async _onDetachedFromTarget(event) { const frame = this._frames.get(event.targetId); if (frame && frame.isOOPFrame()) { // When an OOP iframe is removed from the page, it // will only get a Target.detachedFromTarget event. this._removeFramesRecursively(frame); } } _onLifecycleEvent(event) { const frame = this._frames.get(event.frameId); if (!frame) return; frame._onLifecycleEvent(event.loaderId, event.name); this.emit(exports.FrameManagerEmittedEvents.LifecycleEvent, frame); } _onFrameStoppedLoading(frameId) { const frame = this._frames.get(frameId); if (!frame) return; frame._onLoadingStopped(); this.emit(exports.FrameManagerEmittedEvents.LifecycleEvent, frame); } _handleFrameTree(session, frameTree) { if (frameTree.frame.parentId) { this._onFrameAttached(session, frameTree.frame.id, frameTree.frame.parentId); } this._onFrameNavigated(frameTree.frame); if (!frameTree.childFrames) return; for (const child of frameTree.childFrames) { this._handleFrameTree(session, child); } } page() { return this._page; } mainFrame() { return this._mainFrame; } frames() { return Array.from(this._frames.values()); } frame(frameId) { return this._frames.get(frameId) || null; } _onFrameAttached(session, frameId, parentFrameId) { if (this._frames.has(frameId)) { const frame = this._frames.get(frameId); if (session && frame.isOOPFrame()) { // If an OOP iframes becomes a normal iframe again // it is first attached to the parent page before // the target is removed. frame._updateClient(session); } return; } (0, assert_js_1.assert)(parentFrameId); const parentFrame = this._frames.get(parentFrameId); const frame = new Frame(this, parentFrame, frameId, session); this._frames.set(frame._id, frame); this.emit(exports.FrameManagerEmittedEvents.FrameAttached, frame); } _onFrameNavigated(framePayload) { const isMainFrame = !framePayload.parentId; let frame = isMainFrame ? this._mainFrame : this._frames.get(framePayload.id); (0, assert_js_1.assert)(isMainFrame || frame, 'We either navigate top level or have old version of the navigated frame'); // Detach all child frames first. if (frame) { for (const child of frame.childFrames()) this._removeFramesRecursively(child); } // Update or create main frame. if (isMainFrame) { if (frame) { // Update frame id to retain frame identity on cross-process navigation. this._frames.delete(frame._id); frame._id = framePayload.id; } else { // Initial main frame navigation. frame = new Frame(this, null, framePayload.id, this._client); } this._frames.set(framePayload.id, frame); this._mainFrame = frame; } // Update frame payload. frame._navigated(framePayload); this.emit(exports.FrameManagerEmittedEvents.FrameNavigated, frame); } async _ensureIsolatedWorld(session, name) { const key = `${session.id()}:${name}`; if (this._isolatedWorlds.has(key)) return; this._isolatedWorlds.add(key); await session.send('Page.addScriptToEvaluateOnNewDocument', { source: `//# sourceURL=${ExecutionContext_js_1.EVALUATION_SCRIPT_URL}`, worldName: name, }); // Frames might be removed before we send this. await Promise.all(this.frames() .filter((frame) => frame._client === session) .map((frame) => session.send('Page.createIsolatedWorld', { frameId: frame._id, worldName: name, grantUniveralAccess: true, }))); } _onFrameNavigatedWithinDocument(frameId, url) { const frame = this._frames.get(frameId); if (!frame) return; frame._navigatedWithinDocument(url); this.emit(exports.FrameManagerEmittedEvents.FrameNavigatedWithinDocument, frame); this.emit(exports.FrameManagerEmittedEvents.FrameNavigated, frame); } _onFrameDetached(frameId, reason) { const frame = this._frames.get(frameId); if (reason === 'remove') { // Only remove the frame if the reason for the detached event is // an actual removement of the frame. // For frames that become OOP iframes, the reason would be 'swap'. if (frame) this._removeFramesRecursively(frame); } } _onExecutionContextCreated(contextPayload, session) { const auxData = contextPayload.auxData; const frameId = auxData ? auxData.frameId : null; const frame = this._frames.get(frameId) || null; let world = null; if (frame) { // Only care about execution contexts created for the current session. if (frame._client !== session) return; if (contextPayload.auxData && !!contextPayload.auxData['isDefault']) { world = frame._mainWorld; } else if (contextPayload.name === UTILITY_WORLD_NAME && !frame._secondaryWorld._hasContext()) { // In case of multiple sessions to the same target, there's a race between // connections so we might end up creating multiple isolated worlds. // We can use either. world = frame._secondaryWorld; } } const context = new ExecutionContext_js_1.ExecutionContext(frame._client || this._client, contextPayload, world); if (world) world._setContext(context); const key = `${session.id()}:${contextPayload.id}`; this._contextIdToContext.set(key, context); } _onExecutionContextDestroyed(executionContextId, session) { const key = `${session.id()}:${executionContextId}`; const context = this._contextIdToContext.get(key); if (!context) return; this._contextIdToContext.delete(key); if (context._world) context._world._setContext(null); } _onExecutionContextsCleared(session) { for (const [key, context] of this._contextIdToContext.entries()) { // Make sure to only clear execution contexts that belong // to the current session. if (context._client !== session) continue; if (context._world) context._world._setContext(null); this._contextIdToContext.delete(key); } } executionContextById(contextId, session = this._client) { const key = `${session.id()}:${contextId}`; const context = this._contextIdToContext.get(key); (0, assert_js_1.assert)(context, 'INTERNAL ERROR: missing context with id = ' + contextId); return context; } _removeFramesRecursively(frame) { for (const child of frame.childFrames()) this._removeFramesRecursively(child); frame._detach(); this._frames.delete(frame._id); this.emit(exports.FrameManagerEmittedEvents.FrameDetached, frame); } } exports.FrameManager = FrameManager; /** * At every point of time, page exposes its current frame tree via the * {@link Page.mainFrame | page.mainFrame} and * {@link Frame.childFrames | frame.childFrames} methods. * * @remarks * * `Frame` object lifecycles are controlled by three events that are all * dispatched on the page object: * * - {@link PageEmittedEvents.FrameAttached} * * - {@link PageEmittedEvents.FrameNavigated} * * - {@link PageEmittedEvents.FrameDetached} * * @Example * An example of dumping frame tree: * * ```js * const puppeteer = require('puppeteer'); * * (async () => { * const browser = await puppeteer.launch(); * const page = await browser.newPage(); * await page.goto('https://www.google.com/chrome/browser/canary.html'); * dumpFrameTree(page.mainFrame(), ''); * await browser.close(); * * function dumpFrameTree(frame, indent) { * console.log(indent + frame.url()); * for (const child of frame.childFrames()) { * dumpFrameTree(child, indent + ' '); * } * } * })(); * ``` * * @Example * An example of getting text from an iframe element: * * ```js * const frame = page.frames().find(frame => frame.name() === 'myframe'); * const text = await frame.$eval('.selector', element => element.textContent); * console.log(text); * ``` * * @public */ class Frame { /** * @internal */ constructor(frameManager, parentFrame, frameId, client) { this._url = ''; this._detached = false; /** * @internal */ this._loaderId = ''; /** * @internal */ this._lifecycleEvents = new Set(); this._frameManager = frameManager; this._parentFrame = parentFrame; this._url = ''; this._id = frameId; this._detached = false; this._loaderId = ''; this._childFrames = new Set(); if (this._parentFrame) this._parentFrame._childFrames.add(this); this._updateClient(client); } /** * @internal */ _updateClient(client) { this._client = client; this._mainWorld = new DOMWorld_js_1.DOMWorld(this._client, this._frameManager, this, this._frameManager._timeoutSettings); this._secondaryWorld = new DOMWorld_js_1.DOMWorld(this._client, this._frameManager, this, this._frameManager._timeoutSettings); } isOOPFrame() { return this._client !== this._frameManager._client; } /** * @remarks * * `frame.goto` will throw an error if: * - there's an SSL error (e.g. in case of self-signed certificates). * * - target URL is invalid. * * - the `timeout` is exceeded during navigation. * * - the remote server does not respond or is unreachable. * * - the main resource failed to load. * * `frame.goto` will not throw an error when any valid HTTP status code is * returned by the remote server, including 404 "Not Found" and 500 "Internal * Server Error". The status code for such responses can be retrieved by * calling {@link HTTPResponse.status}. * * NOTE: `frame.goto` either throws an error or returns a main resource * response. The only exceptions are navigation to `about:blank` or * navigation to the same URL with a different hash, which would succeed and * return `null`. * * NOTE: Headless mode doesn't support navigation to a PDF document. See * the {@link https://bugs.chromium.org/p/chromium/issues/detail?id=761295 | upstream * issue}. * * @param url - the URL to navigate the frame to. This should include the * scheme, e.g. `https://`. * @param options - navigation options. `waitUntil` is useful to define when * the navigation should be considered successful - see the docs for * {@link PuppeteerLifeCycleEvent} for more details. * * @returns A promise which resolves to the main resource response. In case of * multiple redirects, the navigation will resolve with the response of the * last redirect. */ async goto(url, options = {}) { return await this._frameManager.navigateFrame(this, url, options); } /** * @remarks * * This resolves when the frame navigates to a new URL. It is useful for when * you run code which will indirectly cause the frame to navigate. Consider * this example: * * ```js * const [response] = await Promise.all([ * // The navigation promise resolves after navigation has finished * frame.waitForNavigation(), * // Clicking the link will indirectly cause a navigation * frame.click('a.my-link'), * ]); * ``` * * Usage of the {@link https://developer.mozilla.org/en-US/docs/Web/API/History_API | History API} to change the URL is considered a navigation. * * @param options - options to configure when the navigation is consided finished. * @returns a promise that resolves when the frame navigates to a new URL. */ async waitForNavigation(options = {}) { return await this._frameManager.waitForFrameNavigation(this, options); } /** * @returns a promise that resolves to the frame's default execution context. */ executionContext() { return this._mainWorld.executionContext(); } /** * @remarks * * The only difference between {@link Frame.evaluate} and * `frame.evaluateHandle` is that `evaluateHandle` will return the value * wrapped in an in-page object. * * This method behaves identically to {@link Page.evaluateHandle} except it's * run within the context of the `frame`, rather than the entire page. * * @param pageFunction - a function that is run within the frame * @param args - arguments to be passed to the pageFunction */ async evaluateHandle(pageFunction, ...args) { return this._mainWorld.evaluateHandle(pageFunction, ...args); } /** * @remarks * * This method behaves identically to {@link Page.evaluate} except it's run * within the context of the `frame`, rather than the entire page. * * @param pageFunction - a function that is run within the frame * @param args - arguments to be passed to the pageFunction */ async evaluate(pageFunction, ...args) { return this._mainWorld.evaluate(pageFunction, ...args); } /** * This method queries the frame for the given selector. * * @param selector - a selector to query for. * @returns A promise which resolves to an `ElementHandle` pointing at the * element, or `null` if it was not found. */ async $(selector) { return this._mainWorld.$(selector); } /** * This method evaluates the given XPath expression and returns the results. * * @param expression - the XPath expression to evaluate. */ async $x(expression) { return this._mainWorld.$x(expression); } /** * @remarks * * This method runs `document.querySelector` within * the frame and passes it as the first argument to `pageFunction`. * * If `pageFunction` returns a Promise, then `frame.$eval` would wait for * the promise to resolve and return its value. * * @example * * ```js * const searchValue = await frame.$eval('#search', el => el.value); * ``` * * @param selector - the selector to query for * @param pageFunction - the function to be evaluated in the frame's context * @param args - additional arguments to pass to `pageFuncton` */ async $eval(selector, pageFunction, ...args) { return this._mainWorld.$eval(selector, pageFunction, ...args); } /** * @remarks * * This method runs `Array.from(document.querySelectorAll(selector))` within * the frame and passes it as the first argument to `pageFunction`. * * If `pageFunction` returns a Promise, then `frame.$$eval` would wait for * the promise to resolve and return its value. * * @example * * ```js * const divsCounts = await frame.$$eval('div', divs => divs.length); * ``` * * @param selector - the selector to query for * @param pageFunction - the function to be evaluated in the frame's context * @param args - additional arguments to pass to `pageFuncton` */ async $$eval(selector, pageFunction, ...args) { return this._mainWorld.$$eval(selector, pageFunction, ...args); } /** * This runs `document.querySelectorAll` in the frame and returns the result. * * @param selector - a selector to search for * @returns An array of element handles pointing to the found frame elements. */ async $$(selector) { return this._mainWorld.$$(selector); } /** * @returns the full HTML contents of the frame, including the doctype. */ async content() { return this._secondaryWorld.content(); } /** * Set the content of the frame. * * @param html - HTML markup to assign to the page. * @param options - options to configure how long before timing out and at * what point to consider the content setting successful. */ async setContent(html, options = {}) { return this._secondaryWorld.setContent(html, options); } /** * @remarks * * If the name is empty, it returns the `id` attribute instead. * * Note: This value is calculated once when the frame is created, and will not * update if the attribute is changed later. * * @returns the frame's `name` attribute as specified in the tag. */ name() { return this._name || ''; } /** * @returns the frame's URL. */ url() { return this._url; } /** * @returns the parent `Frame`, if any. Detached and main frames return `null`. */ parentFrame() { return this._parentFrame; } /** * @returns an array of child frames. */ childFrames() { return Array.from(this._childFrames); } /** * @returns `true` if the frame has been detached, or `false` otherwise. */ isDetached() { return this._detached; } /** * Adds a `