chore: remove previous e2e crawler script

This commit is contained in:
Daniel Roe 2023-09-19 22:34:42 +01:00
parent 025fbdf6ba
commit 5d36e5c93e
4 changed files with 6 additions and 275 deletions

View File

@ -1,35 +0,0 @@
# Workflow that runs the documentation link crawler (scripts/crawl.mjs) against a deployed site.
name: docs-e2e
# Triggers: manual dispatch (with an optional target URL) and deployment status events.
on:
workflow_dispatch:
inputs:
url:
required: false
description: The URL to run the test suite against.
type: string
deployment_status:
# The job only needs read access to the repository contents.
permissions:
contents: read
jobs:
crawl-docs:
environment:
# Falls back to 'Production' when the event carries no deployment environment.
name: ${{ github.event.deployment.environment || 'Production' }}
# First non-empty value wins: manual input, deployment payload URL, deployment status target URL.
url: ${{ github.event.inputs.url || github.event.deployment.payload.web_url || github.event.deployment_status.target_url }}
# Run only for successful deployments or when dispatched manually.
if: github.event.deployment_status.state == 'success' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
steps:
# Actions are pinned to a commit SHA; the trailing comment records the matching release tag.
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- run: corepack enable
- uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1
with:
cache: "pnpm"
- name: Install dependencies
run: pnpm install
# The crawler reads the site to check from the BASE_URL environment variable.
- run: node ./scripts/crawl.mjs
env:
# Same fallback chain as the environment URL above.
BASE_URL: ${{ github.event.inputs.url || github.event.deployment.payload.web_url || github.event.deployment_status.target_url }}

View File

@ -39,7 +39,6 @@
"magic-string": "^0.30.3"
},
"devDependencies": {
"@actions/core": "1.10.1",
"@nuxt/test-utils": "workspace:*",
"@nuxt/webpack-builder": "workspace:*",
"@nuxtjs/eslint-config-typescript": "12.1.0",
@ -47,9 +46,7 @@
"@types/node": "18.17.17",
"@types/semver": "7.5.2",
"case-police": "0.6.1",
"chalk": "5.3.0",
"changelogen": "0.5.5",
"cheerio": "1.0.0-rc.12",
"consola": "3.2.3",
"devalue": "4.3.2",
"eslint": "8.49.0",

View File

@ -19,9 +19,6 @@ importers:
.:
devDependencies:
'@actions/core':
specifier: 1.10.1
version: 1.10.1
'@nuxt/test-utils':
specifier: workspace:*
version: link:packages/test-utils
@ -43,15 +40,9 @@ importers:
case-police:
specifier: 0.6.1
version: 0.6.1
chalk:
specifier: 5.3.0
version: 5.3.0
changelogen:
specifier: 0.5.5
version: 0.5.5
cheerio:
specifier: 1.0.0-rc.12
version: 1.0.0-rc.12
consola:
specifier: 3.2.3
version: 3.2.3
@ -937,19 +928,6 @@ packages:
resolution: {integrity: sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==}
engines: {node: '>=0.10.0'}
/@actions/core@1.10.1:
resolution: {integrity: sha512-3lBR9EDAY+iYIpTnTIXmWcNbX3T2kCkAEQGIQx4NVQ0575nk2k3GRZDTPQG+vVtS2izSLmINlxXf0uLtnrTP+g==}
dependencies:
'@actions/http-client': 2.1.1
uuid: 8.3.2
dev: true
/@actions/http-client@2.1.1:
resolution: {integrity: sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==}
dependencies:
tunnel: 0.0.6
dev: true
/@ampproject/remapping@2.2.1:
resolution: {integrity: sha512-lFMjJTrFL3j7L9yBxwYfCq2k6qqwHyzuUl/XBnif78PWTJYyL/dfowQHWE3sp6U6ZzqWiiIZnpTMO96zhkjwtg==}
engines: {node: '>=6.0.0'}
@ -4362,30 +4340,6 @@ packages:
resolution: {integrity: sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA==}
dev: true
/cheerio-select@2.1.0:
resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==}
dependencies:
boolbase: 1.0.0
css-select: 5.1.0
css-what: 6.1.0
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.1.0
dev: true
/cheerio@1.0.0-rc.12:
resolution: {integrity: sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==}
engines: {node: '>= 6'}
dependencies:
cheerio-select: 2.1.0
dom-serializer: 2.0.0
domhandler: 5.0.3
domutils: 3.1.0
htmlparser2: 8.0.2
parse5: 7.1.2
parse5-htmlparser2-tree-adapter: 7.0.0
dev: true
/chokidar@3.5.3:
resolution: {integrity: sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==}
engines: {node: '>= 8.10.0'}
@ -4688,6 +4642,7 @@ packages:
domhandler: 5.0.3
domutils: 3.1.0
nth-check: 2.1.1
dev: false
/css-tree@2.2.1:
resolution: {integrity: sha512-OA0mILzGc1kCOCSJerOeqDxDQ4HOh+G8NbOJFOTgOCzpw7fCBubk0fEyxp8AgOL/jvLgYA/uV0cMbe43ElF1JA==}
@ -4708,6 +4663,7 @@ packages:
/css-what@6.1.0:
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
engines: {node: '>= 6'}
dev: false
/css.escape@1.5.1:
resolution: {integrity: sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==}
@ -4970,15 +4926,18 @@ packages:
domelementtype: 2.3.0
domhandler: 5.0.3
entities: 4.5.0
dev: false
/domelementtype@2.3.0:
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
dev: false
/domhandler@5.0.3:
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
engines: {node: '>= 4'}
dependencies:
domelementtype: 2.3.0
dev: false
/domutils@3.1.0:
resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==}
@ -4986,6 +4945,7 @@ packages:
dom-serializer: 2.0.0
domelementtype: 2.3.0
domhandler: 5.0.3
dev: false
/dot-case@3.0.4:
resolution: {integrity: sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w==}
@ -6375,15 +6335,6 @@ packages:
resolution: {integrity: sha512-0quDb7s97CfemeJAnW9wC0hw78MtW7NU3hqtCD75g2vFlDLt36llsYD7uB7SUzojLMP24N5IatXf7ylGXiGG9A==}
dev: true
/htmlparser2@8.0.2:
resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==}
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.1.0
entities: 4.5.0
dev: true
/http-errors@2.0.0:
resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
engines: {node: '>= 0.8'}
@ -8442,23 +8393,10 @@ packages:
dependencies:
parse-path: 7.0.0
/parse5-htmlparser2-tree-adapter@7.0.0:
resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==}
dependencies:
domhandler: 5.0.3
parse5: 7.1.2
dev: true
/parse5@6.0.1:
resolution: {integrity: sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==}
dev: true
/parse5@7.1.2:
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
dependencies:
entities: 4.5.0
dev: true
/parseurl@1.3.3:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'}
@ -10195,11 +10133,6 @@ packages:
/tslib@2.6.1:
resolution: {integrity: sha512-t0hLfiEKfMUoqhG+U1oid7Pva4bbDPHYfJNiB7BiIjRkj1pyC++4N3huJfqY6aRH6VTB0rvtzQwjM4K6qpfOig==}
/tunnel@0.0.6:
resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==}
engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'}
dev: true
/type-check@0.4.0:
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
engines: {node: '>= 0.8.0'}
@ -10695,11 +10628,6 @@ packages:
which-typed-array: 1.1.11
dev: true
/uuid@8.3.2:
resolution: {integrity: sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==}
hasBin: true
dev: true
/uvu@0.5.6:
resolution: {integrity: sha512-+g8ENReyr8YsOc6fv/NVJs2vFdHBnBNdfE49rshrTzDWOlUx4Gq7KOS2GD8eqhy2j+Ejq29+SbKH8yjkAqXqoA==}
engines: {node: '>=8'}

View File

@ -1,159 +0,0 @@
// @ts-check
import { fetch } from 'ofetch'
import { load } from 'cheerio'
import { consola } from 'consola'
import { parseURL, withoutTrailingSlash } from 'ufo'
import chalk from 'chalk'
import * as actions from '@actions/core'
import { isCI } from 'std-env'
// Every line this script logs is tagged with `crawler`.
const logger = consola.withTag('crawler')

// Site under test: `BASE_URL` when it is set and non-empty, otherwise the production site.
const baseURL = withoutTrailingSlash(process.env.BASE_URL || 'https://nuxt.com')
// The crawl is seeded with the site root.
const startingURL = `${baseURL}/`

// URLs ending in one of these extensions are treated as assets and never fetched.
const excludedExtensions = [
  'svg',
  'png',
  'jpg',
  'sketch',
  'ico',
  'gif',
  'zip'
]
// Links that start with one of these prefixes are dropped before any other check.
const urlsToOmit = [
  'http://localhost:3000'
]

// TODO: remove when migrating to Nuxt 3/Docus
// Pathnames whose failures are logged as "(ignored)" rather than counted as errors.
const errorsToIgnore = [
  '/guide/directory-structure/nuxt.config',
  '/guide/directory-structure',
  '/guide/directory-structure/app.config',
  '/api/configuration/nuxt-config',
  '/guide/deploy',
  '/guide/features/app-config'
]

// GLOBALS
// Every URL that has been queued so far (used to avoid visiting a page twice).
const urls = new Set([startingURL])
// URLs whose check failed and was not ignored.
const erroredUrls = new Set()
// Maps a queued URL to the page on which the link to it was found.
const referrers = new Map()
/**
 * Validate a link found on a page and, when it points at a not-yet-seen page
 * on the site under test, record it and hand it to the crawler.
 *
 * Links are skipped when they are empty or unparsable (both are logged), match
 * `urlsToOmit`, were already queued, end in one of `excludedExtensions`, or
 * belong to a different origin than `baseURL`.
 *
 * @param {string} path Path to check
 * @param {string | undefined} referrer The referring page
 */
function queue (path, referrer) {
  // Broken hrefs are only surfaced as CI annotations when the page that
  // contains them is a documentation page. (The check has to look at the
  // referrer: an empty `path` can never match anything.)
  const shouldAnnotate = isCI && Boolean(referrer?.match(/\/docs\//))

  if (!path) {
    const message = chalk.red(`${chalk.bold('✗')} ${referrer} linked to empty href`)
    if (shouldAnnotate) { actions.error(message) }
    logger.log(message)
    return
  }

  if (urlsToOmit.some(url => path.startsWith(url))) { return }

  // A malformed href must not abort the whole crawl: `new URL` throws on
  // input it cannot parse, so report the link and move on.
  /** @type {URL} */
  let target
  try {
    target = new URL(path, referrer)
  } catch {
    const message = chalk.red(`${chalk.bold('✗')} ${referrer} linked to invalid href \`${path}\``)
    if (shouldAnnotate) { actions.error(message) }
    logger.log(message)
    return
  }
  const { pathname, origin } = target

  // Don't crawl the same page more than once (query string and hash are ignored)
  const url = `${origin}${pathname}`
  if (urls.has(url)) { return }

  // Don't try to visit linked assets (e.g. SVGs)
  const extension = url.split('.').pop()
  if (extension && excludedExtensions.includes(extension)) { return }

  // Don't crawl external URLs
  if (origin !== baseURL) { return }

  referrers.set(url, referrer)
  urls.add(url)
  crawler.queue(url)
}
/**
 * Minimal in-process crawler: a queue of URLs that are fetched, checked and
 * scanned for further links.
 *
 * A queue slot holds a URL string while it waits and is replaced by the
 * pending request promise once the fetch has started; the slot is removed when
 * the page has been handled.
 */
const crawler = {
  // Upper bound on how many queue slots are scanned for work in one pass.
  maxConnections: 100,
  /** @type {Array<string | Promise<any>>} */
  _queue: [],
  /** Number of URLs that are waiting or in flight. */
  get queueSize () {
    return this._queue.length
  },
  /** @param {string} url The URL to crawl */
  queue (url) {
    this._queue.push(url)
    this.processQueue()
  },
  /** Start a request for every waiting URL within the first `maxConnections` slots. */
  processQueue () {
    if (!this.queueSize) { return }
    const limit = Math.min(this.maxConnections, this.queueSize)
    for (let i = 0; i < limit; i++) {
      const item = this._queue[i]
      if (!item || item instanceof Promise) { continue }

      // Frees this request's slot, then either keeps crawling or wraps up.
      const release = () => {
        const index = this._queue.indexOf(promise)
        // Guard against -1: `splice(-1, 1)` would drop an unrelated entry.
        if (index !== -1) { this._queue.splice(index, 1) }
        if (this.queueSize) {
          this.processQueue()
        } else {
          this.finish()
        }
      }

      const promise = this._queue[i] = fetch(item, { redirect: 'manual' })
        .then(async (res) => {
          const text = res.ok && await res.text()
          return Object.assign(res, { $: text ? load(text) : null })
        })
        .then(
          res => this.callback(res.ok ? null : new Error(res.statusText), res, release),
          // Network or parsing failure: report it like any other broken page
          // instead of leaving an unhandled rejection and a stuck queue slot.
          // Exceptions thrown by `callback` itself are deliberately NOT caught
          // here (two-argument `then`), so a failing run still terminates.
          (reason) => {
            /** @type {any} */
            const failed = { url: item, status: 0, $: null }
            this.callback(reason instanceof Error ? reason : new Error(String(reason)), failed, release)
          }
        )
    }
  },
  /**
   * Print the summary once the queue has drained. Does nothing in CI, where
   * failures are reported through annotations instead.
   *
   * @throws {Error} when at least one URL failed (stack trace blanked to keep the output short)
   */
  finish () {
    if (isCI) { return }
    logger.log('')
    logger.info(`Checked \`${urls.size}\` pages.`)
    if (!erroredUrls.size) { return }
    const error = new Error(`${chalk.bold(erroredUrls.size)} errors found on ${chalk.bold(baseURL)}.`)
    error.stack = ''
    throw error
  },
  /* eslint-disable jsdoc/valid-types */
  /**
   * Handle one fetched page: record a failure, or queue every link it contains.
   * `done` is always called exactly once.
   *
   * @param {Error | null} error
   * @param {import('ofetch').FetchResponse<any> & { $: import('cheerio').CheerioAPI | null }} res
   * @param {() => void} done
   */
  /* eslint-enable jsdoc/valid-types */
  callback (error, res, done) {
    const $ = res.$
    const uri = res.url
    const statusCode = res.status

    // NOTE(review): requests use `redirect: 'manual'`, so 301/302 responses are
    // not `ok` and arrive here with `error` set even though they are listed as
    // acceptable statuses - confirm whether redirects should count as failures.
    if (error || ![200, 301, 302].includes(statusCode) || !$) {
      const referrer = referrers.get(uri)
      // TODO: normalize relative links in module readmes - https://github.com/nuxt/nuxt.com/issues/1271
      if (errorsToIgnore.includes(parseURL(uri).pathname) || referrer?.match(/\/modules\//) || !uri?.match(/\/docs\//)) {
        logger.log(chalk.gray(`${chalk.bold('✗')} ${uri} (${statusCode}) [<- ${referrer}] (ignored)`))
        return done()
      }
      const message = chalk.red(`${chalk.bold('✗')} ${uri} (${statusCode}) [<- ${referrer}]`)
      if (isCI) { actions.error(message) }
      logger.log(message)
      erroredUrls.add(uri)
      return done()
    }

    // `$` is guaranteed to be set from here on (the check above rejects a missing document).
    $('a:not([href*=mailto]):not([href*=tel])').each((_, el) => {
      if ('attribs' in el && 'href' in el.attribs) {
        queue(el.attribs.href, uri)
      }
    })

    logger.success(chalk.green(uri))
    logger.debug(uri, `[${this.queueSize} / ${urls.size}]`)
    done()
  }
}
// Announce what is about to be checked, then seed the crawl with the site root.
logger.log('')
logger.info(`Checking \`${baseURL}\`.`)
const ignoredExtensionList = excludedExtensions.join(', ')
logger.info(`Ignoring file extensions: \`${ignoredExtensionList}.\`\n`)

crawler.queue(startingURL)