/**
 * Not type-checking this file because it's mostly vendor code.
 */

/*!
 * HTML Parser By John Resig (ejohn.org)
 * Modified by Juriy "kangax" Zaytsev
 * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 */
import { makeMap, no } from 'shared/util'
import { isNonPhrasingTag } from 'web/compiler/util'
import { unicodeRegExp } from 'core/util/lang'

// Regular Expressions for parsing tags and attributes

/**
 * One attribute, with optional leading whitespace.
 *   [1] name: any run of characters except whitespace, quotes, `<`, `>`, `/`, `=`
 *   [2] the `=` sign, when a value follows
 *   [3] double-quoted value, [4] single-quoted value, [5] unquoted value
 */
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/

/**
 * Same capture layout as `attribute`, but the name must start with `v-xxx:`,
 * `@`, `:` or `#` followed by a bracketed `[...]` segment. parseHTML tries
 * this pattern before falling back to `attribute`.
 */
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/

// A tag name without a prefix; the extra allowed characters come from `unicodeRegExp`.
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
// A tag name with an optional `prefix:` in front, captured as a single group.
const qnameCapture = `((?:${ncname}\\:)?${ncname})`

// `<name` at the start of the input; [1] is the tag name.
const startTagOpen = new RegExp(`^<${qnameCapture}`)
// The `>` or `/>` that finishes a start tag; [1] is the optional slash.
const startTagClose = /^\s*(\/?)>/
// `</name ...>` at the start of the input; [1] is the tag name.
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being passed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/

// Special Elements (can contain anything)
// NOTE(review): the second makeMap argument presumably makes the lookup
// case-insensitive — confirm against shared/util.
export const isPlainTextElement = makeMap('script,style,textarea', true)

// Compiled end-tag regexes, keyed by lower-cased tag name (filled in by parseHTML).
const reCache = {}
/**
 * Maps each HTML entity that may appear in an attribute value to the
 * character it stands for. The keys must be exactly the strings matched by
 * `encodedAttr` / `encodedAttrWithNewLines` below, because the match itself
 * is used as the lookup key when decoding.
 */
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t',
  '&#39;': "'"
}

// Entities that are always decoded.
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
// Same set plus the encoded newline (&#10;) and tab (&#9;).
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
// #5992
// Tags whose content may begin with a newline that has to be dropped.
// NOTE(review): makeMap presumably returns a membership test for the listed
// names — confirm against shared/util.
const isIgnoreNewlineTag = makeMap('pre,textarea', true)

/**
 * Truthy when `tag` is one of the tags above and `html` begins with "\n".
 * Callers then skip that first character.
 */
const shouldIgnoreFirstNewline = (tag, html) => {
  return tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
}
/**
 * Replace the HTML entities found in an attribute value with the characters
 * listed for them in `decodingMap`.
 *
 * @param {string} value - raw attribute value
 * @param {boolean} shouldDecodeNewlines - when truthy, the pattern that also
 *   matches the encoded newline and tab entities is used
 * @returns {string} the decoded value
 */
function decodeAttr (value, shouldDecodeNewlines) {
  const entityPattern = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(entityPattern, entity => decodingMap[entity])
}
/**
 * Tokenise a template string and report what is found through the callbacks
 * on `options`. The input is consumed from the front; `index` tracks how many
 * characters have been consumed so far and is what the callbacks receive as
 * start/end offsets.
 *
 * @param {string} html - the markup to parse
 * @param {Object} options
 * @param {boolean} [options.expectHTML] - when truthy, an open `<p>` is closed
 *   before a tag for which `isNonPhrasingTag` is true, and a tag for which
 *   `canBeLeftOpenTag` is true closes an open tag of the same name
 * @param {Function} [options.isUnaryTag] - (tagName) => boolean, defaults to `no`
 * @param {Function} [options.canBeLeftOpenTag] - (tagName) => boolean, defaults to `no`
 * @param {boolean} [options.shouldKeepComment] - report comments via `comment`
 * @param {Function} [options.comment] - (text, start, end)
 * @param {Function} [options.chars] - (text, start, end); called with the text
 *   only for the content of plain-text elements and for an unparseable tail
 * @param {Function} [options.start] - (tagName, attrs, unary, start, end)
 * @param {Function} [options.end] - (tagName, start, end)
 * @param {Function} [options.warn] - (message, range); only used when
 *   `process.env.NODE_ENV !== 'production'`
 * @param {boolean} [options.shouldDecodeNewlines] - forwarded to `decodeAttr`
 * @param {boolean} [options.shouldDecodeNewlinesForHref] - forwarded to
 *   `decodeAttr` for the `href` attribute of `<a>`
 * @param {boolean} [options.outputSourceRange] - outside production, add
 *   `start`/`end` offsets to every reported attribute
 */
export function parseHTML (html, options) {
  const stack = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  let index = 0
  let last, lastTag
  while (html) {
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            // Guarded like every other callback so a missing handler cannot throw.
            if (options.shouldKeepComment && options.comment) {
              options.comment(html.substring(4, commentEnd), index, index + commentEnd + 3)
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }

        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
            advance(1)
          }
          continue
        }
      }

      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
      }

      // No `<` at all: everything that is left is text.
      if (textEnd < 0) {
        text = html
      }

      if (text) {
        advance(text.length)
      }

      if (options.chars && text) {
        options.chars(text, index - text.length, index)
      }
    } else {
      // Inside script/style/textarea: take everything up to the matching end
      // tag as raw text in one step.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
      const rest = html.replace(reStackedTag, function (all, text, closingTag) {
        endTagLength = closingTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }

    // Nothing was consumed in this pass: report the remainder as text and
    // stop, otherwise the loop would never terminate.
    if (html === last) {
      options.chars && options.chars(html)
      if (process.env.NODE_ENV !== 'production' && !stack.length && options.warn) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`, { start: index + html.length })
      }
      break
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  // Drop the first `n` characters of the remaining input and move `index` along.
  function advance (n) {
    index += n
    html = html.substring(n)
  }

  // Try to read a complete start tag (name, attributes, closing `>` or `/>`)
  // from the front of the input. Returns a match record, or undefined when
  // the input does not begin with a start tag or the tag is never closed.
  // Note that the input consumed while trying is not restored on failure.
  function parseStartTag () {
    const start = html.match(startTagOpen)
    if (start) {
      const match = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      while (!(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute))) {
        // Offsets cover the whole match, including its leading whitespace.
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  // Turn a match record from parseStartTag into an `options.start` call and,
  // for non-unary tags, push the tag onto the stack of open elements.
  function handleStartTag (match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // Groups 3/4/5 are the double-quoted, single-quoted and unquoted value.
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
        ? options.shouldDecodeNewlinesForHref
        : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
        // Skip the whitespace in front of the attribute name. `match()`
        // returns an array, so the length has to be taken from the matched
        // string ([0]), not from the array itself (which is always 1 here).
        attrs[i].start = args.start + args[0].match(/^\s*/)[0].length
        attrs[i].end = args.end
      }
    }

    if (!unary) {
      stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  // Close `tagName` together with every element opened after it. Called
  // without a tag name it closes everything that is still open. `start` and
  // `end` default to the current index.
  function parseEndTag (tagName, start, end) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (process.env.NODE_ENV !== 'production' &&
          (i > pos || !tagName) &&
          options.warn
        ) {
          options.warn(
            `tag <${stack[i].tag}> has no matching end tag.`,
            { start: stack[i].start, end: stack[i].end }
          )
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      // A stray </br> is reported as a unary <br> start tag.
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      // A stray </p> is reported as an empty <p></p> pair.
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}