94adff6e63ec1346d62860622a8c7fa10b86afb5e0dd124146c272ed4e8d062355e3ff59bed389b2c42578822cd9fd89610248593115a9a35718392de8914f 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /**
  2. * Not type-checking this file because it's mostly vendor code.
  3. */
  4. /*!
  5. * HTML Parser By John Resig (ejohn.org)
  6. * Modified by Juriy "kangax" Zaytsev
  7. * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
  8. * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
  9. */
  10. import { makeMap, no } from 'shared/util'
  11. import { isNonPhrasingTag } from 'web/compiler/util'
  12. import { unicodeRegExp } from 'core/util/lang'
  13. // Regular Expressions for parsing tags and attributes
  14. const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
  15. const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
  16. const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
  17. const qnameCapture = `((?:${ncname}\\:)?${ncname})`
  18. const startTagOpen = new RegExp(`^<${qnameCapture}`)
  19. const startTagClose = /^\s*(\/?)>/
  20. const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
  21. const doctype = /^<!DOCTYPE [^>]+>/i
  22. // #7298: escape - to avoid being passed as HTML comment when inlined in page
  23. const comment = /^<!\--/
  24. const conditionalComment = /^<!\[/
  25. // Special Elements (can contain anything)
  26. export const isPlainTextElement = makeMap('script,style,textarea', true)
  27. const reCache = {}
  28. const decodingMap = {
  29. '&lt;': '<',
  30. '&gt;': '>',
  31. '&quot;': '"',
  32. '&amp;': '&',
  33. '&#10;': '\n',
  34. '&#9;': '\t',
  35. '&#39;': "'"
  36. }
  37. const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
  38. const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
  39. // #5992
  40. const isIgnoreNewlineTag = makeMap('pre,textarea', true)
  41. const shouldIgnoreFirstNewline = (tag, html) => tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
  42. function decodeAttr (value, shouldDecodeNewlines) {
  43. const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  44. return value.replace(re, match => decodingMap[match])
  45. }
  46. export function parseHTML (html, options) {
  47. const stack = []
  48. const expectHTML = options.expectHTML
  49. const isUnaryTag = options.isUnaryTag || no
  50. const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  51. let index = 0
  52. let last, lastTag
  53. while (html) {
  54. last = html
  55. // Make sure we're not in a plaintext content element like script/style
  56. if (!lastTag || !isPlainTextElement(lastTag)) {
  57. let textEnd = html.indexOf('<')
  58. if (textEnd === 0) {
  59. // Comment:
  60. if (comment.test(html)) {
  61. const commentEnd = html.indexOf('-->')
  62. if (commentEnd >= 0) {
  63. if (options.shouldKeepComment) {
  64. options.comment(html.substring(4, commentEnd), index, index + commentEnd + 3)
  65. }
  66. advance(commentEnd + 3)
  67. continue
  68. }
  69. }
  70. // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
  71. if (conditionalComment.test(html)) {
  72. const conditionalEnd = html.indexOf(']>')
  73. if (conditionalEnd >= 0) {
  74. advance(conditionalEnd + 2)
  75. continue
  76. }
  77. }
  78. // Doctype:
  79. const doctypeMatch = html.match(doctype)
  80. if (doctypeMatch) {
  81. advance(doctypeMatch[0].length)
  82. continue
  83. }
  84. // End tag:
  85. const endTagMatch = html.match(endTag)
  86. if (endTagMatch) {
  87. const curIndex = index
  88. advance(endTagMatch[0].length)
  89. parseEndTag(endTagMatch[1], curIndex, index)
  90. continue
  91. }
  92. // Start tag:
  93. const startTagMatch = parseStartTag()
  94. if (startTagMatch) {
  95. handleStartTag(startTagMatch)
  96. if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
  97. advance(1)
  98. }
  99. continue
  100. }
  101. }
  102. let text, rest, next
  103. if (textEnd >= 0) {
  104. rest = html.slice(textEnd)
  105. while (
  106. !endTag.test(rest) &&
  107. !startTagOpen.test(rest) &&
  108. !comment.test(rest) &&
  109. !conditionalComment.test(rest)
  110. ) {
  111. // < in plain text, be forgiving and treat it as text
  112. next = rest.indexOf('<', 1)
  113. if (next < 0) break
  114. textEnd += next
  115. rest = html.slice(textEnd)
  116. }
  117. text = html.substring(0, textEnd)
  118. }
  119. if (textEnd < 0) {
  120. text = html
  121. }
  122. if (text) {
  123. advance(text.length)
  124. }
  125. if (options.chars && text) {
  126. options.chars(text, index - text.length, index)
  127. }
  128. } else {
  129. let endTagLength = 0
  130. const stackedTag = lastTag.toLowerCase()
  131. const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
  132. const rest = html.replace(reStackedTag, function (all, text, endTag) {
  133. endTagLength = endTag.length
  134. if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
  135. text = text
  136. .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
  137. .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
  138. }
  139. if (shouldIgnoreFirstNewline(stackedTag, text)) {
  140. text = text.slice(1)
  141. }
  142. if (options.chars) {
  143. options.chars(text)
  144. }
  145. return ''
  146. })
  147. index += html.length - rest.length
  148. html = rest
  149. parseEndTag(stackedTag, index - endTagLength, index)
  150. }
  151. if (html === last) {
  152. options.chars && options.chars(html)
  153. if (process.env.NODE_ENV !== 'production' && !stack.length && options.warn) {
  154. options.warn(`Mal-formatted tag at end of template: "${html}"`, { start: index + html.length })
  155. }
  156. break
  157. }
  158. }
  159. // Clean up any remaining tags
  160. parseEndTag()
  161. function advance (n) {
  162. index += n
  163. html = html.substring(n)
  164. }
  165. function parseStartTag () {
  166. const start = html.match(startTagOpen)
  167. if (start) {
  168. const match = {
  169. tagName: start[1],
  170. attrs: [],
  171. start: index
  172. }
  173. advance(start[0].length)
  174. let end, attr
  175. while (!(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute))) {
  176. attr.start = index
  177. advance(attr[0].length)
  178. attr.end = index
  179. match.attrs.push(attr)
  180. }
  181. if (end) {
  182. match.unarySlash = end[1]
  183. advance(end[0].length)
  184. match.end = index
  185. return match
  186. }
  187. }
  188. }
  189. function handleStartTag (match) {
  190. const tagName = match.tagName
  191. const unarySlash = match.unarySlash
  192. if (expectHTML) {
  193. if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
  194. parseEndTag(lastTag)
  195. }
  196. if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
  197. parseEndTag(tagName)
  198. }
  199. }
  200. const unary = isUnaryTag(tagName) || !!unarySlash
  201. const l = match.attrs.length
  202. const attrs = new Array(l)
  203. for (let i = 0; i < l; i++) {
  204. const args = match.attrs[i]
  205. const value = args[3] || args[4] || args[5] || ''
  206. const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
  207. ? options.shouldDecodeNewlinesForHref
  208. : options.shouldDecodeNewlines
  209. attrs[i] = {
  210. name: args[1],
  211. value: decodeAttr(value, shouldDecodeNewlines)
  212. }
  213. if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
  214. attrs[i].start = args.start + args[0].match(/^\s*/).length
  215. attrs[i].end = args.end
  216. }
  217. }
  218. if (!unary) {
  219. stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end })
  220. lastTag = tagName
  221. }
  222. if (options.start) {
  223. options.start(tagName, attrs, unary, match.start, match.end)
  224. }
  225. }
  226. function parseEndTag (tagName, start, end) {
  227. let pos, lowerCasedTagName
  228. if (start == null) start = index
  229. if (end == null) end = index
  230. // Find the closest opened tag of the same type
  231. if (tagName) {
  232. lowerCasedTagName = tagName.toLowerCase()
  233. for (pos = stack.length - 1; pos >= 0; pos--) {
  234. if (stack[pos].lowerCasedTag === lowerCasedTagName) {
  235. break
  236. }
  237. }
  238. } else {
  239. // If no tag name is provided, clean shop
  240. pos = 0
  241. }
  242. if (pos >= 0) {
  243. // Close all the open elements, up the stack
  244. for (let i = stack.length - 1; i >= pos; i--) {
  245. if (process.env.NODE_ENV !== 'production' &&
  246. (i > pos || !tagName) &&
  247. options.warn
  248. ) {
  249. options.warn(
  250. `tag <${stack[i].tag}> has no matching end tag.`,
  251. { start: stack[i].start, end: stack[i].end }
  252. )
  253. }
  254. if (options.end) {
  255. options.end(stack[i].tag, start, end)
  256. }
  257. }
  258. // Remove the open elements from the stack
  259. stack.length = pos
  260. lastTag = pos && stack[pos - 1].tag
  261. } else if (lowerCasedTagName === 'br') {
  262. if (options.start) {
  263. options.start(tagName, [], true, start, end)
  264. }
  265. } else if (lowerCasedTagName === 'p') {
  266. if (options.start) {
  267. options.start(tagName, [], false, start, end)
  268. }
  269. if (options.end) {
  270. options.end(tagName, start, end)
  271. }
  272. }
  273. }
  274. }