380204333344cd63bcd50a010927fab703cd8556e83ffffcfa3c79aacd4e95b524c03f7b800760c082b31d3fb0f5c644ab0dcb87bf742774a5f0dec45aada1 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720
  1. 'use strict'
  2. const check = require('check-types')
  3. const error = require('./error')
  4. const EventEmitter = require('events').EventEmitter
  5. const events = require('./events')
  6. const promise = require('./promise')
  // Closing delimiter for each kind of open scope, looked up by scope name.
  const terminators = {
    arr: ']',
    obj: '}'
  }
  // Single-character escape sequences: the character that follows a
  // backslash inside a string maps to the character it stands for.
  /* eslint-disable quote-props */
  const escapes = {
    // Characters that escape to themselves.
    '"': '"',
    '/': '/',
    '\\': '\\',
    // Control characters.
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t'
  }
  /* eslint-enable quote-props */
  23. module.exports = initialise
  24. /**
  25. * Public function `walk`.
  26. *
  27. * Returns an event emitter and asynchronously walks a stream of JSON data,
  28. * emitting events as it encounters tokens. The event emitter is decorated
  29. * with a `pause` method that can be called to pause processing.
  30. *
  31. * @param stream: Readable instance representing the incoming JSON.
  32. *
  33. * @option yieldRate: The number of data items to process per timeslice,
  34. * default is 16384.
  35. *
  36. * @option Promise: The promise constructor to use, defaults to bluebird.
  37. *
  38. * @option ndjson: Set this to true to parse newline-delimited JSON.
  39. **/
  40. function initialise (stream, options = {}) {
  41. check.assert.instanceStrict(stream, require('stream').Readable, 'Invalid stream argument')
  42. const currentPosition = {
  43. line: 1,
  44. column: 1
  45. }
  46. const emitter = new EventEmitter()
  47. const handlers = {
  48. arr: value,
  49. obj: property
  50. }
  51. const json = []
  52. const lengths = []
  53. const previousPosition = {}
  54. const Promise = promise(options)
  55. const scopes = []
  56. const yieldRate = options.yieldRate || 16384
  57. const shouldHandleNdjson = !! options.ndjson
  58. let index = 0
  59. let isStreamEnded = false
  60. let isWalkBegun = false
  61. let isWalkEnded = false
  62. let isWalkingString = false
  63. let hasEndedLine = true
  64. let count = 0
  65. let resumeFn
  66. let pause
  67. let cachedCharacter
  68. stream.setEncoding('utf8')
  69. stream.on('data', readStream)
  70. stream.on('end', endStream)
  71. stream.on('error', err => {
  72. emitter.emit(events.error, err)
  73. endStream()
  74. })
  75. emitter.pause = () => {
  76. let resolve
  77. pause = new Promise(res => resolve = res)
  78. return () => {
  79. pause = null
  80. count = 0
  81. if (shouldHandleNdjson && isStreamEnded && isWalkEnded) {
  82. emit(events.end)
  83. } else {
  84. resolve()
  85. }
  86. }
  87. }
  88. return emitter
  // 'data' handler: buffers the chunk, then either starts the walk (first
  // chunk) or wakes up a walk that is waiting for more input.
  function readStream (chunk) {
    addChunk(chunk)

    if (! isWalkBegun) {
      isWalkBegun = true
      value()
      return
    }

    return resume()
  }
  // Appends a chunk to the buffer and records its own length (`item`)
  // together with the total buffered length including it (`aggregate`).
  function addChunk (chunk) {
    json.push(chunk)

    const { length: item } = chunk
    lengths.push({ item, aggregate: item + length() })
  }
  // Total number of buffered characters: the aggregate of the last chunk
  // entry, or zero when nothing is buffered.
  function length () {
    const total = lengths.length
    return total > 0 ? lengths[total - 1].aggregate : 0
  }
  // Walks one value: skips whitespace, consumes the first character and
  // dispatches on it. Every `yieldRate`-th call is deferred through
  // `setImmediate`. Rejections from the chain are deliberately discarded.
  function value () {
    count += 1

    if (count % yieldRate === 0) {
      return new Promise(resolve => {
        setImmediate(() => walkValue().then(resolve))
      })
    }

    return walkValue()

    function walkValue () {
      return awaitNonWhitespace()
        .then(next)
        .then(handleValue)
        .catch(() => {})
    }
  }
  // Consumes characters for as long as the current one is whitespace. The
  // returned promise resolves once a non-whitespace character is current
  // and rejects if `awaitCharacter` or `next` rejects.
  function awaitNonWhitespace () {
    return skip()

    function skip () {
      return awaitCharacter().then(() => {
        if (isWhitespace(character())) {
          return next().then(skip)
        }
      })
    }
  }
  // Resolves when a character is available at `index`. If the stream has
  // ended it rejects and schedules `endWalk`. Otherwise it parks a callback
  // in `resumeFn`, which `resume` invokes when more data or the end of the
  // stream arrives.
  function awaitCharacter () {
    if (index < length()) {
      return Promise.resolve()
    }

    if (isStreamEnded) {
      setImmediate(endWalk)
      return Promise.reject()
    }

    return new Promise((resolve, reject) => {
      resumeFn = () => {
        if (index < length()) {
          return resolve()
        }

        reject()

        if (isStreamEnded) {
          setImmediate(endWalk)
        }
      }
    })
  }
  // Returns the character at `index` without consuming it, caching the
  // result in `cachedCharacter`. The first chunk is checked directly; later
  // chunks are located through their aggregate lengths.
  function character () {
    if (cachedCharacter) {
      return cachedCharacter
    }

    const [ head ] = lengths
    if (index < head.item) {
      cachedCharacter = json[0][index]
      return cachedCharacter
    }

    const chunkIndex = lengths.findIndex(
      (entry, position) => position > 0 && entry.aggregate > index
    )
    if (chunkIndex === -1) {
      return
    }

    const { aggregate, item } = lengths[chunkIndex]
    cachedCharacter = json[chunkIndex][index + item - aggregate]
    return cachedCharacter
  }
  // Space, tab and carriage return are always whitespace. A newline is
  // whitespace too, except in ndjson mode when no scope is open.
  function isWhitespace (char) {
    if (char === '\n') {
      return ! (shouldHandleNdjson && scopes.length === 0)
    }

    return char === ' ' || char === '\t' || char === '\r'
  }
  // Consumes the current character and resolves with it. Advances `index`,
  // updates the previous/current positions and, once `index` has moved past
  // the first buffered chunk, drops that chunk and rebases the bookkeeping.
  function next () {
    return awaitCharacter().then(consume)

    function consume () {
      const consumed = character()
      cachedCharacter = null
      index += 1

      const { line, column } = currentPosition
      previousPosition.line = line
      previousPosition.column = column

      if (consumed !== '\n') {
        currentPosition.column += 1
      } else {
        currentPosition.line += 1
        currentPosition.column = 1
      }

      if (index > lengths[0].aggregate) {
        json.shift()
        const { item: dropped } = lengths.shift()
        index -= dropped
        for (const entry of lengths) {
          entry.aggregate -= dropped
        }
      }

      return consumed
    }
  }
  // Dispatches on the first character of a value. In ndjson mode with no
  // open scope, a newline emits the end-of-line event and any value that
  // starts before the previous line was ended is reported as an error.
  function handleValue (char) {
    const isBetweenLines = shouldHandleNdjson && scopes.length === 0

    if (isBetweenLines) {
      if (char === '\n') {
        hasEndedLine = true
        return emit(events.endLine).then(value)
      }

      if (! hasEndedLine) {
        return fail(char, '\n', previousPosition).then(value)
      }

      hasEndedLine = false
    }

    if (char === '[') {
      return array()
    }
    if (char === '{') {
      return object()
    }
    if (char === '"') {
      return string()
    }
    if (startsNumber(char)) {
      return number(char)
    }
    if (char === 'f') {
      return literalFalse()
    }
    if (char === 'n') {
      return literalNull()
    }
    if (char === 't') {
      return literalTrue()
    }

    return fail(char, 'value', previousPosition).then(value)

    // True only for exactly one of the characters 0-9, '-' or '.'.
    function startsNumber (candidate) {
      return typeof candidate === 'string' &&
        candidate.length === 1 &&
        '0123456789-.'.includes(candidate)
    }
  }
  // Opens an array scope; its contents are walked as values.
  function array () {
    const contentHandler = value
    return scope(events.array, contentHandler)
  }
  // Emits the opening event for a scope, pushes it on the scope stack,
  // checks for an immediate terminator via `endScope`, then passes the
  // outcome to the content handler.
  function scope (event, contentHandler) {
    return emit(event)
      .then(open)
      .then(contentHandler)

    function open () {
      scopes.push(event)
      return endScope(event)
    }
  }
  // Emits an event on the emitter once any active pause has been released.
  // An exception thrown by a listener is re-emitted as an error event; an
  // exception thrown while doing that is dropped.
  function emit (...args) {
    const gate = pause || Promise.resolve()
    return gate.then(dispatch)

    function dispatch () {
      try {
        emitter.emit(...args)
      } catch (err) {
        reportListenerError(err)
      }
    }

    function reportListenerError (err) {
      try {
        emitter.emit(events.error, err)
      } catch (_) {
        // When calling user code, anything is possible
      }
    }
  }
  // If the next non-whitespace character terminates the given scope, emits
  // the scope's end event, pops the scope, consumes the terminator and
  // carries on with `endValue`. Any rejection ends the walk.
  function endScope (scp) {
    return awaitNonWhitespace()
      .then(closeIfTerminated)
      .catch(endWalk)

    function closeIfTerminated () {
      if (character() !== terminators[scp]) {
        return
      }

      return emit(events.endPrefix + scp)
        .then(() => {
          scopes.pop()
          return next()
        })
        .then(endValue)
    }
  }
  // Decides what follows a completed value. With no open scope, either the
  // next ndjson value is walked or trailing data is reported. Inside a
  // scope, it checks for the terminator, then for a separating comma, and
  // hands over to the scope's content handler. Any rejection ends the walk.
  function endValue () {
    return awaitNonWhitespace()
      .then(proceed)
      .catch(endWalk)

    function proceed () {
      if (scopes.length > 0) {
        return continueScope()
      }

      if (shouldHandleNdjson) {
        return value()
      }

      return fail(character(), 'EOF', currentPosition).then(value)
    }

    function continueScope () {
      const scp = scopes[scopes.length - 1]
      // Looked up now: the scope stack may change while `endScope` runs.
      const handler = handlers[scp]

      return endScope(scp)
        .then(() => {
          if (scopes.length > 0) {
            return checkCharacter(character(), ',', currentPosition)
          }
        })
        .then(isSeparated => {
          if (isSeparated) {
            return next()
          }
        })
        .then(handler)
    }
  }
  // Emits a data-error event carrying an error built from the actual and
  // expected tokens and the line/column of the given position.
  function fail (actual, expected, position) {
    const { line, column } = position
    const dataError = error.create(actual, expected, line, column)

    return emit(events.dataError, dataError)
  }
  // Compares a character against the one expected at this point.
  //
  // Resolves to `true` on a match. On a mismatch it reports a data error
  // via `fail` and then resolves to `false`.
  function checkCharacter (char, expected, position) {
    if (char === expected) {
      return Promise.resolve(true)
    }

    return fail(char, expected, position)
      // `then` ignores non-function arguments, so the value has to be
      // produced by a callback for the promise to resolve to `false`.
      .then(() => false)
  }
  // Opens an object scope; its contents are walked as properties.
  function object () {
    const contentHandler = property
    return scope(events.object, contentHandler)
  }
  // Skips whitespace, consumes the next character and treats it as the
  // start of a property name.
  function property () {
    const ready = awaitNonWhitespace()
    return ready.then(next).then(propertyName)
  }
  // Verifies the opening quote, walks the name as a string emitted with the
  // property event, then consumes the next non-whitespace character and
  // passes it to `propertyValue`. A mismatch is reported by
  // `checkCharacter` but does not stop the chain.
  function propertyName (char) {
    const checked = checkCharacter(char, '"', previousPosition)
    const named = checked.then(() => walkString(events.property))

    return named
      .then(awaitNonWhitespace)
      .then(next)
      .then(propertyValue)
  }
  // Verifies the colon that separates a property name from its value, then
  // walks the value. A mismatch is reported but does not stop the chain.
  function propertyValue (char) {
    const separatorChecked = checkCharacter(char, ':', previousPosition)
    return separatorChecked.then(value)
  }
  // Consumes characters up to the closing quote, resolving escape sequences
  // along the way, then emits `event` with the assembled string.
  // `isWalkingString` is set for the duration so that an early end of input
  // can be reported as an unterminated string.
  function walkString (event) {
    const pieces = []
    let isEscaping = false

    isWalkingString = true

    return advance()

    function advance () {
      return next().then(step)
    }

    function step (char) {
      if (isEscaping) {
        isEscaping = false

        return escape(char).then(escaped => {
          pieces.push(escaped)
          return advance()
        })
      }

      if (char === '\\') {
        isEscaping = true
        return advance()
      }

      if (char === '"') {
        isWalkingString = false
        return emit(event, pieces.join(''))
      }

      pieces.push(char)
      return advance()
    }
  }
  // Resolves the character after a backslash to the text it stands for:
  // a table lookup for simple escapes, `escapeHex` for `u`. Anything else
  // is reported as an error and kept verbatim, backslash included.
  function escape (char) {
    const simple = escapes[char]
    if (simple) {
      return Promise.resolve(simple)
    }

    if (char !== 'u') {
      return fail(char, 'escape character', previousPosition)
        .then(() => `\\${char}`)
    }

    return escapeHex()
  }
  // Reads the four characters of a `\uXXXX` escape.
  //
  // Resolves to the character for that UTF-16 code unit when all four are
  // hex digits. Otherwise it reports the FIRST offending character, at the
  // position where it was read, and resolves to the escape echoed verbatim
  // (`\u` followed by the four characters consumed). Four characters are
  // always consumed, valid or not.
  function escapeHex () {
    const consumed = []
    let invalid = null

    return next().then(step)

    function step (char) {
      consumed.push(char)

      if (invalid === null && ! isHexit(char)) {
        // `previousPosition` is mutated by every `next` call, so take a
        // snapshot now for the error raised after the last character.
        invalid = {
          char,
          position: {
            line: previousPosition.line,
            column: previousPosition.column
          }
        }
      }

      if (consumed.length < 4) {
        return next().then(step)
      }

      const sequence = consumed.join('')

      if (invalid === null) {
        return String.fromCharCode(parseInt(sequence, 16))
      }

      return fail(invalid.char, 'hex digit', invalid.position)
        .then(() => `\\u${sequence}`)
    }
  }
  // Walks a string value, then continues with whatever follows a value.
  function string () {
    const walked = walkString(events.string)
    return walked.then(endValue)
  }
  // Walks a number whose first character has already been consumed: integer
  // digits, an optional fraction and an optional signed exponent. The
  // collected text is parsed with `parseFloat` and emitted with the number
  // event.
  function number (firstCharacter) {
    const digits = [ firstCharacter ]

    return walkDigits().then(result => append(result, checkDecimalPlace))

    // Adds freshly walked digits, then either finishes (input exhausted) or
    // moves on to the given continuation.
    function append (result, continuation) {
      for (const digit of result.digits) {
        digits.push(digit)
      }

      return result.atEnd ? endNumber() : continuation()
    }

    function checkDecimalPlace () {
      if (character() !== '.') {
        return checkExponent()
      }

      return next()
        .then(point => {
          digits.push(point)
          return walkDigits()
        })
        .then(result => append(result, checkExponent))
    }

    function checkExponent () {
      const marker = character()
      if (marker !== 'e' && marker !== 'E') {
        return endNumber()
      }

      return next()
        .then(exponentMarker => {
          digits.push(exponentMarker)
          return awaitCharacter()
        })
        .then(checkSign)
        .catch(() => fail('EOF', 'exponent', currentPosition))
    }

    function checkSign () {
      const sign = character()
      if (sign !== '+' && sign !== '-') {
        return readExponent()
      }

      return next().then(consumedSign => {
        digits.push(consumedSign)
        return readExponent()
      })
    }

    function readExponent () {
      return walkDigits().then(result => append(result, endNumber))
    }

    function endNumber () {
      const parsed = parseFloat(digits.join(''))
      return emit(events.number, parsed).then(endValue)
    }
  }
  // Consumes consecutive decimal digits. Resolves to the digits collected
  // and an `atEnd` flag, which is true when collection stopped because
  // waiting for a character failed rather than because a non-digit was met.
  function walkDigits () {
    const collected = []

    return advance()

    function advance () {
      return awaitCharacter()
        .then(consume)
        .catch(() => ({ digits: collected, atEnd: true }))
    }

    function consume () {
      if (! isDigit(character())) {
        return { digits: collected, atEnd: false }
      }

      return next().then(char => {
        collected.push(char)
        return advance()
      })
    }
  }
  // Walks the remainder of `false`; the leading `f` is already consumed.
  function literalFalse () {
    const remaining = [ 'a', 'l', 's', 'e' ]
    return literal(remaining, false)
  }
  // Consumes the expected characters of a literal one at a time. On the
  // first mismatch, or when input runs out early, an error is reported;
  // otherwise the literal event is emitted with `val`. In every case the
  // walk then continues with `endValue`. `expectedCharacters` is consumed
  // destructively.
  function literal (expectedCharacters, val) {
    // Set to the offending pair when a character does not match.
    let mismatch = null

    return advance()

    function advance () {
      return awaitCharacter()
        .then(consume)
        .catch(finish)
    }

    function consume () {
      if (mismatch || expectedCharacters.length === 0) {
        return finish()
      }

      return next().then(compare)
    }

    function compare (char) {
      const expected = expectedCharacters.shift()
      if (char !== expected) {
        mismatch = { actual: char, expected }
      }

      return advance()
    }

    function finish () {
      return Promise.resolve()
        .then(report)
        .then(endValue)
    }

    function report () {
      if (mismatch) {
        return fail(mismatch.actual, mismatch.expected, previousPosition)
      }

      if (expectedCharacters.length > 0) {
        return fail('EOF', expectedCharacters.shift(), currentPosition)
      }

      return emit(events.literal, val)
    }
  }
  // Walks the remainder of `null`; the leading `n` is already consumed.
  function literalNull () {
    const remaining = [ 'u', 'l', 'l' ]
    return literal(remaining, null)
  }
  // Walks the remainder of `true`; the leading `t` is already consumed.
  function literalTrue () {
    const remaining = [ 'r', 'u', 'e' ]
    return literal(remaining, true)
  }
  // Marks the stream as ended. A walk that has begun is woken up so it can
  // notice the end of input; otherwise the walk is ended directly.
  function endStream () {
    isStreamEnded = true

    if (! isWalkBegun) {
      endWalk()
      return
    }

    return resume()
  }
  // Invokes the callback parked by `awaitCharacter`, if any, then clears it.
  function resume () {
    if (! resumeFn) {
      return
    }

    resumeFn()
    resumeFn = null
  }
  // Finishes the walk once: reports an unterminated string if one was in
  // progress, reports every scope still open, then emits the end event.
  // Later calls resolve immediately without doing anything.
  function endWalk () {
    if (isWalkEnded) {
      return Promise.resolve()
    }

    isWalkEnded = true

    return Promise.resolve()
      .then(reportOpenString)
      .then(popScopes)
      .then(() => emit(events.end))

    function reportOpenString () {
      if (isWalkingString) {
        return fail('EOF', '"', currentPosition)
      }
    }
  }
  // Unwinds the scope stack from the innermost scope outwards, reporting a
  // missing terminator for each scope that is still open.
  function popScopes () {
    if (scopes.length > 0) {
      const unterminated = scopes.pop()

      return fail('EOF', terminators[unterminated], currentPosition)
        .then(popScopes)
    }

    return Promise.resolve()
  }
  575. }
  // True when the character is a hexadecimal digit: 0-9, A-F or a-f.
  function isHexit (character) {
    if (isDigit(character)) {
      return true
    }

    if (isInRange(character, 'A', 'F')) {
      return true
    }

    return isInRange(character, 'a', 'f')
  }
  // True when the character is a decimal digit, 0 through 9.
  function isDigit (character) {
    const lowest = '0'
    const highest = '9'

    return isInRange(character, lowest, highest)
  }
  // True when the first UTF-16 code unit of `character` lies between the
  // first code units of `lower` and `upper`, both bounds inclusive.
  function isInRange (character, lower, upper) {
    const code = character.charCodeAt(0)

    if (! (code >= lower.charCodeAt(0))) {
      return false
    }

    return code <= upper.charCodeAt(0)
  }