/*jshint node:true */
/*

  The MIT License (MIT)

  Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/
'use strict';

var InputScanner = require('../core/inputscanner').InputScanner;
var Token = require('../core/token').Token;
var TokenStream = require('../core/tokenstream').TokenStream;
var WhitespacePattern = require('./whitespacepattern').WhitespacePattern;
// Token types emitted by this base tokenizer; language-specific
// tokenizers define their own, richer TOKEN sets.
var TOKEN = {
  START: 'TK_START',
  RAW: 'TK_RAW',
  EOF: 'TK_EOF'
};
// Base tokenizer. Subclasses override _get_next_token and the _is_*
// classification hooks below to implement real lexing.
var Tokenizer = function(input_string, options) {
  this._input = new InputScanner(input_string);
  this._options = options || {};
  this.__tokens = null;

  this._patterns = {};
  this._patterns.whitespace = new WhitespacePattern(this._input);
};
Tokenizer.prototype.tokenize = function() {
  this._input.restart();
  this.__tokens = new TokenStream();

  this._reset();

  var current;
  var previous = new Token(TOKEN.START, '');
  var open_token = null;
  var open_stack = [];
  var comments = new TokenStream();

  while (previous.type !== TOKEN.EOF) {
    current = this._get_next_token(previous, open_token);

    // Collect consecutive comment tokens and attach them to the next
    // non-comment token instead of emitting them on their own.
    while (this._is_comment(current)) {
      comments.add(current);
      current = this._get_next_token(previous, open_token);
    }

    if (!comments.isEmpty()) {
      current.comments_before = comments;
      comments = new TokenStream();
    }

    current.parent = open_token;

    // Pair opening and closing tokens with a stack, so every token knows
    // its enclosing parent and paired tokens know each other.
    if (this._is_opening(current)) {
      open_stack.push(open_token);
      open_token = current;
    } else if (open_token && this._is_closing(current, open_token)) {
      current.opened = open_token;
      open_token.closed = current;
      open_token = open_stack.pop();
      current.parent = open_token;
    }

    // Maintain a doubly-linked list of tokens in emission order.
    current.previous = previous;
    previous.next = current;

    this.__tokens.add(current);
    previous = current;
  }

  return this.__tokens;
};
Tokenizer.prototype._is_first_token = function() {
  return this.__tokens.isEmpty();
};

// Hook for subclasses to reset per-run state before tokenizing.
Tokenizer.prototype._reset = function() {};

// Default implementation: skip whitespace, then return the rest of the
// current line as one raw token, or EOF once the input is exhausted.
Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // jshint unused:false
  this._readWhitespace();
  var resulting_string = this._input.read(/.+/g);
  if (resulting_string) {
    return this._create_token(TOKEN.RAW, resulting_string);
  } else {
    return this._create_token(TOKEN.EOF, '');
  }
};

// Classification hooks used by tokenize(); the base class treats no
// token as a comment, an opener, or a closer.
Tokenizer.prototype._is_comment = function(current_token) { // jshint unused:false
  return false;
};

Tokenizer.prototype._is_opening = function(current_token) { // jshint unused:false
  return false;
};

Tokenizer.prototype._is_closing = function(current_token, open_token) { // jshint unused:false
  return false;
};
// Stamp each new token with the whitespace context captured by the most
// recent _readWhitespace() call.
Tokenizer.prototype._create_token = function(type, text) {
  var token = new Token(type, text,
    this._patterns.whitespace.newline_count,
    this._patterns.whitespace.whitespace_before_token);
  return token;
};

Tokenizer.prototype._readWhitespace = function() {
  return this._patterns.whitespace.read();
};
module.exports.Tokenizer = Tokenizer;
module.exports.TOKEN = TOKEN;
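
// Usage sketch (an illustration, not part of the upstream file): the base
// tokenizer splits input into one TK_RAW token per line, followed by TK_EOF.
// This assumes TokenStream exposes next() as in js-beautify's
// core/tokenstream. Nothing below runs when the module is required; execute
// this file directly to try it.
if (require.main === module) {
  var stream = new Tokenizer('foo\n  bar', {}).tokenize();
  var token = stream.next();
  while (token) {
    // Expected output: TK_RAW "foo", TK_RAW "bar", TK_EOF ""
    console.log(token.type, JSON.stringify(token.text));
    token = stream.next();
  }
}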