'use strict';

// Set the `__esModule` marker on the exports object. Interop helpers such as
// `_interopRequireDefault` in this file test this flag to decide whether a
// module can be used as-is or must be wrapped in a `{ default: ... }` object.
Object.defineProperty(exports, "__esModule", {
  value: true
});

/**
 * Array-destructuring helper.
 *
 * - Real arrays are returned untouched (same reference, `limit` ignored).
 * - Any other iterable is drained into a new array, stopping as soon as
 *   `limit` items were collected (a falsy `limit` means "take everything").
 * - Anything else raises a TypeError.
 */
var _slicedToArray = function () {
  function collectFromIterator(iterable, limit) {
    var collected = [];
    // `true` while the iterator does not need closing: before it is created,
    // between two steps, and after it reported `done`.
    var finished = true;
    var failed = false;
    var failure = undefined;
    var iterator;
    var step;

    try {
      iterator = iterable[Symbol.iterator]();

      while (!(finished = (step = iterator.next()).done)) {
        collected.push(step.value);

        if (limit && collected.length === limit) {
          // Leaving early: `finished` stays false so the iterator gets closed.
          break;
        }

        finished = true;
      }
    } catch (err) {
      failed = true;
      failure = err;
    } finally {
      try {
        if (!finished && iterator["return"]) {
          iterator["return"]();
        }
      } finally {
        // The original failure wins over anything thrown while closing.
        if (failed) {
          throw failure;
        }
      }
    }

    return collected;
  }

  return function (arr, i) {
    if (Array.isArray(arr)) {
      return arr;
    }

    if (Symbol.iterator in Object(arr)) {
      return collectFromIterator(arr, i);
    }

    throw new TypeError("Invalid attempt to destructure non-iterable instance");
  };
}();

/**
 * Class-definition helper: installs the given property descriptors on
 * `Constructor.prototype` (instance members) and on `Constructor` itself
 * (static members), then returns the constructor.
 *
 * Each descriptor carries its property name in `key`. Descriptors are
 * normalised in place: non-enumerable unless they say otherwise, always
 * configurable, and writable when they hold a `value`.
 */
var _createClass = function () {
  function installDescriptors(target, descriptors) {
    var index = 0;

    while (index < descriptors.length) {
      var entry = descriptors[index];

      entry.enumerable = entry.enumerable || false;
      entry.configurable = true;

      if ("value" in entry) {
        entry.writable = true;
      }

      Object.defineProperty(target, entry.key, entry);
      index += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      installDescriptors(Constructor.prototype, protoProps);
    }

    if (staticProps) {
      installDescriptors(Constructor, staticProps);
    }

    return Constructor;
  };
}();

/**
 * The MIT License (MIT)
 * Copyright (c) 2015-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 */

var _specialSymbols = require('./special-symbols');

var _colors = require('colors');

var _colors2 = _interopRequireDefault(_colors);

/**
 * Normalises a required module so that it can be read through `.default`:
 * a truthy value flagged with `__esModule` is returned as-is, anything else
 * is wrapped as `{ default: value }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return { default: obj };
}

/**
 * Spread helper: produces a fresh array that is safe to hand to `apply`.
 * Arrays are copied index by index (so holes turn into `undefined`
 * entries); every other value goes through `Array.from`.
 */
function _toConsumableArray(arr) {
  if (!Array.isArray(arr)) {
    return Array.from(arr);
  }

  var copy = Array(arr.length);

  for (var position = 0; position < arr.length; position++) {
    copy[position] = arr[position];
  }

  return copy;
}

/**
 * Guard used at the top of a compiled class constructor: throws a TypeError
 * unless `instance` was created from `Constructor` (i.e. unless the
 * constructor was invoked with `new`).
 */
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }

  throw new TypeError("Cannot call a class as a function");
}

// Shared end-of-input token: both its type and its value are the `EOF`
// symbol exported by `./special-symbols`. It carries no location data.
var EOF_TOKEN = { type: _specialSymbols.EOF, value: _specialSymbols.EOF };

/**
 * A default tokenizer that extracts tokens from a string by trying the
 * rules of the given lexical grammar one after another. Matching is
 * delegated to the regexp each lex rule provides.
 */

var Tokenizer = function () {
  /**
   * Every field that describes "where the scanner currently is". The list
   * is used to snapshot and restore the scanner around a full rescan.
   */
  var SCAN_STATE_FIELDS = ['_cursor', '_states', '_tokensQueue', '_currentLine', '_currentColumn', '_currentLineBeginOffset', '_tokenStartOffset', '_tokenEndOffset', '_tokenStartLine', '_tokenEndLine', '_tokenStartColumn', '_tokenEndColumn'];

  /**
   * Copies the scan-state fields from one object to another and returns
   * the target. Arrays are copied by reference.
   */
  function copyScanState(from, to) {
    SCAN_STATE_FIELDS.forEach(function (field) {
      to[field] = from[field];
    });
    return to;
  }

  /**
   * Puts the scanner of the given tokenizer at the very beginning of its
   * string. Fresh arrays are assigned, so previously captured references
   * to the state stack or to the token queue are left untouched.
   */
  function resetScanState(tokenizer) {
    /**
     * Tracking cursor (absolute offset).
     */
    tokenizer._cursor = 0;

    /**
     * Tokenizer states to work with start conditions of lex rules.
     * The `INITIAL` state is always present, i.e. all rules with no
     * explicit start conditions are executed until a new state is
     * pushed. If the state is exclusive, then only the rules with this
     * start condition are executed. If it's inclusive, then in addition
     * rules with no start conditions are executed as well.
     * https://gist.github.com/DmitrySoshnikov/f5e2583b37e8f758c789cea9dcdf238a
     */
    tokenizer._states = ['INITIAL'];

    /**
     * In case a token handler returns multiple tokens from one rule,
     * `getNextToken` still returns them one by one: the extra token types
     * wait in this queue and are served before any further matching.
     */
    tokenizer._tokensQueue = [];

    /**
     * Current line number (1-based) and column number (0-based).
     */
    tokenizer._currentLine = 1;
    tokenizer._currentColumn = 0;

    /**
     * Absolute offset of the beginning of the current line.
     *
     * Since new lines can be handled by the lex rules themselves,
     * every matched text is scanned for `\n`s, and start/end locations
     * of tokens are derived from `_currentLine`/`_currentLineBeginOffset`.
     */
    tokenizer._currentLineBeginOffset = 0;

    /**
     * Location data of the most recently matched text.
     */
    tokenizer._tokenStartOffset = 0;
    tokenizer._tokenEndOffset = 0;
    tokenizer._tokenStartLine = 1;
    tokenizer._tokenEndLine = 1;
    tokenizer._tokenStartColumn = 0;
    tokenizer._tokenEndColumn = 0;
  }

  /**
   * Creates a tokenizer instance for a string
   * that belongs to the given grammar.
   *
   * @param {Object} _ref
   * @param {string} [_ref.string] - text to tokenize; when omitted,
   *   `initString` has to be called before requesting tokens.
   * @param {Object} _ref.lexGrammar - lexical grammar; it must provide
   *   `getRulesForState(state)`.
   * @throws {TypeError} when called without `new`.
   */
  function Tokenizer(_ref) {
    var string = _ref.string,
        lexGrammar = _ref.lexGrammar;

    if (!(this instanceof Tokenizer)) {
      throw new TypeError("Cannot call a class as a function");
    }

    /**
     * Corresponding lexical grammar.
     */
    this._lexGrammar = lexGrammar;

    // An empty string is a valid input (it tokenizes to a lone EOF), so it
    // must be initialised as well; a plain truthiness test would skip it.
    if (string || string === '') {
      this.initString(string);
    }
  }

  var methods = {
    /**
     * Returns the stack of tokenizer states (the live array).
     */
    getStates: function getStates() {
      return this._states;
    },

    /**
     * Returns current state, i.e. the top of the state stack.
     */
    getCurrentState: function getCurrentState() {
      return this._states[this._states.length - 1];
    },

    /**
     * Pushes a new state for the tokenizer. Some lex-rules may
     * specify in which state they are triggered. A rule won't be
     * triggered if a tokenizer is not in this state.
     */
    pushState: function pushState(state) {
      this._states.push(state);
    },

    /**
     * Alias for `pushState`.
     */
    begin: function begin(state) {
      this.pushState(state);
    },

    /**
     * Pops a state and returns it. If only the bottom (`INITIAL`) state is
     * left, it is returned without being removed.
     */
    popState: function popState() {
      if (this._states.length > 1) {
        return this._states.pop();
      }
      return this._states[0];
    },

    /**
     * Initializes a parsing string, and corresponding meta data. Any token
     * list cached by `getTokens` for a previous string is discarded.
     */
    initString: function initString(string) {
      /**
       * Tokenizing string.
       */
      this._string = string;

      /**
       * Cache of `getTokens`; it belongs to the previous string, drop it.
       */
      this._tokens = null;

      resetScanState(this);
    },

    /**
     * Returns all tokens of the string (including the trailing EOF token).
     * The list is computed once per string and cached.
     *
     * The scan always starts from the beginning of the string with a clean
     * state, and the position of an ongoing `getNextToken` iteration is
     * preserved: it is saved up front and restored afterwards, even when
     * tokenizing throws. Nothing is cached if tokenizing fails.
     */
    getTokens: function getTokens() {
      if (!this._tokens) {
        var savedState = copyScanState(this, {});
        var tokens = [];

        // Rewind to calculate all tokens.
        resetScanState(this);

        try {
          while (this.hasMoreTokens()) {
            tokens.push(this.getNextToken());
          }
        } finally {
          // And restore back for the `getNextToken`.
          copyScanState(savedState, this);
        }

        this._tokens = tokens;
      }
      return this._tokens;
    },

    /**
     * Returns next token.
     *
     * Order of business:
     *   1. a queued token type (left over from a multi-token rule);
     *   2. the EOF token if the cursor is already past the end;
     *   3. the first rule of the current state whose matcher matches at
     *      the cursor. A handler that yields a falsy token type makes the
     *      tokenizer skip the matched text and look for the next token;
     *      a handler that yields an array returns its first entry now and
     *      queues the rest;
     *   4. the EOF token if the cursor is exactly at the end;
     *   5. otherwise an "Unexpected token" SyntaxError.
     *
     * Every returned token, EOF included, passes through `onToken`.
     */
    getNextToken: function getNextToken() {
      // Something was queued, return it.
      if (this._tokensQueue.length > 0) {
        return this.onToken(this._toToken(this._tokensQueue.shift()));
      }

      if (!this.hasMoreTokens()) {
        return this.onToken(EOF_TOKEN);
      }

      // Analyze untokenized yet part of the string starting from
      // the current cursor position (so all regexp are from ^).
      var string = this._string.slice(this._cursor);

      // Get all rules which should be considered for this state.
      var lexRulesForState = this._lexGrammar.getRulesForState(this.getCurrentState());

      // Manual iterator protocol: the rules may come as any iterable, and
      // the iterator is closed when the loop is left early.
      var iterationCompleted = true;
      var iterationFailed = false;
      var iterationError = undefined;
      var iterator;
      var step;

      try {
        for (iterator = lexRulesForState[Symbol.iterator](); !(iterationCompleted = (step = iterator.next()).done); iterationCompleted = true) {
          var lexRule = step.value;

          // On success `_match` has already advanced the cursor and
          // recorded the location of the matched text.
          var matched = this._match(string, lexRule.getMatcher());

          // Manual handling of EOF token (the end of string). Return it
          // as `EOF` symbol.
          if (string === '' && matched === '') {
            this._cursor++;
          }

          if (matched !== null) {
            var yytext = void 0,
                rawToken = void 0;

            try {
              var tokenData = _slicedToArray(lexRule.getTokenData(matched, this), 2);

              yytext = tokenData[0];
              rawToken = tokenData[1];
            } catch (e) {
              console.error(_colors2.default.red('\nError in handler:\n\n') + lexRule.getRawHandler() + '\n');
              throw e;
            }

            // Usually whitespaces, etc.
            if (!rawToken) {
              return this.getNextToken();
            }

            // If multiple tokens are returned, save them to return
            // on next `getNextToken` call.
            if (Array.isArray(rawToken)) {
              var tokensToQueue = rawToken.slice(1);
              rawToken = rawToken[0];
              if (tokensToQueue.length > 0) {
                this._tokensQueue.unshift.apply(this._tokensQueue, tokensToQueue);
              }
            }

            return this.onToken(this._toToken(rawToken, yytext));
          }
        }
      } catch (err) {
        iterationFailed = true;
        iterationError = err;
      } finally {
        try {
          if (!iterationCompleted && iterator.return) {
            iterator.return();
          }
        } finally {
          if (iterationFailed) {
            throw iterationError;
          }
        }
      }

      if (this.isEOF()) {
        // Step past the end so that `hasMoreTokens` turns false.
        this._cursor++;
        return this.onToken(EOF_TOKEN);
      }

      this.throwUnexpectedToken(string[0], this._currentLine, this._currentColumn);
    },

    /**
     * Throws default "Unexpected token" exception, showing the actual
     * line from the source, pointing with the ^ marker to the bad token.
     * In addition, shows `line:column` location.
     *
     * @param {string} symbol - offending character.
     * @param {number} line - 1-based line number.
     * @param {number} column - 0-based column number.
     * @throws {SyntaxError} always.
     */
    throwUnexpectedToken: function throwUnexpectedToken(symbol, line, column) {
      var lineSource = this._string.split('\n')[line - 1];
      var lineData = '';

      if (lineSource) {
        var pad = ' '.repeat(column);
        lineData = '\n\n' + lineSource + '\n' + pad + '^\n';
      }

      throw new SyntaxError(lineData + 'Unexpected token: "' + symbol + '" ' + ('at ' + line + ':' + column + '.'));
    },

    /**
     * Records start/end offsets, lines and columns for the text that was
     * just matched at the current cursor, and moves the current
     * line/column past it. Has to run before the cursor is advanced.
     */
    _captureLocation: function _captureLocation(matched) {
      var nlRe = /\n/g;

      // Absolute offsets.
      this._tokenStartOffset = this._cursor;

      // Line-based locations, start.
      this._tokenStartLine = this._currentLine;
      this._tokenStartColumn = this._tokenStartOffset - this._currentLineBeginOffset;

      // Extract `\n` in the matched token.
      var nlMatch = void 0;
      while ((nlMatch = nlRe.exec(matched)) !== null) {
        this._currentLine++;
        this._currentLineBeginOffset = this._tokenStartOffset + nlMatch.index + 1;
      }

      this._tokenEndOffset = this._cursor + matched.length;

      // Line-based locations, end.
      this._tokenEndLine = this._currentLine;
      this._tokenEndColumn = this._currentColumn = this._tokenEndOffset - this._currentLineBeginOffset;
    },

    /**
     * Builds a token object of the given type. The value defaults to an
     * empty string; location data is taken from the last captured match.
     */
    _toToken: function _toToken(tokenType) {
      var yytext = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : '';

      return {
        // Basic data.
        type: tokenType,
        value: yytext,

        // Location data.
        startOffset: this._tokenStartOffset,
        endOffset: this._tokenEndOffset,
        startLine: this._tokenStartLine,
        endLine: this._tokenEndLine,
        startColumn: this._tokenStartColumn,
        endColumn: this._tokenEndColumn
      };
    },

    /**
     * Whether the cursor stands exactly at the end of the string.
     */
    isEOF: function isEOF() {
      return this._cursor === this._string.length;
    },

    /**
     * Whether there is anything left to return. The end position itself
     * still counts: the EOF token is produced there.
     */
    hasMoreTokens: function hasMoreTokens() {
      return this._cursor <= this._string.length;
    },

    /**
     * Generic tokenizing based on current regexp. On success captures the
     * location of the match, advances the cursor past it and returns the
     * matched text; returns `null` when the regexp does not match.
     */
    _match: function _match(string, regexp) {
      var matched = string.match(regexp);
      if (matched) {
        // Handle `\n` in the matched token to track line numbers.
        this._captureLocation(matched[0]);
        this._cursor += matched[0].length;
        return matched[0];
      }
      return null;
    },

    /**
     * Allows analyzing, and transforming token. Default implementation
     * just passes the token through.
     */
    onToken: function onToken(token) {
      return token;
    }
  };

  // Install the methods the way compiled classes do: non-enumerable,
  // configurable and writable properties of the prototype.
  Object.keys(methods).forEach(function (name) {
    Object.defineProperty(Tokenizer.prototype, name, {
      value: methods[name],
      enumerable: false,
      configurable: true,
      writable: true
    });
  });

  return Tokenizer;
}();

// Expose the Tokenizer constructor as the module's `default` export.
exports.default = Tokenizer;