// Identifiers of the rules that TokenizerUtils#identifySeparator records in `rules`.
const RuleConstant = {
  // Recorded when the text contains '<br' or a newline character.
  RULES_EOF: 2,
  // Recorded when the text matches the HTML-element pattern.
  RULES_TAG: 3,
};

// Regular expressions shared by the tokenizer. All of them carry the `g` flag.
const PATTERN = {
  // An optional whitespace character, one of cm/mm/dl/cl/% (case-insensitive),
  // then a mandatory whitespace character.
  UNITS: /\s?(cm|mm|dl|cl|%)\s/gi,
  // One or more digits, an optional whitespace character, a unit and a mandatory
  // trailing whitespace character; the whole match is captured as group 1.
  SEPARATOR_UNITS: /(\d+\s?(cm|mm|dl|cl|%)\s)/gi,
  // `;`, end of input, a whitespace-delimited "et", `.`, `!`, `?` or `,`,
  // together with the whitespace around it. The separator itself is captured,
  // so String#split returns it alongside the pieces.
  SEPARATOR: /\s*?(;|$|\set\s|\.|!|\?|,)\s*/g,
};

// Prefix of the message keys carried by the SemanticTypeError instances thrown in this file.
const namespace = 'utils.semantic.tokenizer';

/**
 * Error type thrown by the tokenizer utilities.
 *
 * Still a TypeError (so `instanceof TypeError` keeps working), but it reports
 * its own `name` so logs and stack traces can tell it apart from a built-in
 * TypeError.
 */
class SemanticTypeError extends TypeError {
  /**
   * @param {...*} args - Forwarded unchanged to the TypeError constructor.
   */
  constructor(...args) {
    super(...args);
    this.name = 'SemanticTypeError';
  }
}

/**
 * Splits free text into tokens and extracts a "<digits><unit>" quantity from it.
 *
 * This class calls `String#eof`, `String#striptags` and `Array#uniq`, none of
 * which is defined here; they must be provided elsewhere before `split()` runs.
 */
class TokenizerUtils {
  /**
   * @param {string} value - Text to analyse.
   * @param {Object} [options={}] - Stored on the instance; no method of this class reads it.
   */
  constructor(value, options = {}) {
    this.value = value;
    this.options = options;
    // RuleConstant values detected by identifySeparator().
    this.rules = [];
  }

  /**
   * Builds an instance and runs the rule detection on it.
   *
   * @param {string} [value] - Text to analyse; any falsy value is treated as ''.
   * @returns {TokenizerUtils} The new instance with `rules` populated.
   * @throws {SemanticTypeError} When a truthy, non-string value is given.
   */
  static instance(value) {
    const text = value || '';
    if (typeof text !== 'string') {
      throw new SemanticTypeError(`${namespace}.not.string`);
    }
    return new TokenizerUtils(text).identifySeparator();
  }

  /**
   * Records which rules apply to the current value.
   *
   * @returns {TokenizerUtils} This instance, for chaining.
   */
  identifySeparator() {
    if (this.value.includes('<br') || this.value.includes('\n')) {
      this.rules.push(RuleConstant.RULES_EOF);
    }
    // the presence of HTML elements
    const patternHtml = /<\/?[a-z][\s\S]*>/i;
    if (patternHtml.test(this.value)) {
      this.rules.push(RuleConstant.RULES_TAG);
    }
    return this;
  }

  /**
   * @returns {string[]} The result of `String#eof` on the value, split on line breaks.
   */
  eof() {
    // NOTE(review): String#eof is defined outside this file; presumably it
    // normalises `<br>` into newlines. Confirm.
    return this.value.eof().split(/\r?\n/);
  }

  /**
   * @returns {string} Whatever `String#striptags` (defined outside this file) returns for the value.
   */
  striptags() {
    return this.value.striptags();
  }

  /**
   * Trims the text, collapses runs of whitespace into one space, splits it on
   * the pattern, drops empty pieces and trims what is left.
   *
   * String#split inserts the text of capture groups into its result, so a
   * pattern with a capturing group (such as PATTERN.SEPARATOR) also yields
   * the matched separators as entries.
   *
   * @param {RegExp|string} patterScoped - Separator handed to String#split.
   * @param {string} value - Text to split.
   * @returns {string[]} The trimmed pieces.
   */
  static words(patterScoped, value) {
    return value
      .trim()
      .replace(/\s\s+/g, ' ')
      .split(patterScoped)
      .filter(v => v)
      .map(w => w.trim());
  }

  /**
   * Tokenizes the value.
   *
   * Side effect: when the TAG rule was detected, `this.value` is replaced by
   * its tag-stripped form.
   *
   * @returns {Array} Tokens longer than one character, sorted in descending
   *   order and then passed through `Array#uniq` (defined outside this file).
   */
  split() {
    // NOTE(review): tags are stripped before eof() runs; if String#striptags
    // also removes `<br>`, those line breaks are gone by then. Confirm.
    if (this.rules.includes(RuleConstant.RULES_TAG)) {
      this.value = this.striptags();
    }
    const rows = this.rules.includes(RuleConstant.RULES_EOF) ? this.eof() : [this.value];

    // Entries of one character or less are dropped, which removes captured
    // separators such as ',' or ';'.
    // NOTE(review): a captured " et " is trimmed to "et" and survives this filter.
    const tokenize = row => TokenizerUtils.words(PATTERN.SEPARATOR, row).filter(c => c && c.length > 1);

    // Array#sort needs a negative / zero / positive number. The previous
    // boolean comparator (`a < b`) never produced a negative value, which made
    // the resulting order engine-dependent (a no-op on current V8).
    const byDescending = (a, b) => {
      if (a < b) return 1;
      if (a > b) return -1;
      return 0;
    };

    const tokens = rows.reduce((result, row) => {
      result.push(...tokenize(row));
      return result;
    }, []);

    return tokens
      .reduce((result, token) => {
        if (token.match(PATTERN.SEPARATOR_UNITS) !== null) {
          // Put a comma in front of every "<digits><unit>" group so that the
          // quantity starts a token of its own, then tokenize again.
          result.push(...tokenize(token.replace(PATTERN.SEPARATOR_UNITS, ', $1')));
        } else {
          result.push(token);
        }
        return result;
      }, [])
      .filter(Boolean)
      .sort(byDescending)
      .uniq();
  }

  /**
   * Extracts at most one "<digits><unit>" quantity from the value.
   *
   * @returns {{unit: string, valueInt: string, valueString: string}}
   *   `unit` is the trimmed unit match ('' when none), `valueInt` is the
   *   quantity match with the unit match removed, and `valueString` is the
   *   value with every quantity removed, trimmed.
   * @throws {SemanticTypeError} When more than one unit or more than one quantity is found.
   */
  extract() {
    const units = this.value.match(PATTERN.UNITS) || [];
    const valueInt = this.value.match(PATTERN.SEPARATOR_UNITS) || [''];
    if (units.length > 1) {
      throw new SemanticTypeError(`${namespace}.extract.units.issued`);
    }
    if (valueInt.length > 1) {
      throw new SemanticTypeError(`${namespace}.extract.valueint.issued`);
    }
    const [unit = ''] = units;
    const [quantity] = valueInt;
    return {
      unit: unit.trim(),
      // The unit match keeps its surrounding whitespace, so removing it from
      // the quantity match leaves the digits only.
      valueInt: unit ? quantity.replace(unit, '') : quantity,
      valueString: this.value.replace(PATTERN.SEPARATOR_UNITS, '').trim(),
    };
  }
}

/**
 * Tokenizes a text through TokenizerUtils.
 *
 * @param {string} value - Text to tokenize.
 * @returns {?Array} The result of `split()`, or null when no instance is obtained.
 */
export const Tokenizer = (value) => {
  const tokenizer = TokenizerUtils.instance(value);
  return tokenizer ? tokenizer.split() : null;
};
/**
 * Runs TokenizerUtils#extract on every truthy entry.
 *
 * @param {string|Array} values - A single text or a list of texts; a string is
 *   treated as a one-element list.
 * @returns {Array} One `extract()` result per truthy entry, in input order.
 */
export const Compositionizer = (values) => {
  const entries = typeof values === 'string' ? [values] : values;
  const kept = entries.filter(Boolean);
  return kept.map(entry => TokenizerUtils.instance(entry).extract());
};
