import * as emojiRegex from 'twemoji-parser/dist/lib/regex';

/**
 * Identifiers used to tag tokenizer output: both the flat runs produced by
 * `tokenizeMarkdownLines` and the nodes produced by `buildTokenTree`.
 */
export enum TOKEN_TYPE {
  UNDERLINE = 'underline',
  BOLD = 'bold',
  BOLD_ITALICS = 'bold-italics',
  UNDERLINE_ITALICS = 'underline-italics',
  ITALICS = 'italics',
  STRIKETHROUGH = 'strikethrough',
  LINK = 'link',
  CODEBLOCK_START = 'codeblock-start',
  CODEBLOCK_END = 'codeblock-end',
  CODEBLOCK = 'codeblock',
  INLINE_CODE = 'inline-code',
  QUOTES = 'quotes',
  EMOJI = 'emoji',
  // Type of the root node returned by `parseLineToTree`.
  PARAGRAPH = 'paragraph',
  // NOTE(review): not referenced by the code visible in this file.
  FRAGMENT = 'fragment',
  // Tags the literal delimiter text (a token's `start` / `end` string) in the output.
  TOKEN = 'token',
  EMOJI_SHORTCODE = 'emoji-shortcode',
  EMOJI_SHORTCODE_STARTED = 'emoji-shortcode-started',
  NEW_LINE = 'new-line',
}

/**
 * Describes one recognizable markdown construct: the pattern that finds it and
 * the flags that control how its match is split and accepted.
 */
export type MarkdownTokenType = {
  /** Token kind attached to the matched content. */
  type: TOKEN_TYPE;
  /** Pattern applied with `String.prototype.match` to locate the construct. */
  match: RegExp;
  /**
   * When true, the text between the delimiters is parsed again for nested
   * tokens; otherwise it is emitted as-is.
   */
  block?: boolean;
  /**
   * Opening delimiter. Its length is cut from the front of the matched text
   * and the string itself is emitted as a `TOKEN_TYPE.TOKEN` piece.
   */
  start?: string;
  /**
   * Closing delimiter. Its length is cut from the back of the matched text
   * and the string itself is emitted as a `TOKEN_TYPE.TOKEN` piece.
   */
  end?: string;
  /**
   * Declares that the token is meant to begin on a new line.
   * NOTE(review): nothing in the code visible here reads this flag; confirm
   * whether it is consumed elsewhere.
   */
  lineStart?: boolean;
  /**
   * When true, a match is accepted only at the very start of the searched text
   * or directly after a whitespace character.
   */
  onlyAfterWS?: boolean;
};

/**
 * Table of every construct the tokenizer recognizes.
 *
 * `matchMarkdown` orders candidates by match index only, so (given a stable
 * sort) an entry listed earlier wins when two patterns match at the same
 * position. That is why the longer delimiters (`***`, `___`) are listed before
 * their shorter prefixes (`**`, `*`, `__`, `_`).
 */
export const markdownTokens: MarkdownTokenType[] = [
  // --- links -----------------------------------------------------------------
  {
    type: TOKEN_TYPE.LINK,
    // NOTE(review): the leading `^` (no `m` flag) pins this pattern to the start
    // of the searched text, so `onlyAfterWS` is always satisfied for this entry.
    match: /^(https?|ftps?):\/\/[^\s\/$.?#].[^\s]*/,
    // Alternative pattern kept for reference:
    //match: new RegExp(/(?:[a-z+.-]+:\/\/|www\.\S{3,})\S*/),
    onlyAfterWS: true,
  },

  // --- code ------------------------------------------------------------------
  {
    // A complete fenced block: opening fence line, one or more lines, closing fence.
    type: TOKEN_TYPE.CODEBLOCK,
    match: /```.*\n(?:(?!```).*\n)+```/,
    start: '```',
    end: '```',
    lineStart: true,
  },
  {
    // An opening fence together with the rest of its line.
    type: TOKEN_TYPE.CODEBLOCK_START,
    match: /```.*/,
    start: '```',
    lineStart: true,
  },
  {
    // A bare fence.
    type: TOKEN_TYPE.CODEBLOCK_END,
    match: /```/,
    start: '```',
    lineStart: true,
  },
  {
    type: TOKEN_TYPE.INLINE_CODE,
    match: /`(?:(?!`).)*`/,
    start: '`',
    end: '`',
    onlyAfterWS: true,
  },

  // --- emoji -----------------------------------------------------------------
  {
    type: TOKEN_TYPE.EMOJI,
    // Rebuilt with an empty flag string; per the original note this cuts the
    // global flag from the imported pattern.
    match: new RegExp(emojiRegex.default, ''),
  },
  {
    // A closed shortcode such as `:name:`.
    type: TOKEN_TYPE.EMOJI_SHORTCODE,
    match: /:[a-z_]+:/,
    onlyAfterWS: true,
  },
  {
    // A shortcode that has been opened but not closed yet, such as `:name`.
    type: TOKEN_TYPE.EMOJI_SHORTCODE_STARTED,
    match: /:[a-z_]+/,
    onlyAfterWS: true,
  },

  // --- quotes ----------------------------------------------------------------
  {
    type: TOKEN_TYPE.QUOTES,
    match: /^>.*/,
    block: true,
    start: '>',
    lineStart: true,
    onlyAfterWS: true,
  },

  // --- asterisk emphasis (longest delimiter first) ---------------------------
  {
    type: TOKEN_TYPE.BOLD_ITALICS,
    match: /\*\*\*(?:(?!\*\*\*).)*\*\*\*/,
    block: true,
    start: '***',
    end: '***',
    onlyAfterWS: true,
  },
  {
    type: TOKEN_TYPE.BOLD,
    match: /\*\*(?:(?!\*\*).)*\*\*/,
    block: true,
    start: '**',
    end: '**',
    onlyAfterWS: true,
  },
  {
    type: TOKEN_TYPE.ITALICS,
    match: /\*(?:(?!\*).)*\*/,
    block: true,
    start: '*',
    end: '*',
    onlyAfterWS: true,
  },

  // --- underscore emphasis (longest delimiter first) -------------------------
  {
    type: TOKEN_TYPE.UNDERLINE_ITALICS,
    match: /___(?:(?!___).)*___/,
    block: true,
    start: '___',
    end: '___',
    onlyAfterWS: true,
  },
  {
    type: TOKEN_TYPE.UNDERLINE,
    match: /__(?:(?!__).)*__/,
    block: true,
    start: '__',
    end: '__',
    onlyAfterWS: true,
  },
  {
    type: TOKEN_TYPE.ITALICS,
    match: /_(?:(?!_).)*_/,
    block: true,
    start: '_',
    end: '_',
    onlyAfterWS: true,
  },

  // --- misc ------------------------------------------------------------------
  {
    type: TOKEN_TYPE.STRIKETHROUGH,
    match: /~~(?:(?!~~).)*~~/,
    block: true,
    start: '~~',
    end: '~~',
    onlyAfterWS: true,
  },
  {
    type: TOKEN_TYPE.NEW_LINE,
    match: /\n/,
  },
];

// Lookup from token type to the `block` flag of its table entry. When a type is
// listed more than once in `markdownTokens`, the last entry's flag is kept;
// entries without a `block` flag store `undefined`.
const isTokenBlock: { [key in TOKEN_TYPE]?: boolean } = {};
for (const { type, block } of markdownTokens) {
  isTokenBlock[type] = block;
}

/**
 * One run of text in the flat tokenizer output.
 *
 * `tokens` holds the types that apply to `text`: for plain text, the enclosing
 * block token types (innermost first); for the content of a non-block token,
 * only that token's type; for delimiter text, `TOKEN_TYPE.TOKEN`.
 */
export type MarkdownToken = { text: string; tokens: TOKEN_TYPE[] };

/**
 * Tokenizes `line` into a flat list of text runs, starting with no enclosing
 * token types.
 *
 * @param line Text to tokenize.
 * @returns The runs in source order.
 */
export const tokenizeMarkdownLines: (line: string) => MarkdownToken[] = (line) =>
  parseSection(line, []);

/**
 * Flattens `section` into a list of text runs, each tagged with the token
 * types that apply to it.
 *
 * The section is scanned left to right. For every match reported by
 * `matchMarkdown` the output receives, in order: the plain text before the
 * match, the token's `start` delimiter (tagged `TOKEN_TYPE.TOKEN`), the inner
 * text, and the token's `end` delimiter (tagged `TOKEN_TYPE.TOKEN`). Inner text
 * of a `block` token is parsed recursively with the token's type prepended to
 * `alreadyUsedTokens`; inner text of any other token is emitted as one run
 * tagged with only that token's type.
 *
 * @param section Text to tokenize.
 * @param alreadyUsedTokens Types of the enclosing block tokens. They are
 *   excluded from matching and attached to every plain-text run.
 * @returns The runs in source order. A section without any match (including an
 *   empty one) yields a single run holding the whole section.
 */
const parseSection: (section: string, alreadyUsedTokens: TOKEN_TYPE[]) => MarkdownToken[] = (
  section,
  alreadyUsedTokens = []
) => {
  const result: MarkdownToken[] = [];
  let rest = section;

  // The remainder to the right of a match is handled by looping rather than by
  // recursing, so call depth is bounded by block nesting instead of growing
  // with the number of matches in the section.
  do {
    const m = matchMarkdown(rest, alreadyUsedTokens);
    if (!m) {
      // Nothing (more) to match: whatever is left is plain text.
      result.push({ text: rest, tokens: alreadyUsedTokens.slice() });
      break;
    }

    const matched = m.match[0];
    const leftToken = m.token.start;
    const rightToken = m.token.end;
    // There is no match to the left of this one, or it would have been chosen.
    const leftPart = rest.slice(0, m.match.index);
    const middlePart = matched.slice(
      leftToken ? leftToken.length : 0, // cut the opening delimiter
      rightToken ? -1 * rightToken.length : matched.length // cut the closing delimiter
    );

    if (leftPart.length > 0) {
      // Each run gets its own copy of the tag list so that runs never share a
      // mutable array with each other or with the caller.
      result.push({ text: leftPart, tokens: alreadyUsedTokens.slice() });
    }
    if (leftToken) {
      result.push({ text: leftToken, tokens: [TOKEN_TYPE.TOKEN] });
    }
    if (middlePart.length > 0) {
      if (m.token.block) {
        // Block tokens may contain other tokens: parse the inside with this
        // token's type marked as already used.
        const nested = parseSection(middlePart, [m.token.type].concat(alreadyUsedTokens));
        for (const run of nested) {
          result.push(run);
        }
      } else {
        result.push({ text: middlePart, tokens: [m.token.type] });
      }
    }
    if (rightToken) {
      result.push({ text: rightToken, tokens: [TOKEN_TYPE.TOKEN] });
    }

    rest = rest.slice(m.match.index + matched.length);
  } while (rest.length > 0);

  return result;
};

// Hoisted so the whitespace test does not allocate a pattern per candidate.
const WHITESPACE_BEFORE_MATCH = /\s/;

/**
 * Finds the leftmost occurrence of `token` in `text` that the token's own
 * rules accept, or `null` when there is none.
 *
 * For tokens without `onlyAfterWS` this is simply the first occurrence. For
 * tokens with `onlyAfterWS`, an occurrence counts only at index 0 or directly
 * after a whitespace character; when the first occurrence is rejected the
 * search continues one character further, so a later valid occurrence is still
 * found (e.g. the `:smile:` in `foo:bar: :smile:`).
 */
const locateToken = (text: string, token: MarkdownTokenType): RegExpMatchArray | null => {
  const first = text.match(token.match);
  // `index` is absent only for a global pattern, which carries no position.
  if (first === null || first.index === undefined) {
    return null;
  }
  if (
    !token.onlyAfterWS ||
    first.index === 0 ||
    WHITESPACE_BEFORE_MATCH.test(text[first.index - 1])
  ) {
    return first;
  }

  // A pattern reaching this point is neither global (no index) nor sticky
  // (index would be 0), so appending `g` is safe. The global copy lets `exec`
  // resume from `lastIndex` while `^` keeps referring to the start of `text`.
  const scanner = new RegExp(token.match.source, token.match.flags + 'g');
  let from = first.index + 1;
  for (;;) {
    scanner.lastIndex = from;
    const next = scanner.exec(text);
    if (next === null) {
      return null;
    }
    // `next.index` is at least 1 here, so there is always a preceding character.
    if (WHITESPACE_BEFORE_MATCH.test(text[next.index - 1])) {
      return next;
    }
    from = next.index + 1;
  }
};

/**
 * Checks every token that is not listed in `alreadyUsedTokens` and selects the
 * leftmost accepted match in `text`.
 *
 * When several tokens match at the same index, the one listed first in
 * `markdownTokens` wins.
 *
 * @param text Text to search.
 * @param alreadyUsedTokens Token types to skip.
 * @returns The winning match with its token definition, or `undefined` when no
 *   token matches.
 */
const matchMarkdown: (
  text: string,
  alreadyUsedTokens: TOKEN_TYPE[]
) => { match: RegExpMatchArray; token: MarkdownTokenType } | undefined = (
  text,
  alreadyUsedTokens
) => {
  let best: { match: RegExpMatchArray; token: MarkdownTokenType } | undefined;
  let bestIndex = Infinity;

  for (const token of markdownTokens) {
    if (alreadyUsedTokens.includes(token.type)) {
      continue;
    }
    const match = locateToken(text, token);
    // Strict `<` keeps the earlier table entry on ties, independent of how the
    // runtime's sort treats equal elements.
    if (match !== null && match.index !== undefined && match.index < bestIndex) {
      best = { match, token };
      bestIndex = match.index;
    }
  }

  return best;
};

/**
 * Node of the tree produced by `buildTokenTree` / `parseLineToTree`.
 */
export type MarkdownTokenTree = {
  /** Kind of the node; absent on plain-text nodes. */
  token?: TOKEN_TYPE;
  /** Delimiter nodes and, for block tokens, the nested nodes, in source order. */
  children?: MarkdownTokenTree[];
  /** Plain text, delimiter text, or the inner text of a non-block token. */
  text?: string;
};

/**
 * Parses `line` and wraps the resulting nodes in a `TOKEN_TYPE.PARAGRAPH` root.
 *
 * @param line Text to parse.
 * @returns The paragraph node whose children are the parsed nodes.
 */
export const parseLineToTree: (line: string) => MarkdownTokenTree = (line) => {
  const children = buildTokenTree(line);
  return { children, token: TOKEN_TYPE.PARAGRAPH };
};

/**
 * Converts `line` into a list of sibling tree nodes.
 *
 * Text that matches no token becomes a `{ text }` node. Each match becomes a
 * node carrying the token's type whose children are, in order, the opening
 * delimiter, the nested nodes (block tokens only) and the closing delimiter.
 * The inner text of a non-block token is stored on the node's `text` instead.
 *
 * @param line Text to parse; an empty or missing value yields an empty list.
 * @returns The nodes in source order.
 */
export const buildTokenTree: (line: string) => MarkdownTokenTree[] = (line) => {
  const nodes: MarkdownTokenTree[] = [];
  let remaining = line;

  while (remaining && remaining.length > 0) {
    const found = matchMarkdown(remaining, []);
    if (!found) {
      // No further tokens: the rest is plain text and the scan is finished.
      nodes.push({ text: remaining });
      break;
    }

    const { match, token } = found;
    const matched = match[0];
    const opener = token.start;
    const closer = token.end;

    // Nothing matches to the left of the chosen match, so it is plain text.
    const before = remaining.slice(0, match.index);
    if (before.length > 0) {
      nodes.push({ text: before });
    }

    // Strip the delimiters from both ends to obtain the inner text.
    const inner = matched.slice(
      opener ? opener.length : 0,
      closer ? -1 * closer.length : matched.length
    );

    const node: MarkdownTokenTree = {
      children: [],
      token: token.type,
    };
    if (opener) {
      node.children.push({ token: TOKEN_TYPE.TOKEN, text: opener });
    }
    if (inner.length > 0) {
      if (token.block) {
        // Block tokens can hold other tokens, so their inside is parsed again.
        node.children = node.children.concat(buildTokenTree(inner));
      } else {
        node.text = inner;
      }
    }
    if (closer) {
      node.children.push({ token: TOKEN_TYPE.TOKEN, text: closer });
    }
    nodes.push(node);

    remaining = remaining.slice(match.index + matched.length);
  }

  return nodes;
};
