/*
 * @file generate ascii-safe json string (no unicode/emoji character)
 */

// Reference: https://unicode.org/faq/utf_bom.html#utf16-2
// high surrogates (the first code unit of a surrogate pair) are [\ud800-\udbff]
// low surrogates (the second code unit of a surrogate pair) are [\udc00-\udfff]: https://charbase.com/block/low-surrogates
// the combined range is [\ud800-\udfff]

// match either a pair that goes together or 1 value inside the combined range
// The pair alternative is listed first, so a well-formed pair is consumed as a single 2-code-unit match
// and only an unpaired (lone) surrogate ends up as a 1-code-unit match.
const SURROGATES_REGEX = /[\ud800-\udbff][\udc00-\udfff]|[\ud800-\udfff]/g

// Matches every UTF-16 code unit from \u007F up to \uFFFF so it can be escaped.
// \u007F (DEL) is still an ASCII control character; everything from \u0080 upwards is non-ascii.
// There is no `u` flag, so the two halves of a surrogate pair are matched one code unit at a time.
const NON_ASCII_REGEX = /[\u007F-\uFFFF]/g

// Some characters (e.g. emojis) have a code point that is too high to be stored in a single 2-byte UTF-16 code unit.
// They are represented by two code units (2 bytes each), called a surrogate pair, e.g. 🥱 is \ud83e\udd71
// Various locale input software sometimes messes this up and leaves only one half of a surrogate pair behind.
// That is okay in the frontend: javascript just shows an invalid character like \udd71 as �
// The backend is not as lenient: it marks such strings as invalid and throws an error => strip the lone halves here.
//
// An alternative to the approach below is to match only the stand-alone surrogate values with
// /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/
// Reference: https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
function removeLoneUnicodeSurrogates(str: string): string {
  const dropIfUnpaired = (match: string): string => {
    // a match made of 2 code units is a surrogate pair that goes together => keep it untouched
    if (match.length > 1) {
      return match
    }
    // a match made of 1 code unit is a lone surrogate => remove it
    // (it could also be replaced with \ufffd, the valid unicode value for �, to match web browsers' behavior)
    return ''
  }
  return str.replace(SURROGATES_REGEX, dropIfUnpaired)
}

// custom replacer function for JSON.stringify // goodcheck-disable-line
// String values get their lone surrogates removed; any other value is handed back untouched
// so that it is serialized as usual. `key` belongs to the replacer signature but is not used.
function invalidUnicodeReplacer(key: string, value: any): any {
  return typeof value === 'string' ? removeLoneUnicodeSurrogates(value) : value
}

// Replaces every character matched by NON_ASCII_REGEX with its \uXXXX escape sequence
// (the code unit written as 4 zero-padded lowercase hex digits) and returns the resulting string.
// Reference: https://stackoverflow.com/questions/31649362/json-stringify-and-unicode-characters
function escapeNonAscii(str: string): string {
  return str.replace(NON_ASCII_REGEX, function (chr: string): string {
    const hex = chr.charCodeAt(0).toString(16)
    // left-pad to 4 digits; slice(-4) is used instead of the deprecated String.prototype.substr
    return '\\u' + ('0000' + hex).slice(-4)
  })
}

// Serializes `obj` to JSON with lone surrogates removed from its string values and the remaining non-ascii characters escaped.
function asciiSafeStringify(obj: any): string {
  // order is important here: the unicode clean-up runs first (inside the replacer) and the escaping comes last,
  // if not the unicode manipulations may fail
  const json = JSON.stringify(obj, invalidUnicodeReplacer) // goodcheck-disable-line
  return escapeNonAscii(json)
}

// Returns true if it is a DOM node
// Uses instanceof when the global `Node` is exposed as an object; otherwise falls back to duck typing:
// a non-null object with a numeric `nodeType` and a string `nodeName`.
function isNode(obj: any): boolean {
  if (typeof Node === 'object') {
    return obj instanceof Node
  }
  if (obj == null || typeof obj !== 'object') {
    return false
  }
  return typeof obj.nodeType === 'number' && typeof obj.nodeName === 'string'
}

// Returns true if it is a DOM element
// Uses instanceof (DOM2) when the global `HTMLElement` is exposed as an object; otherwise falls back to duck typing:
// a non-null object whose `nodeType` is 1 and whose `nodeName` is a string.
function isElement(obj: any): boolean {
  if (typeof HTMLElement === 'object') {
    return obj instanceof HTMLElement
  }
  if (obj == null || typeof obj !== 'object') {
    return false
  }
  return obj.nodeType === 1 && typeof obj.nodeName === 'string'
}

// Circular replacer for JSON.stringify for complex objects that are // goodcheck-disable-line
// being sent to track (e.g. complex objects inside stores)
// Also replaces DOM nodes and elements with their string representation since they can't be stringified (with .toJSON())
//
// Only real cycles are dropped: a value is replaced with undefined when it is one of its own ancestors.
// An object that is merely referenced from several places (without forming a cycle) is serialized every time;
// remembering every visited object instead would silently lose the repeated occurrences.
// Returns a fresh replacer with its own state, to be used for one serialization.
function getCircularReplacer(): (key: any, value: any) => any {
  // chain of objects from the root down to the holder of the value that is currently being visited
  const ancestors: object[] = []
  // a regular function (not an arrow) is required: the serializer passes the holder object as `this`
  return function (this: any, key: any, value: any): any {
    if (typeof value !== 'object' || value === null) {
      return value
    }
    // unwind the chain until the holder is on top: whatever sits above it belongs to an already finished branch
    while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
      ancestors.pop()
    }
    // the value contains itself somewhere up the chain => circular reference, drop it
    if (ancestors.indexOf(value) !== -1) {
      return undefined
    }
    // the string result has no children to walk into, so the node is not added to the chain
    if (isNode(value) || isElement(value)) {
      return value.toString()
    }
    ancestors.push(value)
    return value
  }
}

// Serializes `obj` to JSON with a freshly created circular replacer, so no replacer state is shared between calls.
function circularSafeStringify(obj: any): string {
  const replacer = getCircularReplacer()
  return JSON.stringify(obj, replacer) // goodcheck-disable-line
}

// Serializes `obj` to JSON that tolerates circular references / DOM nodes and has its non-ascii characters escaped.
// String values also get their lone surrogates removed, exactly like asciiSafeStringify does,
// so both ascii-safe helpers apply the same unicode clean-up.
function asciiSafeAndCircularSafeStringify(obj: any): string {
  const circularReplacer = getCircularReplacer()
  // chain both replacers: resolve circular references / DOM nodes first, then clean up the resulting value
  // `this` (the holder object) is forwarded in case the circular replacer relies on it
  const combinedReplacer = function (this: any, key: string, value: any): any {
    return invalidUnicodeReplacer(key, circularReplacer.call(this, key, value))
  }
  // order is important here: lone surrogates have to be removed before the non-ascii characters get escaped
  return escapeNonAscii(JSON.stringify(obj, combinedReplacer)) // goodcheck-disable-line
}

export { asciiSafeAndCircularSafeStringify, asciiSafeStringify, circularSafeStringify }
