twemoji/2/utils/generate

#!/usr/bin/env node

 /*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
     https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
 */

// dependencies
var fs = require('fs');
var http = require('http');
var path = require('path');

/**
 * Resolves a path relative to the project root, which sits two folders
 * above the folder containing this script.
 *
 * @param   string  which  path relative to the project root, i.e. '2/svg'
 * @return  string  the joined and normalized path
 */
function file(which) {
  var projectRoot = path.join(__dirname, '../..');
  return path.join(projectRoot, which);
}

// Twitter assets by property name
// each key is an assets folder relative to the project root (see file()),
// each value is filled by grabAllAssets with the upper cased,
// extension-less names of the files found in that folder
var assets = {
  '2/72x72': [],
  '2/svg': []
};

// the five skin tone modifiers U+1F3FB ... U+1F3FF, written as
// \u-escaped UTF-16 surrogate pairs (the same shape toJSON produces)
// so they can be passed straight to generateRegexPartial
var skinToneOptions = [
  '\\ud83c\\udffb',
  '\\ud83c\\udffc',
  '\\ud83c\\udffd',
  '\\ud83c\\udffe',
  '\\ud83c\\udfff'
];

// white spaces we don't want to catch via the RegExp
// there is no asset equivalent for these
// (hex code points U+2002, U+2003 and U+2005, compared against
// the upper case code point strings parsed from EmojiSources.txt)
var ignoreMissing = ['2002', '2003', '2005'];


// basic utilities to convert codepoints to JSON strings
/**
 * Converts dash separated hex code points into \u-escaped UTF-16 text.
 *
 * @param   string  codePoints  i.e. '1F1E8-1F1F3'
 * @return  string  i.e. '\\ud83c\\udde8\\ud83c\\uddf3'
 */
function toJSON(codePoints) {
  var hexPoints = codePoints.split('-');
  var escaped = '';
  for (var i = 0; i < hexPoints.length; i++) {
    // each code point becomes one or two \uXXXX units
    escaped += UTF162JSON(fromCodePoint(hexPoints[i]));
  }
  return escaped;
}
/**
 * Converts one code point into its UTF-16 string.
 *
 * @param   string|number  codepoint  hex string such as '1F4A9', or a number
 * @return  string  one UTF-16 unit for values below 0x10000,
 *                  otherwise a high + low surrogate pair
 */
function fromCodePoint(codepoint) {
  var value = codepoint;
  if (typeof codepoint === 'string') {
    value = parseInt(codepoint, 16);
  }
  // values below 0x10000 fit in a single UTF-16 unit
  if (value < 0x10000) {
    return String.fromCharCode(value);
  }
  // everything else is split into 10 high bits and 10 low bits
  var offset = value - 0x10000;
  var highSurrogate = 0xD800 + (offset >> 10);
  var lowSurrogate = 0xDC00 + (offset & 0x3FF);
  return String.fromCharCode(highSurrogate, lowSurrogate);
}
/**
 * Escapes every UTF-16 unit of a string as \uXXXX (lower case hex).
 *
 * @param   string  text  i.e. the two units of U+1F4A9
 * @return  string  i.e. '\\ud83d\\udca9', or '' for empty text
 */
function UTF162JSON(text) {
  var escaped = '';
  var index = 0;
  while (index < text.length) {
    var hex = text.charCodeAt(index).toString(16);
    // left pad with zeros so every unit has exactly 4 digits
    escaped += '\\u' + ('000' + hex).slice(-4);
    index += 1;
  }
  return escaped;
}

// Items is an array of unicode sequences with \u escaping, like ["\u2963\ufe0f", "\u263a\ufe0f"]
// items get sorted by length (long to short), then unicode hex values (low to high)
// output is "or" ed together using | for regex
// output also combines adjacent items using character classes with ranges when they have common prefixes
// Example: "aab", "aac", "aad", "aag", "ba" becomes "aa[b-dg]|ba"
/**
 * @param   Array   items  \u-escaped sequences, i.e. ["\\u2963\\ufe0f"]
 * @return  string  regex source with all items "or"ed together; items that
 *                  share a prefix are merged into one character class and
 *                  runs of 3+ consecutive last units are written as a range.
 *                  An empty list gives an empty string.
 */
function generateRegexPartial(items) {
  var alternatives = [];   // finished "prefix + class" pieces
  var pendingClass = [];   // members collected for the active prefix
  var pendingRange = [];   // run of consecutive last units being grown
  var activePrefix = null;

  // a and b are arrays of hex UCS-2 units:
  // longer arrays first, then unit by unit from low to high
  function compareUnits(a, b) {
    if (!a.length) {
      return 0;
    }
    var lengthDelta = b.length - a.length;
    if (lengthDelta) {
      return lengthDelta;
    }
    for (var i = 0; i < a.length; i++) {
      var unitDelta = parseInt(a[i], 16) - parseInt(b[i], 16);
      if (unitDelta) {
        return unitDelta;
      }
    }
    return 0;
  }

  // moves the current run into the class, as "first-last" when it has 3+ units
  function closeRange() {
    if (pendingRange.length < 3) {
      pendingClass = pendingClass.concat(pendingRange);
    } else {
      pendingClass = pendingClass.concat([
        pendingRange[0],
        '-',
        pendingRange[pendingRange.length - 1]
      ]);
    }
    pendingRange = [];
  }

  // emits the active prefix followed by its class, then starts over
  function closeClass() {
    closeRange();
    if (pendingClass.length) {
      var classBody = pendingClass.length === 1 ?
        pendingClass[0] :
        '[' + pendingClass.join('') + ']';
      alternatives.push(activePrefix + classBody);
    }
    pendingClass = [];
    activePrefix = null;
  }

  // Convert from "\u2963\ufe0f" into ["2963", "fe0f"]
  var sortedUnits = items.map(function (item) {
    return item.split('\\u').slice(1);
  });
  sortedUnits.sort(compareUnits);

  sortedUnits.forEach(function (units) {
    var lastUnit = units.slice(-1).join('');
    var lead = units.slice(0, -1).join('\\u');
    if (lead) {
      lead = '\\u' + lead;
    }
    if (lead !== activePrefix) {
      closeClass();
    }
    activePrefix = lead;

    // the escaped unit that would directly precede this one in a range
    var precedingUnit = UTF162JSON(String.fromCharCode(parseInt(lastUnit, 16) - 1));
    var rangeEnd = pendingRange[pendingRange.length - 1];
    if (pendingRange.length && rangeEnd !== precedingUnit) {
      closeRange();
    }
    pendingRange.push('\\u' + lastUnit);
  });

  closeClass();
  return alternatives.join('|');
}

// basic utility to organize async code
// see: http://webreflection.blogspot.co.uk/2012/03/tweet-sized-queue-system.html
// or:  http://webreflection.blogspot.co.uk/2012/06/working-with-queues.html
/**
 * Turns an array of functions into a queue of steps.
 * The first step is scheduled with a zero timeout; every step receives the
 * array itself and is expected to call `args.next()` to start the following one.
 *
 * @param   Array     args  functions to run in order, consumed via shift()
 * @param   Function  f     only used internally to hold the current step
 * @return  Array     the same array, with an extra `next` method that
 *                    returns true if a step was run, false if none was left
 */
function Queue(args, f) {
  function next() {
    f = args.shift();
    if (!f) {
      return false;
    }
    f(args);
    return true;
  }
  args.next = next;
  setTimeout(next, 0);
  return args;
}

// main task
Queue([

  // will populate assets arrays
  function grabAllAssets(q) {
    console.log('analyzing all assets ... ');
    // per each path/folder
    Object.keys(assets).forEach(function (path, i, paths) {
      // grab all files in that folder
      fs.readdir(file(path), function (err, files) {
        // and add them to the assets path
        assets[path].push.apply(
          assets[path],
          files.map(upperCaseWithoutExtension)
        );
        // once all assets arrays have been populated
        if (paths.reduce(completed, true)) {
          console.log('[INFO] assets contains ' + assets[path].length + ' emoji.');
          q.next();
        }
      });
    });
    // drop extension + uppercase
    function upperCaseWithoutExtension(file) {
      return file.slice(0, file.lastIndexOf('.')).toUpperCase();
    }
    // returns true if all assets have been populated
    function completed(p, c) {
      return p && assets[c].length;
    }
  },

  // will fetch and store all emoji from unicode.org
  function fetchEmojiSources(q) {
    console.log('fetching EmojiSources.txt ... ');
    // grab all emoji and test them against them
    http.get("http://www.unicode.org/Public/UNIDATA/EmojiSources.txt", function (res) {
      var chunks = [];
      // if all good ...
      if (res.statusCode === 200) {
        // grab all data
        res.on('data', chunks.push.bind(chunks));
        // once done ...
        res.on('end', function () {
          console.log('analyzing EmojiSources VS our assets ... ');
          // store all missing assets in one object
          var missing = {};
          // will be used to store an array with all missing
          var missingGrouped = {};

          // will be needed later on
          // parse it, clean it, and store it once
          q.emojiSource = chunks
            .join('')
            .split(/\r\n|\r|\n/)
            // filter once
            .filter(function (line) {
              return this.test(line);
            }, /^[0-9A-F]/)
            // take only emoji info
            .map(function (codePoint) {
              return codePoint
                .slice(0, codePoint.indexOf(';'))
                .toUpperCase()
                // drop spaces
                .replace(/\s+/g, '-')
                // drop 0 padded prefixes
                .replace(/^0+/g, '');
            });

          console.log('[INFO] parsed ' + q.emojiSource.length + ' standard emoji.');

          // find out which one is missing from our assets
          q.emojiSource.forEach(
            function (emoji) {
              // do not loop for emoji we know we should ignore
              if (ignoreMissing.indexOf(emoji) < 0) {
                // verify all others per each folder
                this.forEach(function (path) {
                  if (assets[path].indexOf(emoji) < 0) {
                    (missing[path] || (missing[path] = [])).push(emoji);
                    missingGrouped[emoji] = true;
                  }
                });
              }
            },
            // and per each folder
            Object.keys(assets)
          );

          // if some missing emoji has been found
          if (Object.keys(missing).length) {
            // warn and show which one is missing
            console.warn('[WARNING] missing assets for:');
            console.log(missing);
          }
          // create the array of all emoji we should ignore
          q.ignore = ignoreMissing.concat(Object.keys(missingGrouped));

          q.next();
        });
      } else {
        console.error('[ERROR] unable to fetch emoji at unicode.org');
        process.exit(1);
      }
    });
  },

  // grab the list of emoji that behave differently when
  // variants such \uFE0E and \uFE0F are in place
  function grabStandardVariants(q) {
    console.log('fetching StandardizedVariants.txt ... ');
    http.get(
      "http://unicode.org/Public/UNIDATA/StandardizedVariants.txt",
      function(res) {
        var chunks = [];
        if (res.statusCode == 200) {
          res.on('data', chunks.push.bind(chunks));
          res.on('end', function () {
            // cleaning up parsing sensitive emoji
            q.variantsSensitive = chunks
              .join('')                         // all content
              .split(/\r\n|\r|\n/)              // split in lines
              .filter(function (line) {         // containing FE0E; info
                return this.test(line);         // avoiding duplicated with FE0F
              }, / FE0E; text style/)
              .map(function (line) {            // cleaned up to grab
                return line.replace(this, '$1') // only first unicode
                        .toUpperCase();         // normalized as uppercase
              }, /^([0-9A-F]{4,}) FE0E;.+$/)    // sensitive char
            ;

            // iOS keyboard allows U+002A U+FE0F U+20E3 even though not a standardized variant (yet?)
            q.variantsSensitive.push('002A');
            // iOS keyboard allows U+2639 U+FE0F even though not a standardized variant (yet?)
            q.variantsSensitive.push('2639');

            console.log('[INFO] parsed ' + q.variantsSensitive.length + ' variant sensitive emoji.');
            q.next();

          });
        } else {
          console.error('[ERROR] unable to fetch standard variants at unicode.org');
          process.exit(1);
        }
      }
    );
  },

  // add our own assets that are not part of the Unicode standard
  function addMissingEmoji(q) {
    q.nonStandard = [];
    Object.keys(assets).forEach(function (path, i) {
      assets[path].forEach(function (emoji) {
        if (
          q.emojiSource.indexOf(emoji) < 0 &&
          q.nonStandard.indexOf(emoji) < 0
        ) {
          q.nonStandard.push(emoji);
        }
      });
    });

    if (q.nonStandard.length) {
      console.warn('[WARNING] assets contain ' + q.nonStandard.length + ' non standard emoji:');
      // console.log(q.nonStandard.join(', '));
    }

    q.emojiSource = q.emojiSource.concat(q.nonStandard)
    q.next();
  },

  // detect complete sets of five skin tones and a base
  function detectDiversityEmoji(q) {
    var isPresent = {};
    q.emojiSource.forEach(function (codePoints) {
      isPresent[codePoints] = true;
    });
    q.diversityBase = q.emojiSource.filter(function (codePoints) {
      // Start with the set of Emoji with the light skin tone
      return /-1F3FB$/.test(codePoints);
    }).map(function (codePoints) {
      // Take the skin tone off
      return codePoints.replace(/-1F3FB$/, '');
    }).filter(function (baseCodePoints) {
      // Verify that all other skin tones + no skin tone are present
      return ['-1F3FC', '-1F3FD', '-1F3FE', '-1F3FF', ''].every(function (suffix) {
        return isPresent[baseCodePoints + suffix];
      });
    });
    console.log('[INFO] parsed ' + q.diversityBase.length + ' diversity emoji.');
    q.next();
  },

  // split all emoji into groups (zwj, keycaps, diversity, variant sensitive, regular)
  // that are later turned into separate parts of the RegExp
  function partitionEmojiTypes(q) {
    console.log('partitioning emoji into types');
    q.zwj = [];
    q.diversity = [];
    q.sensitive = [];
    q.sensitiveKeycaps = [];
    q.diversitySensitive = [];
    q.regular = [];
    q.emojiSource.forEach(function (codePoints) {
      var u;
      var codePointsWithoutKeycap;
      codePoints = codePoints.replace(/\b[A-F0-9]+\b/g, function (hex) {
        // Pad all hex numbers to have at least 4 digits to match variantsSensitive
        return hex.length < 4 ? ('000' + hex).slice(-4) : hex;
      });
      if (q.ignore.indexOf(codePoints) < 0) {
        u = toJSON(codePoints);
        codePointsWithoutKeycap = codePoints.replace(/-20E3$/, '');
        if (codePoints.indexOf('200D') >= 0) {
          q.zwj.push(u);
        } else if (codePoints != codePointsWithoutKeycap && q.variantsSensitive.indexOf(codePointsWithoutKeycap) >= 0) {
          q.sensitiveKeycaps.push(toJSON(codePointsWithoutKeycap));
        } else if (q.diversityBase.indexOf(codePoints.replace(/-1F3F[B-F]$/, '')) >= 0) {
          // This is a diversity Emoji with or without a skin tone modifier
          // Add it to the regex if this is the base without the modifier
          if (q.diversityBase.indexOf(codePoints) >= 0) {
            if (q.variantsSensitive.indexOf(codePoints) < 0) {
              q.diversity.push(u);
            } else {
              q.diversitySensitive.push(u);
            }
          }
        } else if (q.variantsSensitive.indexOf(codePoints) < 0) {
          q.regular.push(u);
        } else {
          q.sensitive.push(u);
        }
      }
    });
    q.next();
  },

  function factorZwjSequences(q) {
    q.zwjCommonPatterns = [];

    // There are dozens of new ZWJ sequences that have common prefixes or suffixes with
    // skin tone + gender variations. To keep the main regex from growing excessively large and
    // slow, choose some common sub-expressions to factor.
    var commonPatterns = [
      {
        name: 'leading man/woman zwj with optional skin tone',
        re: '\\ud83d[\\udc68-\\udc69](?:\\ud83c[\\udffb-\\udfff])?\\u200d(.+?)',
        numCombinations: 12
      }, {
        name: 'variant or skin tone before trailing female/male zwj',
        re: '(.+?)(?:\\ufe0f|\\ud83c[\\udffb-\\udfff])\\u200d[\\u2640\\u2642]\\ufe0f',
        numCombinations: 12
      }, {
        name: 'optional skin tone before trailing female/male zwj',
        re: '(.+?)(?:\\ud83c[\\udffb-\\udfff])?\\u200d[\\u2640\\u2642]\\ufe0f',
        numCombinations: 12
      }
    ];

    commonPatterns.forEach(function(pattern) {
      var mapOfMatches = {};
      var re = new RegExp('^' + pattern.re + '$');
      q.zwj.forEach(function(jsonString) {
        var rawString = JSON.parse('"' + jsonString + '"');
        var match = rawString.match(re);
        if (match) {
          var key = match[1];
          mapOfMatches[key] = mapOfMatches[key] || [];
          mapOfMatches[key].push(match[0]);
        }
      });
      var replacements = [];
      Object.keys(mapOfMatches).forEach(function(key) {
        var matches = mapOfMatches[key];
        // Only a complete set may be replaced
        if (matches.length === pattern.numCombinations) {
          replacements.push(UTF162JSON(key));
          // Remove all items in the match set from the original zwj list
          matches.forEach(function(rawString) {
            var indexToRemove = q.zwj.indexOf(UTF162JSON(rawString));
            if (indexToRemove >= 0) {
              q.zwj.splice(indexToRemove, 1);
            }
          });
        }
      });
      if (replacements.length) {
        // Replace the wildcard section of the regex with a regex group of replacements
        var re = pattern.re.replace('(.+?', '(?:' + generateRegexPartial(replacements));
        q.zwjCommonPatterns.push(re);
        console.log('Refactoring ' + replacements.length + ' complete sets of ' + pattern.numCombinations + ' zwj from ' + pattern.name);
      } else {
        console.log('did not find any complete sets of ' + pattern.name);
      }
    });

    q.next();
  },

  // with all info, generate a RegExp that will catch
  // only standard emoji that are present in our assets
  function generateRegExp(q) {
    console.log('generating a RegExp for available assets');
    q.re = '';

    // The Zero-width joiner common patterns, if present, need to come first
    if (q.zwjCommonPatterns.length) {
      q.re += q.zwjCommonPatterns.join('|') + '|';
    }

    // Then the rest of the zwjs
    if (q.zwj.length) {
      q.re += generateRegexPartial(q.zwj) + '|';
    }

    // Group the variant sensitive keycaps
    if (q.sensitiveKeycaps.length) {
      q.re += '(?:' + generateRegexPartial(q.sensitiveKeycaps) + ')\\ufe0f?\\u20e3|';
    }

    // Next, add the diversity enabled Emoji that may include a skin tone suffix
    if (q.diversity.length + q.diversitySensitive.length) {
      q.re += '(?:';
      if (q.diversitySensitive.length) {
        // Some diversity are sensitive to variants
        q.re += '(?:' + generateRegexPartial(q.diversitySensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
        if (q.diversity.length) {
          q.re += '|';
        }
      }
      q.re += generateRegexPartial(q.diversity) + ')(?:' + generateRegexPartial(skinToneOptions) + '|)|';
    }

    // Next, the normal Emoji
    q.re += generateRegexPartial(q.regular) + '|';

    // Finally, add the rest of the sensitive ones that may be followed by U+FE0F but not U+FE0E
    q.re += '(?:' + generateRegexPartial(q.sensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
    q.next();
  },

  /**
   * Last queue step: writes the library file through createTwemoji using
   * the regex source stored in `q.re`, then loads ./create-dist.
   * It does not call `q.next()`.
   *
   * @param   Array  q  the queue, only its `re` property is read here
   */
  function generateFile(q) {
    var emojiPattern = q.re;
    console.log('generating ./twemoji.js');
    createTwemoji(emojiPattern);
    // loaded for its side effects only
    require('./create-dist');
  }

]);


function createTwemoji(re) {
  fs.writeFileSync(
    file('2/twemoji.js'),
    '/*jslint indent: 2, browser: true, bitwise: true, plusplus: true */\n' +
    'var twemoji = (' +
    function (
      /*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
        https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
      */

      // WARNING:   this file is generated automatically via
      //            `node twemoji-generator.js`
      //            please update its `createTwemoji` function
      //            at the bottom of the same file instead.

    ) {
      'use strict';

      /*jshint maxparams:4 */

      var
        // the exported module object
        twemoji = {


        /////////////////////////
        //      properties     //
        /////////////////////////

          // default assets url, by default will be Twitter Inc. CDN
          base: 'https://twemoji.maxcdn.com/2/',

          // default assets file extensions, by default '.png'
          ext: '.png',

          // default assets/folder size, by default "72x72"
          // available via Twitter CDN: 72
          size: '72x72',

          // default class name, by default 'emoji'
          className: 'emoji',

          // basic utilities / helpers to convert code points
          // to JavaScript surrogates and vice versa
          convert: {

            /**
             * Given an HEX codepoint, returns UTF16 surrogate pairs.
             *
             * @param   string  generic codepoint, i.e. '1F4A9'
             * @return  string  codepoint transformed into utf16 surrogates pair,
             *          i.e. \uD83D\uDCA9
             *
             * @example
             *  twemoji.convert.fromCodePoint('1f1e8');
             *  // "\ud83c\udde8"
             *
             *  '1f1e8-1f1f3'.split('-').map(twemoji.convert.fromCodePoint).join('')
             *  // "\ud83c\udde8\ud83c\uddf3"
             */
            fromCodePoint: fromCodePoint,

            /**
             * Given UTF16 surrogate pairs, returns the equivalent HEX codepoint.
             *
             * @param   string  generic utf16 surrogates pair, i.e. \uD83D\uDCA9
             * @param   string  optional separator for double code points, default='-'
             * @return  string  utf16 transformed into codepoint, i.e. '1F4A9'
             *
             * @example
             *  twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3');
             *  // "1f1e8-1f1f3"
             *
             *  twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3', '~');
             *  // "1f1e8~1f1f3"
             */
            toCodePoint: toCodePoint
          },


        /////////////////////////
        //       methods       //
        /////////////////////////

          /**
           * User first: used to remove missing images
           * preserving the original text intent when
           * a fallback for network problems is desired.
           * Automatically added to Image nodes via DOM
           * It could be recycled for string operations via:
           *  $('img.emoji').on('error', twemoji.onerror)
           */
          onerror: function onerror() {
            if (this.parentNode) {
              this.parentNode.replaceChild(createText(this.alt), this);
            }
          },

          /**
           * Main method/logic to generate either <img> tags or HTMLImage nodes.
           *  "emojify" a generic text or DOM Element.
           *
           * @overloads
           *
           * String replacement for `innerHTML` or server side operations
           *  twemoji.parse(string);
           *  twemoji.parse(string, Function);
           *  twemoji.parse(string, Object);
           *
           * HTMLElement tree parsing for safer operations over existing DOM
           *  twemoji.parse(HTMLElement);
           *  twemoji.parse(HTMLElement, Function);
           *  twemoji.parse(HTMLElement, Object);
           *
           * @param   string|HTMLElement  the source to parse and enrich with emoji.
           *
           *          string              replace emoji matches with <img> tags.
           *                              Mainly used to inject emoji via `innerHTML`
           *                              It does **not** parse the string or validate it,
           *                              it simply replaces found emoji with a tag.
           *                              NOTE: be sure this won't affect security.
           *
           *          HTMLElement         walk through the DOM tree and find emoji
           *                              that are inside **text node only** (nodeType === 3)
           *                              Mainly used to put emoji in already generated DOM
           *                              without compromising surrounding nodes and
           *                              **avoiding** the usage of `innerHTML`.
           *                              NOTE: Using DOM elements instead of strings should
           *                              improve security without compromising too much
           *                              performance compared with a less safe `innerHTML`.
           *
           * @param   Function|Object  [optional]
           *                              either the callback that will be invoked or an object
           *                              with all properties to use per each found emoji.
           *
           *          Function            if specified, this will be invoked per each emoji
           *                              that has been found through the RegExp except
           *                              those follwed by the invariant \uFE0E ("as text").
           *                              Once invoked, parameters will be:
           *
           *                                iconId:string     the lower case HEX code point
           *                                                  i.e. "1f4a9"
           *
           *                                options:Object    all info for this parsing operation
           *
           *                                variant:char      the optional \uFE0F ("as image")
           *                                                  variant, in case this info
           *                                                  is anyhow meaningful.
           *                                                  By default this is ignored.
           *
           *                              If such callback will return a falsy value instead
           *                              of a valid `src` to use for the image, nothing will
           *                              actually change for that specific emoji.
           *
           *
           *          Object              if specified, an object containing the following properties
           *
           *            callback   Function  the callback to invoke per each found emoji.
           *            base       string    the base url, by default twemoji.base
           *            ext        string    the image extension, by default twemoji.ext
           *            size       string    the assets size, by default twemoji.size
           *
           * @example
           *
           *  twemoji.parse("I \u2764\uFE0F emoji!");
           *  // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"> emoji!
           *
           *
           *  twemoji.parse("I \u2764\uFE0F emoji!", function(iconId, options) {
           *    return '/assets/' + iconId + '.gif';
           *  });
           *  // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"> emoji!
           *
           *
           * twemoji.parse("I \u2764\uFE0F emoji!", {
           *   size: 72,
           *   callback: function(iconId, options) {
           *     return '/assets/' + options.size + '/' + iconId + options.ext;
           *   }
           * });
           *  // I <img class="emoji" draggable="false" alt="❤️" src="/assets/72x72/2764.png"> emoji!
           *
           */
          parse: parse,

          /**
           * Given a string, invokes the callback argument
           *  per each emoji found in such string.
           * This is the most raw version used by
           *  the .parse(string) method itself.
           *
           * @param   string    generic string to parse
           * @param   Function  a generic callback that will be
           *                    invoked to replace the content.
           *                    This calback wil receive standard
           *                    String.prototype.replace(str, callback)
           *                    arguments such:
           *  callback(
           *    rawText,  // the emoji match
           *  );
           *
           *                    and others commonly received via replace.
           */
          replace: replace,

          /**
           * Simplify string tests against emoji.
           *
           * @param   string  some text that might contain emoji
           * @return  boolean true if any emoji was found, false otherwise.
           *
           * @example
           *
           *  if (twemoji.test(someContent)) {
           *    console.log("emoji All The Things!");
           *  }
           */
          test: test
        },

        // used to escape HTML special chars in attributes
        escaper = {
          '&': '&amp;',
          '<': '&lt;',
          '>': '&gt;',
          "'": '&#39;',
          '"': '&quot;'
        },

        // RegExp based on emoji's official Unicode standards
        // http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
        re = /twemoji/,

        // avoid runtime RegExp creation for not so smart,
        // not JIT based, and old browsers / engines
        UFE0Fg = /\uFE0F/g,

        // avoid using a string literal like '\u200D' here because minifiers expand it inline
        U200D = String.fromCharCode(0x200D),

        // used to find HTML special chars in attributes
        rescaper = /[&<>'"]/g,

        // nodes with type 1 which should **not** be parsed (including lower case svg)
        shouldntBeParsed = /IFRAME|NOFRAMES|NOSCRIPT|SCRIPT|SELECT|STYLE|TEXTAREA|[a-z]/,

        // just a private shortcut
        fromCharCode = String.fromCharCode;

      return twemoji;


      /////////////////////////
      //  private functions  //
      //     declaration     //
      /////////////////////////

      /**
       * Shortcut to create text nodes
       * @param   string  text used to create DOM text node
       * @return  Node  a DOM node with that text
       */
      function createText(text) {
        return document.createTextNode(text);
      }

      /**
       * Utility function to escape html attribute text
       * @param   string  text use in HTML attribute
       * @return  string  text encoded to use in HTML attribute
       */
      function escapeHTML(s) {
        return s.replace(rescaper, replacer);
      }

      /**
       * Default callback used to generate emoji src
       *  based on Twitter CDN
       * @param   string    the emoji codepoint string
       * @param   string    the default size to use, i.e. "36x36"
       * @return  string    the image source to use
       */
      function defaultImageSrcGenerator(icon, options) {
        return ''.concat(options.base, options.size, '/', icon, options.ext);
      }

      /**
       * Given a generic DOM nodeType 1, walk through all children
       * and store every nodeType 3 (#text) found in the tree.
       * @param   Element a DOM Element with probably some text in it
       * @param   Array the list of previously discovered text nodes
       * @return  Array same list with new discovered nodes, if any
       */
      function grabAllTextNodes(node, allText) {
        var
          childNodes = node.childNodes,
          length = childNodes.length,
          subnode,
          nodeType;
        while (length--) {
          subnode = childNodes[length];
          nodeType = subnode.nodeType;
          // parse emoji only in text nodes
          if (nodeType === 3) {
            // collect them to process emoji later
            allText.push(subnode);
          }
          // ignore all nodes that are not type 1 or that
          // should not be parsed as script, style, and others
          else if (nodeType === 1 && !shouldntBeParsed.test(subnode.nodeName)) {
            grabAllTextNodes(subnode, allText);
          }
        }
        return allText;
      }

      /**
       * Used to both remove the possible variant
       *  and to convert utf16 into code points.
       *  If there is a zero-width-joiner (U+200D), leave the variants in.
       * @param   string    the raw text of the emoji match
       */
      function grabTheRightIcon(rawText) {
        // if variant is present as \uFE0F
        return toCodePoint(rawText.indexOf(U200D) < 0 ?
          rawText.replace(UFE0Fg, '') :
          rawText
        );
      }

      /**
       * DOM version of the same logic / parser:
       *  emojify all found sub-text nodes placing images node instead.
       * @param   Element   generic DOM node with some text in some child node
       * @param   Object    options  containing info about how to parse
        *
        *            .callback   Function  the callback to invoke per each found emoji.
        *            .base       string    the base url, by default twemoji.base
        *            .ext        string    the image extension, by default twemoji.ext
        *            .size       string    the assets size, by default twemoji.size
        *
       * @return  Element same generic node with emoji in place, if any.
       */
      function parseNode(node, options) {
        // DOM flavour of the parser: each text node returned by
        // grabAllTextNodes(node, []) that contains at least one emoji
        // for which options.callback returned a source is replaced by
        // a fragment mixing text nodes and <img> elements.
        // Reads from options: callback, attributes, className, onerror.
        // Returns the very same node it received.
        var
          allText = grabAllTextNodes(node, []),
          length = allText.length,
          attrib,
          attrname,
          modified,
          fragment,
          subnode,
          text,
          match,
          i,
          index,
          img,
          rawText,
          iconId,
          src;
        // nodes are visited from the last collected one to the first
        while (length--) {
          modified = false;
          fragment = document.createDocumentFragment();
          subnode = allText[length];
          text = subnode.nodeValue;
          // i is the offset in text right after the previous match
          i = 0;
          // re is shared and global: exec resumes from re.lastIndex
          // and resets it once there are no more matches
          while ((match = re.exec(text))) {
            index = match.index;
            // keep the plain text found between two matches
            if (index !== i) {
              fragment.appendChild(
                createText(text.slice(i, index))
              );
            }
            rawText = match[0];
            iconId = grabTheRightIcon(rawText);
            i = index + rawText.length;
            src = options.callback(iconId, options);
            if (src) {
              img = new Image();
              img.onerror = options.onerror;
              img.setAttribute('draggable', 'false');
              attrib = options.attributes(rawText, iconId);
              for (attrname in attrib) {
                if (
                  attrib.hasOwnProperty(attrname) &&
                  // don't allow any handlers to be set + don't allow overrides
                  // attribute names are case-insensitive in HTML,
                  // so 'ONCLICK' or 'OnError' must be rejected too
                  attrname.toLowerCase().indexOf('on') !== 0 &&
                  !img.hasAttribute(attrname)
                ) {
                  img.setAttribute(attrname, attrib[attrname]);
                }
              }
              // assigned after the custom attributes,
              // hence these three always win
              img.className = options.className;
              img.alt = rawText;
              img.src = src;
              modified = true;
              fragment.appendChild(img);
            } else {
              // no source for this emoji: keep it as plain text
              fragment.appendChild(createText(rawText));
            }
          }
          // is there actually anything to replace in here ?
          if (modified) {
            // any text left to be added ?
            if (i < text.length) {
              fragment.appendChild(
                createText(text.slice(i))
              );
            }
            // replace the text node only, leave intact
            // anything else surrounding such text
            subnode.parentNode.replaceChild(fragment, subnode);
          }
        }
        return node;
      }

      /**
       * String/HTML version of the same logic / parser:
       *  emojify a generic text placing <img> tags instead of surrogate pairs.
       * @param   string    generic string with possibly some emoji in it
       * @param   Object    options  containing info about how to parse
       *
       *            .callback   Function  the callback to invoke per each found emoji.
       *                                  It receives the icon id and the options,
       *                                  a falsy result leaves the emoji untouched.
       *            .attributes Function  invoked with the raw text and the icon id,
       *                                  returns extra attributes for the image, if any.
       *            .className  string    the value of the image class attribute
       *            .base       string    the base url, by default twemoji.base
       *            .ext        string    the image extension, by default twemoji.ext
       *            .size       string    the assets size, by default twemoji.size
       *
       * @return  the string with <img tags> replacing all found and parsed emoji
       */
      function parseString(str, options) {
        return replace(str, function (rawText) {
          var
            ret = rawText,
            iconId = grabTheRightIcon(rawText),
            src = options.callback(iconId, options),
            attrib,
            attrname,
            lowerName;
          if (src) {
            // recycle the match string replacing the emoji
            // with its image counter part
            ret = '<img '.concat(
              'class="', options.className, '" ',
              'draggable="false" ',
              // needs to preserve user original intent
              // when variants should be copied and pasted too
              'alt="',
              rawText,
              '"',
              ' src="',
              src,
              '"'
            );
            attrib = options.attributes(rawText, iconId);
            for (attrname in attrib) {
              // attribute names are case-insensitive in HTML:
              // compare them lower-cased so that 'ONCLICK' or 'SRC'
              // cannot sneak through the two checks below
              lowerName = attrname.toLowerCase();
              if (
                attrib.hasOwnProperty(attrname) &&
                // don't allow any handlers to be set + don't allow overrides
                lowerName.indexOf('on') !== 0 &&
                ret.toLowerCase().indexOf(' ' + lowerName + '=') === -1
              ) {
                ret = ret.concat(' ', attrname, '="', escapeHTML(attrib[attrname]), '"');
              }
            }
            ret = ret.concat('>');
          }
          return ret;
        });
      }

      /**
       * Function used to actually replace HTML special chars.
       *  It returns whatever `escaper`, defined outside this function,
       *  holds under the received key.
       *  NOTE(review): presumably used as String#replace callback while
       *  escaping HTML, so that `m` is always a known key - confirm.
       * @param   string  HTML special char
       * @return  string  encoded HTML special char
       */
      function replacer(m) {
        return escaper[m];
      }

      /**
       * Default options.attributes callback, used by parse()
       *  whenever the provided `attributes` is not a function.
       *  It ignores its arguments, so no extra attribute is added.
       * @return  null
       */
      function returnNull() {
        return null;
      }

      /**
       * Turns a numeric size into its "squared" asset name,
       *  e.g. the number 36 becomes the string '36x36'.
       *  Anything that is not of type number is handed back untouched.
       * @param   any     a generic value.
       * @return  any     the 'NxN' string if the value was a number,
       *                  the received value otherwise.
       */
      function toSizeSquaredAsset(value) {
        // strings such as '72x72' or 'svg' are already asset names
        if (typeof value !== 'number') {
          return value;
        }
        return value + 'x' + value;
      }


      /////////////////////////
      //  exported functions //
      //     declaration     //
      /////////////////////////

      /**
       * Converts a code point into the string that represents it.
       * @param   string|number   the code point: strings are parsed
       *                          as hexadecimal, e.g. '1f600'
       * @return  string          one char if the code point is below
       *                          0x10000, a surrogate pair otherwise
       */
      function fromCodePoint(codepoint) {
        var
          code = codepoint,
          offset;
        if (typeof codepoint === 'string') {
          code = parseInt(codepoint, 16);
        }
        // Basic Multilingual Plane: one UTF-16 unit is enough
        if (code < 0x10000) {
          return fromCharCode(code);
        }
        // anything else is split in high and low surrogate
        offset = code - 0x10000;
        return fromCharCode(
          0xD800 + (offset >> 10),
          0xDC00 + (offset & 0x3FF)
        );
      }

      /**
       * Main entry point: emojify either a string or a DOM node.
       * @param   string|Node       strings go through parseString,
       *                            anything else through parseNode
       * @param   Function|Object   optional callback, or an object with
       *                            callback, attributes, base, ext, size,
       *                            folder, className and onerror
       * @return  whatever the chosen parser returns
       */
      function parse(what, how) {
        var
          // a missing or function-only argument is normalized as object
          settings = (!how || typeof how === 'function') ?
            {callback: how} : how,
          // if first argument is string, inject html <img> tags
          // otherwise use the DOM tree and parse text nodes only
          parser = typeof what === 'string' ? parseString : parseNode,
          // every missing setting falls back to the twemoji default
          options = {
            callback:   settings.callback || defaultImageSrcGenerator,
            attributes: typeof settings.attributes === 'function' ?
              settings.attributes : returnNull,
            base:       typeof settings.base === 'string' ?
              settings.base : twemoji.base,
            ext:        settings.ext || twemoji.ext,
            size:       settings.folder ||
              toSizeSquaredAsset(settings.size || twemoji.size),
            className:  settings.className || twemoji.className,
            onerror:    settings.onerror || twemoji.onerror
          };
        return parser(what, options);
      }

      /**
       * Replaces every emoji matched by the shared RegExp.
       * @param   any               the text, converted to string first
       * @param   Function|string   the String#replace second argument
       * @return  string            the text after the replacement
       */
      function replace(text, callback) {
        var source = String(text);
        return source.replace(re, callback);
      }

      /**
       * Tells if the text contains at least one emoji.
       *  The shared RegExp keeps its position between calls,
       *  so it is rewound both before and after the check.
       * @param   string    the text to verify
       * @return  boolean   true if there was a match
       */
      function test(text) {
        var found;
        // IE6 needs a reset before too
        re.lastIndex = 0;
        found = re.test(text);
        re.lastIndex = 0;
        return found;
      }

      /**
       * Converts a string into its code points, as lower case hexadecimal
       *  values joined through a separator, e.g. '\ud83d\ude00' is '1f600'.
       *  A high surrogate that is not followed by a low one is reported
       *  as is, instead of being merged with the next char or dropped.
       * @param   string    the text to convert, surrogate pairs included
       * @param   string    optional separator, '-' if missing or empty
       * @return  string    the joined hexadecimal code points
       */
      function toCodePoint(unicodeSurrogates, sep) {
        var
          r = [],
          c = 0,
          // pending high surrogate, 0 if there is none
          p = 0,
          i = 0;
        while (i < unicodeSurrogates.length) {
          c = unicodeSurrogates.charCodeAt(i++);
          if (p) {
            if (0xDC00 <= c && c <= 0xDFFF) {
              // well formed pair: recompose the code point
              r.push((0x10000 + ((p - 0xD800) << 10) + (c - 0xDC00)).toString(16));
              p = 0;
              continue;
            }
            // unpaired high surrogate: keep it as it is
            // and handle the current unit on its own
            r.push(p.toString(16));
            p = 0;
          }
          if (0xD800 <= c && c <= 0xDBFF) {
            p = c;
          } else {
            r.push(c.toString(16));
          }
        }
        // the text ended right after a high surrogate
        if (p) {
          r.push(p.toString(16));
        }
        return r.join(sep || '-');
      }

    }.toString()
      // drop current indentation
      .replace(/^    /gm, '')
      // add the RegExp in the right place
      .replace('re = /twemoji/', 're = /' + re + '/g')
      // add the full license
      .replace('/*! (C) Twitter Inc. */',
        '/*! (C) Twitter Inc. *//*\n' +
        fs.readFileSync(path.join(__dirname, '../../', 'LICENSE')).toString().replace(
          /^./gm, '   '
        ) +
        '\n  */'
      ) + '());');
}