1
0
mirror of https://github.com/twitter/twemoji.git synced 2024-12-27 03:16:23 +00:00
twemoji/2/scripts/generate
2018-07-10 11:29:36 -07:00

1044 lines
37 KiB
JavaScript
Executable File

#!/usr/bin/env node
/*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
*/
// dependencies
var fs = require('fs');
var http = require('http');
var path = require('path');
var Utils = require('./utils');
function file(which) {
return path.join(__dirname, '../..', which);
}
// Twitter assets by property name
var assets = {
'2/72x72': [],
'2/svg': []
};
var skinToneOptions = [
'\\ud83c\\udffb',
'\\ud83c\\udffc',
'\\ud83c\\udffd',
'\\ud83c\\udffe',
'\\ud83c\\udfff'
];
// white spaces we don't want to catch via the RegExp
// there is no asset equivalent for these
var ignoreMissing = ['2002', '2003', '2005'];
// Items is an array of unicode sequences with \u escaping, like ["\u2963\ufe0f", "\u263a\ufe0f"]
// items get sorted by length (long to short), then unicode hex values (low to high)
// output is "or" ed together using | for regex
// ouput also combines adjacent items using character classes with ranges when they have common prefixes
// Example: "aab", "aac", "aad", "aag", "ba" becomes "aa[b-dg]|ba"
function generateRegexPartial(items) {
var currentPrefix = null;
var result = [];
var charClass = [];
var charRange = [];
items.map(function (item) {
// Convert from "\u2963\ufe0f" into ["2963", "fe0f"]
return item.split('\\u').slice(1);
}).sort(sortMethod).forEach(function (itemParts) {
var prefix = itemParts.slice(0, -1).join('\\u');
if (prefix) {
prefix = '\\u' + prefix;
}
var suffix = itemParts.slice(-1);
if (prefix !== currentPrefix) {
flushCharClass();
}
currentPrefix = prefix;
var suffixMinusOne = Utils.UTF162JSON(String.fromCharCode(parseInt(suffix, 16) - 1));
if (charRange.length && charRange.slice(-1)[0] !== suffixMinusOne) {
flushCharRange();
}
charRange.push('\\u' + suffix);
});
flushCharClass();
return result.join('|');
// a and b are arrays of hex UCS-2 units
function sortMethod(a, b) {
return !a.length ? 0 :
b.length - a.length ||
parseInt(a[0], 16) - parseInt(b[0], 16) ||
sortMethod(a.slice(1), b.slice(1)
);
}
function flushCharRange() {
charClass = charClass.concat((charRange.length < 3) ?
charRange :
[ charRange[0], '-', charRange.slice(-1)[0] ]
);
charRange = [];
}
function flushCharClass() {
flushCharRange();
if (charClass.length) {
result.push(currentPrefix + (charClass.length == 1 ?
charClass[0] :
'[' + charClass.join('') + ']'
));
}
charClass = [];
currentPrefix = null;
}
}
// basic utility to organize async code
// see: http://webreflection.blogspot.co.uk/2012/03/tweet-sized-queue-system.html
// or: http://webreflection.blogspot.co.uk/2012/06/working-with-queues.html
function Queue(args, f) {
setTimeout(args.next = function next() {
return (f = args.shift()) ? !!f(args) || !0 : !1;
}, 0);
return args;
}
// main task
Queue([
// will populate assets arrays
function grabAllAssets(q) {
console.log('analyzing all assets ... ');
// per each path/folder
Object.keys(assets).forEach(function (path, i, paths) {
// grab all files in that folder
fs.readdir(file(path), function (err, files) {
// and add them to the assets path
assets[path].push.apply(
assets[path],
files.map(upperCaseWithoutExtension)
);
// once all assets arrays have been populated
if (paths.reduce(completed, true)) {
console.log('[INFO] assets contains ' + assets[path].length + ' emoji.');
q.next();
}
});
});
// drop extension + uppercase
function upperCaseWithoutExtension(file) {
return file.slice(0, file.lastIndexOf('.')).toUpperCase();
}
// returns true if all assets have been populated
function completed(p, c) {
return p && assets[c].length;
}
},
// will fetch and store all emoji from unicode.org
function fetchEmojiSources(q) {
console.log('fetching EmojiSources.txt ... ');
// grab all emoji and test them against them
http.get("http://www.unicode.org/Public/UNIDATA/EmojiSources.txt", function (res) {
var chunks = [];
// if all good ...
if (res.statusCode === 200) {
// grab all data
res.on('data', chunks.push.bind(chunks));
// once done ...
res.on('end', function () {
console.log('analyzing EmojiSources VS our assets ... ');
// store all missing assets in one object
var missing = {};
// will be used to store an array with all missing
var missingGrouped = {};
// will be needed later on
// parse it, clean it, and store it once
q.emojiSource = chunks
.join('')
.split(/\r\n|\r|\n/)
// filter once
.filter(function (line) {
return this.test(line);
}, /^[0-9A-F]/)
// take only emoji info
.map(function (codePoint) {
return codePoint
.slice(0, codePoint.indexOf(';'))
.toUpperCase()
// drop spaces
.replace(/\s+/g, '-')
// drop 0 padded prefixes
.replace(/^0+/g, '');
});
console.log('[INFO] parsed ' + q.emojiSource.length + ' standard emoji.');
// find out which one is missing from our assets
q.emojiSource.forEach(
function (emoji) {
// do not loop for emoji we know we should ignore
if (ignoreMissing.indexOf(emoji) < 0) {
// verify all others per each folder
this.forEach(function (path) {
if (assets[path].indexOf(emoji) < 0) {
(missing[path] || (missing[path] = [])).push(emoji);
missingGrouped[emoji] = true;
}
});
}
},
// and per each folder
Object.keys(assets)
);
// if some missing emoji has been found
if (Object.keys(missing).length) {
// warn and show which one is missing
console.warn('[WARNING] missing assets for:');
console.log(missing);
}
// create the array of all emoji we should ignore
q.ignore = ignoreMissing.concat(Object.keys(missingGrouped));
q.next();
});
} else {
console.error('[ERROR] unable to fetch emoji at unicode.org');
process.exit(1);
}
});
},
// grab the list of emoji that behave differently when
// variants such \uFE0E and \uFE0F are in place
function grabStandardVariants(q) {
console.log('fetching StandardizedVariants.txt ... ');
http.get(
"http://unicode.org/Public/UNIDATA/StandardizedVariants.txt",
function(res) {
var chunks = [];
if (res.statusCode == 200) {
res.on('data', chunks.push.bind(chunks));
res.on('end', function () {
// cleaning up parsing sensitive emoji
q.variantsSensitive = chunks
.join('') // all content
.split(/\r\n|\r|\n/) // split in lines
.filter(function (line) { // containing FE0E; info
return this.test(line); // avoiding duplicated with FE0F
}, / FE0E; text style/)
.map(function (line) { // cleaned up to grab
return line.replace(this, '$1') // only first unicode
.toUpperCase(); // normalized as uppercase
}, /^([0-9A-F]{4,}) FE0E;.+$/) // sensitive char
;
// iOS keyboard allows U+002A U+FE0F U+20E3 even though not a standardized variant (yet?)
q.variantsSensitive.push('002A');
// iOS keyboard allows U+2639 U+FE0F even though not a standardized variant (yet?)
q.variantsSensitive.push('2639');
console.log('[INFO] parsed ' + q.variantsSensitive.length + ' variant sensitive emoji.');
q.next();
});
} else {
console.error('[ERROR] unable to fetch standard variants at unicode.org');
process.exit(1);
}
}
);
},
// add our own assets that are not part of the Unicode standard
function addMissingEmoji(q) {
q.nonStandard = [];
Object.keys(assets).forEach(function (path, i) {
assets[path].forEach(function (emoji) {
if (
q.emojiSource.indexOf(emoji) < 0 &&
q.nonStandard.indexOf(emoji) < 0
) {
q.nonStandard.push(emoji);
}
});
});
if (q.nonStandard.length) {
console.warn('[WARNING] assets contain ' + q.nonStandard.length + ' non standard emoji:');
// console.log(q.nonStandard.join(', '));
}
q.emojiSource = q.emojiSource.concat(q.nonStandard)
q.next();
},
// detect complete sets of five skin tones and a base
function detectDiversityEmoji(q) {
var isPresent = {};
q.emojiSource.forEach(function (codePoints) {
isPresent[codePoints] = true;
});
q.diversityBase = q.emojiSource.filter(function (codePoints) {
// Start with the set of Emoji with the light skin tone
return /-1F3FB$/.test(codePoints);
}).map(function (codePoints) {
// Take the skin tone off
return codePoints.replace(/-1F3FB$/, '');
}).filter(function (baseCodePoints) {
// Verify that all other skin tones + no skin tone are present
return ['-1F3FC', '-1F3FD', '-1F3FE', '-1F3FF', ''].every(function (suffix) {
return isPresent[baseCodePoints + suffix];
});
});
console.log('[INFO] parsed ' + q.diversityBase.length + ' diversity emoji.');
q.next();
},
// detect complete sets of five skin tones and a base
function partitionEmojiTypes(q) {
console.log('partitioning emoji into types');
q.zwj = [];
q.diversity = [];
q.sensitive = [];
q.sensitiveKeycaps = [];
q.diversitySensitive = [];
q.regular = [];
q.emojiSource.forEach(function (codePoints) {
var u;
var codePointsWithoutKeycap;
codePoints = codePoints.replace(/\b[A-F0-9]+\b/g, function (hex) {
// Pad all hex numbers to have at least 4 digits to match variantsSensitive
return hex.length < 4 ? ('000' + hex).slice(-4) : hex;
});
if (q.ignore.indexOf(codePoints) < 0) {
u = Utils.toJSON(codePoints);
codePointsWithoutKeycap = codePoints.replace(/-20E3$/, '');
if (codePoints.indexOf('200D') >= 0) {
q.zwj.push(u);
} else if (codePoints != codePointsWithoutKeycap && q.variantsSensitive.indexOf(codePointsWithoutKeycap) >= 0) {
q.sensitiveKeycaps.push(Utils.toJSON(codePointsWithoutKeycap));
} else if (q.diversityBase.indexOf(codePoints.replace(/-1F3F[B-F]$/, '')) >= 0) {
// This is a diversity Emoji with or without a skin tone modifier
// Add it to the regex if this is the base without the modifier
if (q.diversityBase.indexOf(codePoints) >= 0) {
if (q.variantsSensitive.indexOf(codePoints) < 0) {
q.diversity.push(u);
} else {
q.diversitySensitive.push(u);
}
}
} else if (q.variantsSensitive.indexOf(codePoints) < 0) {
q.regular.push(u);
} else {
q.sensitive.push(u);
}
}
});
q.next();
},
function factorZwjSequences(q) {
q.zwjCommonPatterns = [];
// There are dozens of new ZWJ sequences that have common prefixes or suffixes with
// skin tone + gender variations. To keep the main regex from growing excessively large and
// slow, choose some common sub-expressions to factor.
var commonPatterns = [
{
name: 'leading man/woman zwj with optional skin tone',
re: '\\ud83d[\\udc68-\\udc69](?:\\ud83c[\\udffb-\\udfff])?\\u200d(.+?)',
numCombinations: 12
}, {
name: 'variant or skin tone before trailing female/male zwj',
re: '(.+?)(?:\\ufe0f|\\ud83c[\\udffb-\\udfff])\\u200d[\\u2640\\u2642]\\ufe0f',
numCombinations: 12
}, {
name: 'optional skin tone before trailing female/male zwj',
re: '(.+?)(?:\\ud83c[\\udffb-\\udfff])?\\u200d[\\u2640\\u2642]\\ufe0f',
numCombinations: 12
}
];
commonPatterns.forEach(function(pattern) {
var mapOfMatches = {};
var re = new RegExp('^' + pattern.re + '$');
q.zwj.forEach(function(jsonString) {
var rawString = JSON.parse('"' + jsonString + '"');
var match = rawString.match(re);
if (match) {
var key = match[1];
mapOfMatches[key] = mapOfMatches[key] || [];
mapOfMatches[key].push(match[0]);
}
});
var replacements = [];
Object.keys(mapOfMatches).forEach(function(key) {
var matches = mapOfMatches[key];
// Only a complete set may be replaced
if (matches.length === pattern.numCombinations) {
replacements.push(Utils.UTF162JSON(key));
// Remove all items in the match set from the original zwj list
matches.forEach(function(rawString) {
var indexToRemove = q.zwj.indexOf(Utils.UTF162JSON(rawString));
if (indexToRemove >= 0) {
q.zwj.splice(indexToRemove, 1);
}
});
}
});
if (replacements.length) {
// Replace the wildcard section of the regex with a regex group of replacements
var re = pattern.re.replace('(.+?', '(?:' + generateRegexPartial(replacements));
q.zwjCommonPatterns.push(re);
console.log('Refactoring ' + replacements.length + ' complete sets of ' + pattern.numCombinations + ' zwj from ' + pattern.name);
} else {
console.log('did not find any complete sets of ' + pattern.name);
}
});
q.next();
},
// with all info, generate a RegExp that will catch
// only standard emoji that are present in our assets
function generateRegExp(q) {
console.log('generating a RegExp for available assets');
q.re = '';
// The Zero-width joiner common patterns, if present, need to come first
if (q.zwjCommonPatterns.length) {
q.re += q.zwjCommonPatterns.join('|') + '|';
}
// Then the rest of the zwjs
if (q.zwj.length) {
q.re += generateRegexPartial(q.zwj) + '|';
}
// Group the variant sensitive keycaps
if (q.sensitiveKeycaps.length) {
q.re += '(?:' + generateRegexPartial(q.sensitiveKeycaps) + ')\\ufe0f?\\u20e3|';
}
// Next, add the diversity enabled Emoji that may include a skin tone suffix
if (q.diversity.length + q.diversitySensitive.length) {
q.re += '(?:';
if (q.diversitySensitive.length) {
// Some diversity are sensitive to variants
q.re += '(?:' + generateRegexPartial(q.diversitySensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
if (q.diversity.length) {
q.re += '|';
}
}
q.re += generateRegexPartial(q.diversity) + ')(?:' + generateRegexPartial(skinToneOptions) + '|)|';
}
// Next, the normal Emoji
q.re += generateRegexPartial(q.regular) + '|';
// Finally, add the rest of the sensitive ones that may be followed by U+FE0F but not U+FE0E
q.re += '(?:' + generateRegexPartial(q.sensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
q.next();
},
function generateFile(q) {
console.log('generating ./twemoji.js');
createTwemoji(q.re);
require('./create-dist');
}
]);
function createTwemoji(re) {
fs.writeFileSync(
file('2/twemoji.js'),
'/*jslint indent: 2, browser: true, bitwise: true, plusplus: true */\n' +
'var twemoji = (' +
function (
/*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
*/
// WARNING: this file is generated automatically via
// `node twemoji-generator.js`
// please update its `createTwemoji` function
// at the bottom of the same file instead.
) {
'use strict';
/*jshint maxparams:4 */
var
// the exported module object
twemoji = {
/////////////////////////
// properties //
/////////////////////////
// default assets url, by default will be Twitter Inc. CDN
base: 'https://twemoji.maxcdn.com/2/',
// default assets file extensions, by default '.png'
ext: '.png',
// default assets/folder size, by default "72x72"
// available via Twitter CDN: 72
size: '72x72',
// default class name, by default 'emoji'
className: 'emoji',
// basic utilities / helpers to convert code points
// to JavaScript surrogates and vice versa
convert: {
/**
* Given an HEX codepoint, returns UTF16 surrogate pairs.
*
* @param string generic codepoint, i.e. '1F4A9'
* @return string codepoint transformed into utf16 surrogates pair,
* i.e. \uD83D\uDCA9
*
* @example
* twemoji.convert.fromCodePoint('1f1e8');
* // "\ud83c\udde8"
*
* '1f1e8-1f1f3'.split('-').map(twemoji.convert.fromCodePoint).join('')
* // "\ud83c\udde8\ud83c\uddf3"
*/
fromCodePoint: fromCodePoint,
/**
* Given UTF16 surrogate pairs, returns the equivalent HEX codepoint.
*
* @param string generic utf16 surrogates pair, i.e. \uD83D\uDCA9
* @param string optional separator for double code points, default='-'
* @return string utf16 transformed into codepoint, i.e. '1F4A9'
*
* @example
* twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3');
* // "1f1e8-1f1f3"
*
* twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3', '~');
* // "1f1e8~1f1f3"
*/
toCodePoint: toCodePoint
},
/////////////////////////
// methods //
/////////////////////////
/**
* User first: used to remove missing images
* preserving the original text intent when
* a fallback for network problems is desired.
* Automatically added to Image nodes via DOM
* It could be recycled for string operations via:
* $('img.emoji').on('error', twemoji.onerror)
*/
onerror: function onerror() {
if (this.parentNode) {
this.parentNode.replaceChild(createText(this.alt, false), this);
}
},
/**
* Main method/logic to generate either <img> tags or HTMLImage nodes.
* "emojify" a generic text or DOM Element.
*
* @overloads
*
* String replacement for `innerHTML` or server side operations
* twemoji.parse(string);
* twemoji.parse(string, Function);
* twemoji.parse(string, Object);
*
* HTMLElement tree parsing for safer operations over existing DOM
* twemoji.parse(HTMLElement);
* twemoji.parse(HTMLElement, Function);
* twemoji.parse(HTMLElement, Object);
*
* @param string|HTMLElement the source to parse and enrich with emoji.
*
* string replace emoji matches with <img> tags.
* Mainly used to inject emoji via `innerHTML`
* It does **not** parse the string or validate it,
* it simply replaces found emoji with a tag.
* NOTE: be sure this won't affect security.
*
* HTMLElement walk through the DOM tree and find emoji
* that are inside **text node only** (nodeType === 3)
* Mainly used to put emoji in already generated DOM
* without compromising surrounding nodes and
* **avoiding** the usage of `innerHTML`.
* NOTE: Using DOM elements instead of strings should
* improve security without compromising too much
* performance compared with a less safe `innerHTML`.
*
* @param Function|Object [optional]
* either the callback that will be invoked or an object
* with all properties to use per each found emoji.
*
* Function if specified, this will be invoked per each emoji
* that has been found through the RegExp except
* those follwed by the invariant \uFE0E ("as text").
* Once invoked, parameters will be:
*
* iconId:string the lower case HEX code point
* i.e. "1f4a9"
*
* options:Object all info for this parsing operation
*
* variant:char the optional \uFE0F ("as image")
* variant, in case this info
* is anyhow meaningful.
* By default this is ignored.
*
* If such callback will return a falsy value instead
* of a valid `src` to use for the image, nothing will
* actually change for that specific emoji.
*
*
* Object if specified, an object containing the following properties
*
* callback Function the callback to invoke per each found emoji.
* base string the base url, by default twemoji.base
* ext string the image extension, by default twemoji.ext
* size string the assets size, by default twemoji.size
*
* @example
*
* twemoji.parse("I \u2764\uFE0F emoji!");
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"/> emoji!
*
*
* twemoji.parse("I \u2764\uFE0F emoji!", function(iconId, options) {
* return '/assets/' + iconId + '.gif';
* });
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"/> emoji!
*
*
* twemoji.parse("I \u2764\uFE0F emoji!", {
* size: 72,
* callback: function(iconId, options) {
* return '/assets/' + options.size + '/' + iconId + options.ext;
* }
* });
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/72x72/2764.png"/> emoji!
*
*/
parse: parse,
/**
* Given a string, invokes the callback argument
* per each emoji found in such string.
* This is the most raw version used by
* the .parse(string) method itself.
*
* @param string generic string to parse
* @param Function a generic callback that will be
* invoked to replace the content.
* This calback wil receive standard
* String.prototype.replace(str, callback)
* arguments such:
* callback(
* rawText, // the emoji match
* );
*
* and others commonly received via replace.
*/
replace: replace,
/**
* Simplify string tests against emoji.
*
* @param string some text that might contain emoji
* @return boolean true if any emoji was found, false otherwise.
*
* @example
*
* if (twemoji.test(someContent)) {
* console.log("emoji All The Things!");
* }
*/
test: test
},
// used to escape HTML special chars in attributes
escaper = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
"'": '&#39;',
'"': '&quot;'
},
// RegExp based on emoji's official Unicode standards
// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
re = /twemoji/,
// avoid runtime RegExp creation for not so smart,
// not JIT based, and old browsers / engines
UFE0Fg = /\uFE0F/g,
// avoid using a string literal like '\u200D' here because minifiers expand it inline
U200D = String.fromCharCode(0x200D),
// used to find HTML special chars in attributes
rescaper = /[&<>'"]/g,
// nodes with type 1 which should **not** be parsed
shouldntBeParsed = /^(?:iframe|noframes|noscript|script|select|style|textarea)$/,
// just a private shortcut
fromCharCode = String.fromCharCode;
return twemoji;
/////////////////////////
// private functions //
// declaration //
/////////////////////////
/**
* Shortcut to create text nodes
* @param string text used to create DOM text node
* @return Node a DOM node with that text
*/
function createText(text, clean) {
return document.createTextNode(clean ? text.replace(UFE0Fg, '') : text);
}
/**
* Utility function to escape html attribute text
* @param string text use in HTML attribute
* @return string text encoded to use in HTML attribute
*/
function escapeHTML(s) {
return s.replace(rescaper, replacer);
}
/**
* Default callback used to generate emoji src
* based on Twitter CDN
* @param string the emoji codepoint string
* @param string the default size to use, i.e. "36x36"
* @return string the image source to use
*/
function defaultImageSrcGenerator(icon, options) {
return ''.concat(options.base, options.size, '/', icon, options.ext);
}
/**
* Given a generic DOM nodeType 1, walk through all children
* and store every nodeType 3 (#text) found in the tree.
* @param Element a DOM Element with probably some text in it
* @param Array the list of previously discovered text nodes
* @return Array same list with new discovered nodes, if any
*/
function grabAllTextNodes(node, allText) {
var
childNodes = node.childNodes,
length = childNodes.length,
subnode,
nodeType;
while (length--) {
subnode = childNodes[length];
nodeType = subnode.nodeType;
// parse emoji only in text nodes
if (nodeType === 3) {
// collect them to process emoji later
allText.push(subnode);
}
// ignore all nodes that are not type 1, that are svg, or that
// should not be parsed as script, style, and others
else if (nodeType === 1 && !('ownerSVGElement' in subnode) &&
!shouldntBeParsed.test(subnode.nodeName.toLowerCase())) {
grabAllTextNodes(subnode, allText);
}
}
return allText;
}
/**
* Used to both remove the possible variant
* and to convert utf16 into code points.
* If there is a zero-width-joiner (U+200D), leave the variants in.
* @param string the raw text of the emoji match
* @return string the code point
*/
function grabTheRightIcon(rawText) {
// if variant is present as \uFE0F
return toCodePoint(rawText.indexOf(U200D) < 0 ?
rawText.replace(UFE0Fg, '') :
rawText
);
}
/**
* DOM version of the same logic / parser:
* emojify all found sub-text nodes placing images node instead.
* @param Element generic DOM node with some text in some child node
* @param Object options containing info about how to parse
*
* .callback Function the callback to invoke per each found emoji.
* .base string the base url, by default twemoji.base
* .ext string the image extension, by default twemoji.ext
* .size string the assets size, by default twemoji.size
*
* @return Element same generic node with emoji in place, if any.
*/
function parseNode(node, options) {
var
allText = grabAllTextNodes(node, []),
length = allText.length,
attrib,
attrname,
modified,
fragment,
subnode,
text,
match,
i,
index,
img,
rawText,
iconId,
src;
while (length--) {
modified = false;
fragment = document.createDocumentFragment();
subnode = allText[length];
text = subnode.nodeValue;
i = 0;
while ((match = re.exec(text))) {
index = match.index;
if (index !== i) {
fragment.appendChild(
createText(text.slice(i, index), true)
);
}
rawText = match[0];
iconId = grabTheRightIcon(rawText);
i = index + rawText.length;
src = options.callback(iconId, options);
if (iconId && src) {
img = new Image();
img.onerror = options.onerror;
img.setAttribute('draggable', 'false');
attrib = options.attributes(rawText, iconId);
for (attrname in attrib) {
if (
attrib.hasOwnProperty(attrname) &&
// don't allow any handlers to be set + don't allow overrides
attrname.indexOf('on') !== 0 &&
!img.hasAttribute(attrname)
) {
img.setAttribute(attrname, attrib[attrname]);
}
}
img.className = options.className;
img.alt = rawText;
img.src = src;
modified = true;
fragment.appendChild(img);
}
if (!img) fragment.appendChild(createText(rawText, false));
img = null;
}
// is there actually anything to replace in here ?
if (modified) {
// any text left to be added ?
if (i < text.length) {
fragment.appendChild(
createText(text.slice(i), true)
);
}
// replace the text node only, leave intact
// anything else surrounding such text
subnode.parentNode.replaceChild(fragment, subnode);
}
}
return node;
}
/**
* String/HTML version of the same logic / parser:
* emojify a generic text placing images tags instead of surrogates pair.
* @param string generic string with possibly some emoji in it
* @param Object options containing info about how to parse
*
* .callback Function the callback to invoke per each found emoji.
* .base string the base url, by default twemoji.base
* .ext string the image extension, by default twemoji.ext
* .size string the assets size, by default twemoji.size
*
* @return the string with <img tags> replacing all found and parsed emoji
*/
function parseString(str, options) {
return replace(str, function (rawText) {
var
ret = rawText,
iconId = grabTheRightIcon(rawText),
src = options.callback(iconId, options),
attrib,
attrname;
if (iconId && src) {
// recycle the match string replacing the emoji
// with its image counter part
ret = '<img '.concat(
'class="', options.className, '" ',
'draggable="false" ',
// needs to preserve user original intent
// when variants should be copied and pasted too
'alt="',
rawText,
'"',
' src="',
src,
'"'
);
attrib = options.attributes(rawText, iconId);
for (attrname in attrib) {
if (
attrib.hasOwnProperty(attrname) &&
// don't allow any handlers to be set + don't allow overrides
attrname.indexOf('on') !== 0 &&
ret.indexOf(' ' + attrname + '=') === -1
) {
ret = ret.concat(' ', attrname, '="', escapeHTML(attrib[attrname]), '"');
}
}
ret = ret.concat('/>');
}
return ret;
});
}
/**
* Function used to actually replace HTML special chars
* @param string HTML special char
* @return string encoded HTML special char
*/
function replacer(m) {
return escaper[m];
}
/**
* Default options.attribute callback
* @return null
*/
function returnNull() {
return null;
}
/**
* Given a generic value, creates its squared counterpart if it's a number.
* As example, number 36 will return '36x36'.
* @param any a generic value.
* @return any a string representing asset size, i.e. "36x36"
* only in case the value was a number.
* Returns initial value otherwise.
*/
function toSizeSquaredAsset(value) {
return typeof value === 'number' ?
value + 'x' + value :
value;
}
/////////////////////////
// exported functions //
// declaration //
/////////////////////////
function fromCodePoint(codepoint) {
var code = typeof codepoint === 'string' ?
parseInt(codepoint, 16) : codepoint;
if (code < 0x10000) {
return fromCharCode(code);
}
code -= 0x10000;
return fromCharCode(
0xD800 + (code >> 10),
0xDC00 + (code & 0x3FF)
);
}
function parse(what, how) {
if (!how || typeof how === 'function') {
how = {callback: how};
}
// if first argument is string, inject html <img> tags
// otherwise use the DOM tree and parse text nodes only
return (typeof what === 'string' ? parseString : parseNode)(what, {
callback: how.callback || defaultImageSrcGenerator,
attributes: typeof how.attributes === 'function' ? how.attributes : returnNull,
base: typeof how.base === 'string' ? how.base : twemoji.base,
ext: how.ext || twemoji.ext,
size: how.folder || toSizeSquaredAsset(how.size || twemoji.size),
className: how.className || twemoji.className,
onerror: how.onerror || twemoji.onerror
});
}
function replace(text, callback) {
return String(text).replace(re, callback);
}
function test(text) {
// IE6 needs a reset before too
re.lastIndex = 0;
var result = re.test(text);
re.lastIndex = 0;
return result;
}
function toCodePoint(unicodeSurrogates, sep) {
var
r = [],
c = 0,
p = 0,
i = 0;
while (i < unicodeSurrogates.length) {
c = unicodeSurrogates.charCodeAt(i++);
if (p) {
r.push((0x10000 + ((p - 0xD800) << 10) + (c - 0xDC00)).toString(16));
p = 0;
} else if (0xD800 <= c && c <= 0xDBFF) {
p = c;
} else {
r.push(c.toString(16));
}
}
return r.join(sep || '-');
}
}.toString()
// drop current indentation
.replace(/^ /gm, '')
// add the RegExp in the right place
.replace('re = /twemoji/', `re = /${fs.readFileSync(path.join(__dirname, 'regex')).toString().trim()}/g`)
// add the full license
.replace('/*! (C) Twitter Inc. */',
'/*! (C) Twitter Inc. *//*\n' +
fs.readFileSync(path.join(__dirname, '../../', 'LICENSE')).toString().replace(
/^./gm, ' '
) +
'\n */'
) + '());');
}