1
0
mirror of https://github.com/twitter/twemoji.git synced 2024-11-19 11:14:59 +00:00
twemoji/2/utils/generate
Bryan Haggerty 6060723491 Gender equal emojis, new emojis for professions, and tweaks to existing emojis
Includes additions to address gender equality, expanded professions,
increased skin tone support, family representations, and minor
modifications to some existing emojis.

Many of the additions included are based on the [Emoji 4.0 draft
spec](http://unicode.org/reports/tr51/proposed.html) and are subject to
change prior to ratification.

This change also moves the Twemoji version number to 2.2.

### New

* Woman with turban
* Blonde woman
* Man getting face massage
* Man getting haircut
* Man frowning
* Man pouting
* Man gesturing not ok
* Man gesturing ok
* Man tipping hand
* Man raising hand
* Woman bowing deeply
* Man facepalming
* Man shrugging
* Man doing cartwheel
* Woman juggling
* Man juggling
* Woman walking
* Woman running
* Women wrestling
* Woman playing water polo
* Man playing handball
* Men with bunny ears
* Woman rowing boat
* Woman golfing
* Woman surfing
* Woman swimming
* Man with ball
* Woman weight lifting
* Woman biking
* Woman mountain biking
* Male health worker
* Female health worker
* Male judge
* Female judge
* Male pilot
* Female pilot
* Male farmer
* Female farmer
* Male cook
* Female cook
* Male student
* Female student
* Male singer
* Female singer
* Male artist
* Female artist
* Male teacher
* Female teacher
* Male factory worker
* Female factory worker
* Male technologist
* Female technologist
* Male office worker
* Female office worker
* Male mechanic
* Female mechanic
* Male scientist
* Female scientist
* Male astronaut
* Female astronaut
* Male firefighter
* Female firefighter
* Female police officer
* Female construction worker
* Female guard
* Female sleuth
* Family (man, boy)
* Family (man, boy, boy)
* Family (man, girl)
* Family (man, girl, boy)
* Family (man, girl, girl)
* Family (woman, boy)
* Family (woman, boy, boy)
* Family (woman, girl)
* Family (woman, girl, boy)
* Family (woman, girl, girl)
* UN Flag
* Female sign
* Male sign
* Staff of Aesculapius

### Modified

* Man with turban
* Person with ball
* Family
* Man and woman holding hands
* Two men holding hands
* Two women holding hands
* Women with bunny ears
* Man in business suit levitating
* Sleeping accommodation
* Skier
* Snowboarder
* Flag of Réunion
* Grinning face with smiling eyes
* Roasted sweet potato
* Bird
* Man with gua pi mao
* Railway car
* Face with rolling eyes
* Astonished face
2016-09-20 10:29:59 -07:00

1067 lines
37 KiB
JavaScript
Executable File

#!/usr/bin/env node
/*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
*/
// dependencies
var fs = require('fs');
var http = require('http');
var path = require('path');
/**
 * Builds a path relative to the directory two levels above this script.
 * @param {string} which the relative path to append, e.g. '2/svg'
 * @return {string} the joined and normalized path
 */
function file(which) {
  var baseDir = path.join(__dirname, '../..');
  return path.join(baseDir, which);
}
// Asset names keyed by folder; every list starts empty and is filled
// by the grabAllAssets task
var assets = {};
['2/72x72', '2/svg'].forEach(function (folder) {
  assets[folder] = [];
});
// the skin tone modifiers U+1F3FB to U+1F3FF, written as escaped surrogate pairs
var skinToneOptions = ['b', 'c', 'd', 'e', 'f'].map(function (lastDigit) {
  return '\\ud83c\\udff' + lastDigit;
});
// white spaces listed by the standard that must not be caught by the RegExp:
// no asset exists for any of them
var ignoreMissing = '2002 2003 2005'.split(' ');
// basic utilities to convert codepoints to JSON strings
/**
 * Turns a dash separated list of hex code points into the concatenation
 * of their escaped UTF-16 units.
 * @param {string} codePoints dash separated hex code points, e.g. '1F468-200D-1F466'
 * @return {string} one backslash-u escape per UTF-16 unit
 */
function toJSON(codePoints) {
  var points = codePoints.split('-');
  var escaped = '';
  for (var i = 0; i < points.length; i++) {
    escaped += UTF162JSON(fromCodePoint(points[i]));
  }
  return escaped;
}
/**
 * Converts a code point into the string made of its UTF-16 unit(s).
 * @param {string|number} codepoint a hex string such as '1F4A9', or a number
 * @return {string} one unit below 0x10000, a surrogate pair otherwise
 */
function fromCodePoint(codepoint) {
  var code = codepoint;
  if (typeof codepoint === 'string') {
    code = parseInt(codepoint, 16);
  }
  if (code < 0x10000) {
    return String.fromCharCode(code);
  }
  // split the 20 bits above the BMP into a high and a low surrogate
  var offset = code - 0x10000;
  var highSurrogate = 0xD800 + (offset >> 10);
  var lowSurrogate = 0xDC00 + (offset & 0x3FF);
  return String.fromCharCode(highSurrogate, lowSurrogate);
}
/**
 * Escapes every UTF-16 unit of the text as a literal backslash-u followed by
 * four lower-case hex digits.
 * @param {string} text the raw text to escape
 * @return {string} the escaped text, '' for an empty input
 */
function UTF162JSON(text) {
  var escaped = '';
  for (var index = 0; index < text.length; index += 1) {
    var hex = text.charCodeAt(index).toString(16);
    // left pad with zeros up to four digits
    escaped += '\\u' + ('000' + hex).slice(-4);
  }
  return escaped;
}
// Items is an array of unicode sequences with \u escaping, like ["\u2963\ufe0f", "\u263a\ufe0f"]
// items get sorted by length (long to short), then unicode hex values (low to high)
// output is "or" ed together using | for regex
// output also combines adjacent items using character classes with ranges when they have common prefixes
// Example: "aab", "aac", "aad", "aag", "ba" becomes "aa[b-dg]|ba"
//
// Returns the alternation as a string, or '' when items is empty.
// The contiguity test below compares escaped strings, so it assumes lower-case
// hex digits as produced by UTF162JSON (TODO confirm no caller passes upper case).
function generateRegexPartial(items) {
// prefix shared by the units currently collected in charClass/charRange
var currentPrefix = null;
// finished alternatives, joined with | at the end
var result = [];
// pieces of the character class being built for currentPrefix
var charClass = [];
// run of consecutive last units that may collapse into a range
var charRange = [];
items.map(function (item) {
// Convert from "\u2963\ufe0f" into ["2963", "fe0f"]
return item.split('\\u').slice(1);
}).sort(sortMethod).forEach(function (itemParts) {
// everything but the last unit, escaped again
var prefix = itemParts.slice(0, -1).join('\\u');
if (prefix) {
prefix = '\\u' + prefix;
}
// NOTE: suffix is a one element array; parseInt and the string
// concatenation below rely on its implicit conversion to a string
var suffix = itemParts.slice(-1);
// a different prefix closes the character class collected so far
if (prefix !== currentPrefix) {
flushCharClass();
}
currentPrefix = prefix;
// escaped form of the unit right before this one: when the last collected
// unit differs from it, the run is not contiguous and must be closed
var suffixMinusOne = UTF162JSON(String.fromCharCode(parseInt(suffix, 16) - 1));
if (charRange.length && charRange.slice(-1)[0] !== suffixMinusOne) {
flushCharRange();
}
charRange.push('\\u' + suffix);
});
// emit whatever is still pending for the last prefix
flushCharClass();
return result.join('|');
// a and b are arrays of hex UCS-2 units
// longer sequences first, then unit by unit in ascending numeric order
function sortMethod(a, b) {
return !a.length ? 0 :
b.length - a.length ||
parseInt(a[0], 16) - parseInt(b[0], 16) ||
sortMethod(a.slice(1), b.slice(1)
);
}
// moves the pending run into charClass: runs of three or more units become
// first-last, shorter runs are listed unit by unit
function flushCharRange() {
charClass = charClass.concat((charRange.length < 3) ?
charRange :
[ charRange[0], '-', charRange.slice(-1)[0] ]
);
charRange = [];
}
// pushes currentPrefix followed by either the single collected unit or a
// bracketed character class, then resets the state for the next prefix
function flushCharClass() {
flushCharRange();
if (charClass.length) {
result.push(currentPrefix + (charClass.length == 1 ?
charClass[0] :
'[' + charClass.join('') + ']'
));
}
charClass = [];
currentPrefix = null;
}
}
// basic utility to organize async code
// see: http://webreflection.blogspot.co.uk/2012/03/tweet-sized-queue-system.html
// or: http://webreflection.blogspot.co.uk/2012/06/working-with-queues.html
/**
 * Turns an array of task functions into a queue. Each call to args.next()
 * removes the first remaining task and invokes it with the queue itself.
 * The first task is started asynchronously through setTimeout.
 * @param {Array} args the tasks; the array is returned with a next method added
 * @param {Function} [f] only used as storage for the task being run
 * @return {Array} the same args array
 */
function Queue(args, f) {
  args.next = function next() {
    f = args.shift();
    if (!f) {
      // nothing left to run
      return false;
    }
    f(args);
    return true;
  };
  setTimeout(args.next, 0);
  return args;
}
// main task
Queue([
// will populate assets arrays
function grabAllAssets(q) {
console.log('analyzing all assets ... ');
// per each path/folder
Object.keys(assets).forEach(function (path, i, paths) {
// grab all files in that folder
fs.readdir(file(path), function (err, files) {
// and add them to the assets path
assets[path].push.apply(
assets[path],
files.map(upperCaseWithoutExtension)
);
// once all assets arrays have been populated
if (paths.reduce(completed, true)) {
console.log('[INFO] assets contains ' + assets[path].length + ' emoji.');
q.next();
}
});
});
// drop extension + uppercase
function upperCaseWithoutExtension(file) {
return file.slice(0, file.lastIndexOf('.')).toUpperCase();
}
// returns true if all assets have been populated
function completed(p, c) {
return p && assets[c].length;
}
},
// will fetch and store all emoji from unicode.org
function fetchEmojiSources(q) {
console.log('fetching EmojiSources.txt ... ');
// grab all emoji and test them against them
http.get("http://www.unicode.org/Public/UNIDATA/EmojiSources.txt", function (res) {
var chunks = [];
// if all good ...
if (res.statusCode === 200) {
// grab all data
res.on('data', chunks.push.bind(chunks));
// once done ...
res.on('end', function () {
console.log('analyzing EmojiSources VS our assets ... ');
// store all missing assets in one object
var missing = {};
// will be used to store an array with all missing
var missingGrouped = {};
// will be needed later on
// parse it, clean it, and store it once
q.emojiSource = chunks
.join('')
.split(/\r\n|\r|\n/)
// filter once
.filter(function (line) {
return this.test(line);
}, /^[0-9A-F]/)
// take only emoji info
.map(function (codePoint) {
return codePoint
.slice(0, codePoint.indexOf(';'))
.toUpperCase()
// drop spaces
.replace(/\s+/g, '-')
// drop 0 padded prefixes
.replace(/^0+/g, '');
});
console.log('[INFO] parsed ' + q.emojiSource.length + ' standard emoji.');
// find out which one is missing from our assets
q.emojiSource.forEach(
function (emoji) {
// do not loop for emoji we know we should ignore
if (ignoreMissing.indexOf(emoji) < 0) {
// verify all others per each folder
this.forEach(function (path) {
if (assets[path].indexOf(emoji) < 0) {
(missing[path] || (missing[path] = [])).push(emoji);
missingGrouped[emoji] = true;
}
});
}
},
// and per each folder
Object.keys(assets)
);
// if some missing emoji has been found
if (Object.keys(missing).length) {
// warn and show which one is missing
console.warn('[WARNING] missing assets for:');
console.log(missing);
}
// create the array of all emoji we should ignore
q.ignore = ignoreMissing.concat(Object.keys(missingGrouped));
q.next();
});
} else {
console.error('[ERROR] unable to fetch emoji at unicode.org');
process.exit(1);
}
});
},
// grab the list of emoji that behave differently when
// variants such \uFE0E and \uFE0F are in place
function grabStandardVariants(q) {
console.log('fetching StandardizedVariants.txt ... ');
http.get(
"http://unicode.org/Public/UNIDATA/StandardizedVariants.txt",
function(res) {
var chunks = [];
if (res.statusCode == 200) {
res.on('data', chunks.push.bind(chunks));
res.on('end', function () {
// cleaning up parsing sensitive emoji
q.variantsSensitive = chunks
.join('') // all content
.split(/\r\n|\r|\n/) // split in lines
.filter(function (line) { // containing FE0E; info
return this.test(line); // avoiding duplicated with FE0F
}, / FE0E; text style/)
.map(function (line) { // cleaned up to grab
return line.replace(this, '$1') // only first unicode
.toUpperCase(); // normalized as uppercase
}, /^([0-9A-F]{4,}) FE0E;.+$/) // sensitive char
;
// iOS keyboard allows U+002A U+FE0F U+20E3 even though not a standardized variant (yet?)
q.variantsSensitive.push('002A');
// iOS keyboard allows U+2639 U+FE0F even though not a standardized variant (yet?)
q.variantsSensitive.push('2639');
console.log('[INFO] parsed ' + q.variantsSensitive.length + ' variant sensitive emoji.');
q.next();
});
} else {
console.error('[ERROR] unable to fetch standard variants at unicode.org');
process.exit(1);
}
}
);
},
// add our own assets that are not part of the Unicode standard
function addMissingEmoji(q) {
q.nonStandard = [];
Object.keys(assets).forEach(function (path, i) {
assets[path].forEach(function (emoji) {
if (
q.emojiSource.indexOf(emoji) < 0 &&
q.nonStandard.indexOf(emoji) < 0
) {
q.nonStandard.push(emoji);
}
});
});
if (q.nonStandard.length) {
console.warn('[WARNING] assets contain ' + q.nonStandard.length + ' non standard emoji:');
// console.log(q.nonStandard.join(', '));
}
q.emojiSource = q.emojiSource.concat(q.nonStandard)
q.next();
},
// detect complete sets of five skin tones and a base
function detectDiversityEmoji(q) {
var isPresent = {};
q.emojiSource.forEach(function (codePoints) {
isPresent[codePoints] = true;
});
q.diversityBase = q.emojiSource.filter(function (codePoints) {
// Start with the set of Emoji with the light skin tone
return /-1F3FB$/.test(codePoints);
}).map(function (codePoints) {
// Take the skin tone off
return codePoints.replace(/-1F3FB$/, '');
}).filter(function (baseCodePoints) {
// Verify that all other skin tones + no skin tone are present
return ['-1F3FC', '-1F3FD', '-1F3FE', '-1F3FF', ''].every(function (suffix) {
return isPresent[baseCodePoints + suffix];
});
});
console.log('[INFO] parsed ' + q.diversityBase.length + ' diversity emoji.');
q.next();
},
// detect complete sets of five skin tones and a base
function partitionEmojiTypes(q) {
console.log('partitioning emoji into types');
q.zwj = [];
q.diversity = [];
q.sensitive = [];
q.sensitiveKeycaps = [];
q.diversitySensitive = [];
q.regular = [];
q.emojiSource.forEach(function (codePoints) {
var u;
var codePointsWithoutKeycap;
codePoints = codePoints.replace(/\b[A-F0-9]+\b/g, function (hex) {
// Pad all hex numbers to have at least 4 digits to match variantsSensitive
return hex.length < 4 ? ('000' + hex).slice(-4) : hex;
});
if (q.ignore.indexOf(codePoints) < 0) {
u = toJSON(codePoints);
codePointsWithoutKeycap = codePoints.replace(/-20E3$/, '');
if (codePoints.indexOf('200D') >= 0) {
q.zwj.push(u);
} else if (codePoints != codePointsWithoutKeycap && q.variantsSensitive.indexOf(codePointsWithoutKeycap) >= 0) {
q.sensitiveKeycaps.push(toJSON(codePointsWithoutKeycap));
} else if (q.diversityBase.indexOf(codePoints.replace(/-1F3F[B-F]$/, '')) >= 0) {
// This is a diversity Emoji with or without a skin tone modifier
// Add it to the regex if this is the base without the modifier
if (q.diversityBase.indexOf(codePoints) >= 0) {
if (q.variantsSensitive.indexOf(codePoints) < 0) {
q.diversity.push(u);
} else {
q.diversitySensitive.push(u);
}
}
} else if (q.variantsSensitive.indexOf(codePoints) < 0) {
q.regular.push(u);
} else {
q.sensitive.push(u);
}
}
});
q.next();
},
// Task: moves complete families of ZWJ sequences out of q.zwj and into
// q.zwjCommonPatterns, where each family is expressed as one factored regex.
// A family is only factored when all pattern.numCombinations members are present.
// Calls q.next() at the end.
function factorZwjSequences(q) {
q.zwjCommonPatterns = [];
// There are dozens of new ZWJ sequences that have common prefixes or suffixes with
// skin tone + gender variations. To keep the main regex from growing excessively large and
// slow, choose some common sub-expressions to factor.
// In each `re` the (.+?) group captures the part that varies between families;
// it is later replaced by the alternation of all captured values.
var commonPatterns = [
{
name: 'leading man/woman zwj with optional skin tone',
re: '\\ud83d[\\udc68-\\udc69](?:\\ud83c[\\udffb-\\udfff])?\\u200d(.+?)',
numCombinations: 12
}, {
name: 'variant or skin tone before trailing female/male zwj',
re: '(.+?)(?:\\ufe0f|\\ud83c[\\udffb-\\udfff])\\u200d[\\u2640\\u2642]\\ufe0f',
numCombinations: 12
}, {
name: 'optional skin tone before trailing female/male zwj',
re: '(.+?)(?:\\ud83c[\\udffb-\\udfff])?\\u200d[\\u2640\\u2642]\\ufe0f',
numCombinations: 12
}
];
commonPatterns.forEach(function(pattern) {
// captured group (raw text) -> all raw sequences sharing that capture
var mapOfMatches = {};
// anchored so a sequence must match the pattern as a whole
var re = new RegExp('^' + pattern.re + '$');
q.zwj.forEach(function(jsonString) {
// q.zwj holds escaped text: decode it to match against real characters
var rawString = JSON.parse('"' + jsonString + '"');
var match = rawString.match(re);
if (match) {
var key = match[1];
mapOfMatches[key] = mapOfMatches[key] || [];
mapOfMatches[key].push(match[0]);
}
});
// escaped captures whose family turned out to be complete
var replacements = [];
Object.keys(mapOfMatches).forEach(function(key) {
var matches = mapOfMatches[key];
// Only a complete set may be replaced
if (matches.length === pattern.numCombinations) {
replacements.push(UTF162JSON(key));
// Remove all items in the match set from the original zwj list
matches.forEach(function(rawString) {
var indexToRemove = q.zwj.indexOf(UTF162JSON(rawString));
if (indexToRemove >= 0) {
q.zwj.splice(indexToRemove, 1);
}
});
}
});
if (replacements.length) {
// Replace the wildcard section of the regex with a regex group of replacements
// (only the opening of the group is replaced, its closing parenthesis is reused;
// this `re` redeclares the function scoped variable used for matching above)
var re = pattern.re.replace('(.+?', '(?:' + generateRegexPartial(replacements));
q.zwjCommonPatterns.push(re);
console.log('Refactoring ' + replacements.length + ' complete sets of ' + pattern.numCombinations + ' zwj from ' + pattern.name);
} else {
console.log('did not find any complete sets of ' + pattern.name);
}
});
q.next();
},
// with all info, generate a RegExp that will catch
// only standard emoji that are present in our assets
function generateRegExp(q) {
console.log('generating a RegExp for available assets');
q.re = '';
// The Zero-width joiner common patterns, if present, need to come first
if (q.zwjCommonPatterns.length) {
q.re += q.zwjCommonPatterns.join('|') + '|';
}
// Then the rest of the zwjs
if (q.zwj.length) {
q.re += generateRegexPartial(q.zwj) + '|';
}
// Group the variant sensitive keycaps
if (q.sensitiveKeycaps.length) {
q.re += '(?:' + generateRegexPartial(q.sensitiveKeycaps) + ')\\ufe0f?\\u20e3|';
}
// Next, add the diversity enabled Emoji that may include a skin tone suffix
if (q.diversity.length + q.diversitySensitive.length) {
q.re += '(?:';
if (q.diversitySensitive.length) {
// Some diversity are sensitive to variants
q.re += '(?:' + generateRegexPartial(q.diversitySensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
if (q.diversity.length) {
q.re += '|';
}
}
q.re += generateRegexPartial(q.diversity) + ')(?:' + generateRegexPartial(skinToneOptions) + '|)|';
}
// Next, the normal Emoji
q.re += generateRegexPartial(q.regular) + '|';
// Finally, add the rest of the sensitive ones that may be followed by U+FE0F but not U+FE0E
q.re += '(?:' + generateRegexPartial(q.sensitive) + ')(?:\\ufe0f|(?!\\ufe0e))';
q.next();
},
// last task: write the library using the generated pattern, then load
// ./create-dist; q.next() is not called since nothing is left to run
function generateFile(q) {
  var pattern = q.re;
  console.log('generating ./twemoji.js');
  createTwemoji(pattern);
  require('./create-dist');
}
]);
/**
 * Writes the twemoji library source to the path returned by file('2/twemoji.js').
 *
 * The library is authored as the anonymous function passed below. That function
 * is never invoked by this script: it is serialized with toString(), so every
 * character inside it, comments included, ends up in the generated file.
 * The serialized text is then post-processed (see the replace calls at the
 * end) and wrapped so that the generated file reads
 * `var twemoji = (function (...) {...}());`.
 *
 * @param {string} re the RegExp source to embed in place of the `re = /twemoji/`
 *                    placeholder; it is emitted with the g flag
 */
function createTwemoji(re) {
fs.writeFileSync(
file('2/twemoji.js'),
'/*jslint indent: 2, browser: true, bitwise: true, plusplus: true */\n' +
'var twemoji = (' +
function (
/*! Copyright Twitter Inc. and other contributors. Licensed under MIT *//*
https://github.com/twitter/twemoji/blob/gh-pages/LICENSE
*/
// WARNING: this file is generated automatically via
// `node twemoji-generator.js`
// please update its `createTwemoji` function
// at the bottom of the same file instead.
) {
'use strict';
/*jshint maxparams:4 */
var
// the exported module object
twemoji = {
/////////////////////////
// properties //
/////////////////////////
// default assets url, by default will be Twitter Inc. CDN
base: 'https://twemoji.maxcdn.com/2/',
// default assets file extensions, by default '.png'
ext: '.png',
// default assets/folder size, by default "72x72"
// available via Twitter CDN: 72
size: '72x72',
// default class name, by default 'emoji'
className: 'emoji',
// basic utilities / helpers to convert code points
// to JavaScript surrogates and vice versa
convert: {
/**
* Given an HEX codepoint, returns UTF16 surrogate pairs.
*
* @param string generic codepoint, i.e. '1F4A9'
* @return string codepoint transformed into utf16 surrogates pair,
* i.e. \uD83D\uDCA9
*
* @example
* twemoji.convert.fromCodePoint('1f1e8');
* // "\ud83c\udde8"
*
* '1f1e8-1f1f3'.split('-').map(twemoji.convert.fromCodePoint).join('')
* // "\ud83c\udde8\ud83c\uddf3"
*/
fromCodePoint: fromCodePoint,
/**
* Given UTF16 surrogate pairs, returns the equivalent HEX codepoint.
*
* @param string generic utf16 surrogates pair, i.e. \uD83D\uDCA9
* @param string optional separator for double code points, default='-'
* @return string utf16 transformed into codepoint, i.e. '1F4A9'
*
* @example
* twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3');
* // "1f1e8-1f1f3"
*
* twemoji.convert.toCodePoint('\ud83c\udde8\ud83c\uddf3', '~');
* // "1f1e8~1f1f3"
*/
toCodePoint: toCodePoint
},
/////////////////////////
// methods //
/////////////////////////
/**
* User first: used to remove missing images
* preserving the original text intent when
* a fallback for network problems is desired.
* Automatically added to Image nodes via DOM
* It could be recycled for string operations via:
* $('img.emoji').on('error', twemoji.onerror)
*/
onerror: function onerror() {
if (this.parentNode) {
this.parentNode.replaceChild(createText(this.alt), this);
}
},
/**
* Main method/logic to generate either <img> tags or HTMLImage nodes.
* "emojify" a generic text or DOM Element.
*
* @overloads
*
* String replacement for `innerHTML` or server side operations
* twemoji.parse(string);
* twemoji.parse(string, Function);
* twemoji.parse(string, Object);
*
* HTMLElement tree parsing for safer operations over existing DOM
* twemoji.parse(HTMLElement);
* twemoji.parse(HTMLElement, Function);
* twemoji.parse(HTMLElement, Object);
*
* @param string|HTMLElement the source to parse and enrich with emoji.
*
* string replace emoji matches with <img> tags.
* Mainly used to inject emoji via `innerHTML`
* It does **not** parse the string or validate it,
* it simply replaces found emoji with a tag.
* NOTE: be sure this won't affect security.
*
* HTMLElement walk through the DOM tree and find emoji
* that are inside **text node only** (nodeType === 3)
* Mainly used to put emoji in already generated DOM
* without compromising surrounding nodes and
* **avoiding** the usage of `innerHTML`.
* NOTE: Using DOM elements instead of strings should
* improve security without compromising too much
* performance compared with a less safe `innerHTML`.
*
* @param Function|Object [optional]
* either the callback that will be invoked or an object
* with all properties to use per each found emoji.
*
* Function if specified, this will be invoked per each emoji
* that has been found through the RegExp except
* those follwed by the invariant \uFE0E ("as text").
* Once invoked, parameters will be:
*
* iconId:string the lower case HEX code point
* i.e. "1f4a9"
*
* options:Object all info for this parsing operation
*
* variant:char the optional \uFE0F ("as image")
* variant, in case this info
* is anyhow meaningful.
* By default this is ignored.
*
* If such callback will return a falsy value instead
* of a valid `src` to use for the image, nothing will
* actually change for that specific emoji.
*
*
* Object if specified, an object containing the following properties
*
* callback Function the callback to invoke per each found emoji.
* base string the base url, by default twemoji.base
* ext string the image extension, by default twemoji.ext
* size string the assets size, by default twemoji.size
*
* @example
*
* twemoji.parse("I \u2764\uFE0F emoji!");
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"> emoji!
*
*
* twemoji.parse("I \u2764\uFE0F emoji!", function(iconId, options) {
* return '/assets/' + iconId + '.gif';
* });
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/2764.gif"> emoji!
*
*
* twemoji.parse("I \u2764\uFE0F emoji!", {
* size: 72,
* callback: function(iconId, options) {
* return '/assets/' + options.size + '/' + iconId + options.ext;
* }
* });
* // I <img class="emoji" draggable="false" alt="❤️" src="/assets/72x72/2764.png"> emoji!
*
*/
parse: parse,
/**
* Given a string, invokes the callback argument
* per each emoji found in such string.
* This is the most raw version used by
* the .parse(string) method itself.
*
* @param string generic string to parse
* @param Function a generic callback that will be
* invoked to replace the content.
* This calback wil receive standard
* String.prototype.replace(str, callback)
* arguments such:
* callback(
* rawText, // the emoji match
* );
*
* and others commonly received via replace.
*/
replace: replace,
/**
* Simplify string tests against emoji.
*
* @param string some text that might contain emoji
* @return boolean true if any emoji was found, false otherwise.
*
* @example
*
* if (twemoji.test(someContent)) {
* console.log("emoji All The Things!");
* }
*/
test: test
},
// used to escape HTML special chars in attributes
escaper = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
"'": '&#39;',
'"': '&quot;'
},
// RegExp based on emoji's official Unicode standards
// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
re = /twemoji/,
// avoid runtime RegExp creation for not so smart,
// not JIT based, and old browsers / engines
UFE0Fg = /\uFE0F/g,
// avoid using a string literal like '\u200D' here because minifiers expand it inline
U200D = String.fromCharCode(0x200D),
// used to find HTML special chars in attributes
rescaper = /[&<>'"]/g,
// nodes with type 1 which should **not** be parsed (including lower case svg)
shouldntBeParsed = /IFRAME|NOFRAMES|NOSCRIPT|SCRIPT|SELECT|STYLE|TEXTAREA|[a-z]/,
// just a private shortcut
fromCharCode = String.fromCharCode;
return twemoji;
/////////////////////////
// private functions //
// declaration //
/////////////////////////
/**
* Shortcut to create text nodes
* @param string text used to create DOM text node
* @return Node a DOM node with that text
*/
function createText(text) {
return document.createTextNode(text);
}
/**
* Utility function to escape html attribute text
* @param string text use in HTML attribute
* @return string text encoded to use in HTML attribute
*/
function escapeHTML(s) {
return s.replace(rescaper, replacer);
}
/**
* Default callback used to generate emoji src
* based on Twitter CDN
* @param string the emoji codepoint string
* @param string the default size to use, i.e. "36x36"
* @return string the image source to use
*/
function defaultImageSrcGenerator(icon, options) {
return ''.concat(options.base, options.size, '/', icon, options.ext);
}
/**
* Given a generic DOM nodeType 1, walk through all children
* and store every nodeType 3 (#text) found in the tree.
* @param Element a DOM Element with probably some text in it
* @param Array the list of previously discovered text nodes
* @return Array same list with new discovered nodes, if any
*/
function grabAllTextNodes(node, allText) {
var
childNodes = node.childNodes,
length = childNodes.length,
subnode,
nodeType;
while (length--) {
subnode = childNodes[length];
nodeType = subnode.nodeType;
// parse emoji only in text nodes
if (nodeType === 3) {
// collect them to process emoji later
allText.push(subnode);
}
// ignore all nodes that are not type 1 or that
// should not be parsed as script, style, and others
else if (nodeType === 1 && !shouldntBeParsed.test(subnode.nodeName)) {
grabAllTextNodes(subnode, allText);
}
}
return allText;
}
/**
* Used to both remove the possible variant
* and to convert utf16 into code points.
* If there is a zero-width-joiner (U+200D), leave the variants in.
* @param string the raw text of the emoji match
*/
function grabTheRightIcon(rawText) {
// if variant is present as \uFE0F
return toCodePoint(rawText.indexOf(U200D) < 0 ?
rawText.replace(UFE0Fg, '') :
rawText
);
}
/**
* DOM version of the same logic / parser:
* emojify all found sub-text nodes placing images node instead.
* @param Element generic DOM node with some text in some child node
* @param Object options containing info about how to parse
*
* .callback Function the callback to invoke per each found emoji.
* .base string the base url, by default twemoji.base
* .ext string the image extension, by default twemoji.ext
* .size string the assets size, by default twemoji.size
*
* @return Element same generic node with emoji in place, if any.
*/
function parseNode(node, options) {
var
allText = grabAllTextNodes(node, []),
length = allText.length,
attrib,
attrname,
modified,
fragment,
subnode,
text,
match,
i,
index,
img,
rawText,
iconId,
src;
while (length--) {
modified = false;
fragment = document.createDocumentFragment();
subnode = allText[length];
text = subnode.nodeValue;
i = 0;
while ((match = re.exec(text))) {
index = match.index;
if (index !== i) {
fragment.appendChild(
createText(text.slice(i, index))
);
}
rawText = match[0];
iconId = grabTheRightIcon(rawText);
i = index + rawText.length;
src = options.callback(iconId, options);
if (src) {
img = new Image();
img.onerror = options.onerror;
img.setAttribute('draggable', 'false');
attrib = options.attributes(rawText, iconId);
for (attrname in attrib) {
if (
attrib.hasOwnProperty(attrname) &&
// don't allow any handlers to be set + don't allow overrides
attrname.indexOf('on') !== 0 &&
!img.hasAttribute(attrname)
) {
img.setAttribute(attrname, attrib[attrname]);
}
}
img.className = options.className;
img.alt = rawText;
img.src = src;
modified = true;
fragment.appendChild(img);
}
if (!img) fragment.appendChild(createText(rawText));
img = null;
}
// is there actually anything to replace in here ?
if (modified) {
// any text left to be added ?
if (i < text.length) {
fragment.appendChild(
createText(text.slice(i))
);
}
// replace the text node only, leave intact
// anything else surrounding such text
subnode.parentNode.replaceChild(fragment, subnode);
}
}
return node;
}
/**
* String/HTML version of the same logic / parser:
* emojify a generic text placing images tags instead of surrogates pair.
* @param string generic string with possibly some emoji in it
* @param Object options containing info about how to parse
*
* .callback Function the callback to invoke per each found emoji.
* .base string the base url, by default twemoji.base
* .ext string the image extension, by default twemoji.ext
* .size string the assets size, by default twemoji.size
*
* @return the string with <img tags> replacing all found and parsed emoji
*/
function parseString(str, options) {
return replace(str, function (rawText) {
var
ret = rawText,
iconId = grabTheRightIcon(rawText),
src = options.callback(iconId, options),
attrib,
attrname;
if (src) {
// recycle the match string replacing the emoji
// with its image counter part
ret = '<img '.concat(
'class="', options.className, '" ',
'draggable="false" ',
// needs to preserve user original intent
// when variants should be copied and pasted too
'alt="',
rawText,
'"',
' src="',
src,
'"'
);
attrib = options.attributes(rawText, iconId);
for (attrname in attrib) {
if (
attrib.hasOwnProperty(attrname) &&
// don't allow any handlers to be set + don't allow overrides
attrname.indexOf('on') !== 0 &&
ret.indexOf(' ' + attrname + '=') === -1
) {
ret = ret.concat(' ', attrname, '="', escapeHTML(attrib[attrname]), '"');
}
}
ret = ret.concat('>');
}
return ret;
});
}
/**
* Function used to actually replace HTML special chars
* @param string HTML special char
* @return string encoded HTML special char
*/
function replacer(m) {
return escaper[m];
}
/**
* Default options.attribute callback
* @return null
*/
function returnNull() {
return null;
}
/**
* Given a generic value, creates its squared counterpart if it's a number.
* As example, number 36 will return '36x36'.
* @param any a generic value.
* @return any a string representing asset size, i.e. "36x36"
* only in case the value was a number.
* Returns initial value otherwise.
*/
function toSizeSquaredAsset(value) {
return typeof value === 'number' ?
value + 'x' + value :
value;
}
/////////////////////////
// exported functions //
// declaration //
/////////////////////////
function fromCodePoint(codepoint) {
var code = typeof codepoint === 'string' ?
parseInt(codepoint, 16) : codepoint;
if (code < 0x10000) {
return fromCharCode(code);
}
code -= 0x10000;
return fromCharCode(
0xD800 + (code >> 10),
0xDC00 + (code & 0x3FF)
);
}
function parse(what, how) {
if (!how || typeof how === 'function') {
how = {callback: how};
}
// if first argument is string, inject html <img> tags
// otherwise use the DOM tree and parse text nodes only
return (typeof what === 'string' ? parseString : parseNode)(what, {
callback: how.callback || defaultImageSrcGenerator,
attributes: typeof how.attributes === 'function' ? how.attributes : returnNull,
base: typeof how.base === 'string' ? how.base : twemoji.base,
ext: how.ext || twemoji.ext,
size: how.folder || toSizeSquaredAsset(how.size || twemoji.size),
className: how.className || twemoji.className,
onerror: how.onerror || twemoji.onerror
});
}
function replace(text, callback) {
return String(text).replace(re, callback);
}
function test(text) {
// IE6 needs a reset before too
re.lastIndex = 0;
var result = re.test(text);
re.lastIndex = 0;
return result;
}
function toCodePoint(unicodeSurrogates, sep) {
var
r = [],
c = 0,
p = 0,
i = 0;
while (i < unicodeSurrogates.length) {
c = unicodeSurrogates.charCodeAt(i++);
if (p) {
r.push((0x10000 + ((p - 0xD800) << 10) + (c - 0xDC00)).toString(16));
p = 0;
} else if (0xD800 <= c && c <= 0xDBFF) {
p = c;
} else {
r.push(c.toString(16));
}
}
return r.join(sep || '-');
}
}.toString()
// from here on the library is plain text: the calls below edit its source
// drop current indentation
.replace(/^ /gm, '')
// add the RegExp in the right place
.replace('re = /twemoji/', 're = /' + re + '/g')
// add the full license
// NOTE(review): the header emitted by the function above starts with
// "/*! Copyright Twitter Inc." rather than "/*! (C) Twitter Inc. */", so this
// replace appears to match nothing, while LICENSE is still read every time
// (and must therefore exist) - confirm whether this step is intentionally dead
.replace('/*! (C) Twitter Inc. */',
'/*! (C) Twitter Inc. *//*\n' +
fs.readFileSync(path.join(__dirname, '../../', 'LICENSE')).toString().replace(
/^./gm, ' '
) +
'\n */'
) + '());');
}