1
0
Mirror of https://github.com/twitter/twemoji.git (last synced 2025-02-10 15:40:33 +00:00)

Merge pull request #122 from twuttke/consolidate_sensitive_keycaps

Handle keycap variants as a group in the regex
This commit is contained in:
Andrea Giammarchi 2016-03-01 21:04:14 +00:00
commit 9020ac7e6b
6 changed files with 1348 additions and 11 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

2
2/twemoji.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -166,6 +166,9 @@ Queue([
}, /^([0-9A-F]{4,}) FE0E;.+$/) // sensitive char }, /^([0-9A-F]{4,}) FE0E;.+$/) // sensitive char
; ;
// iOS keyboard allows U+002A U+FE0F U+20E3 even though not a standardized variant (yet?)
q.variantsSensitive.push('002A');
console.log('[INFO] parsed ' + q.variantsSensitive.length + ' variant sensitive emoji.'); console.log('[INFO] parsed ' + q.variantsSensitive.length + ' variant sensitive emoji.');
q.next(); q.next();
@ -244,14 +247,23 @@ Queue([
var zwj = []; var zwj = [];
var diversity = []; var diversity = [];
var sensitive = []; var sensitive = [];
var sensitiveKeycaps = [];
var diversitySensitive = []; var diversitySensitive = [];
var regular = []; var regular = [];
q.emojiSource.forEach(function (codePoints) { q.emojiSource.forEach(function (codePoints) {
var u; var u;
var codePointsWithoutKeycap;
codePoints = codePoints.replace(/\b[A-F0-9]+\b/g, function (hex) {
// Pad all hex numbers to have at least 4 digits to match variantsSensitive
return hex.length < 4 ? ('000' + hex).slice(-4) : hex;
});
if (q.ignore.indexOf(codePoints) < 0) { if (q.ignore.indexOf(codePoints) < 0) {
u = codePoints.split('-').map(toJSON).join(''); u = toJSON(codePoints);
codePointsWithoutKeycap = codePoints.replace(/-20E3$/, '');
if (codePoints.indexOf('200D') >= 0) { if (codePoints.indexOf('200D') >= 0) {
zwj.push(u); zwj.push(u);
} else if (codePoints != codePointsWithoutKeycap && q.variantsSensitive.indexOf(codePointsWithoutKeycap) >= 0) {
sensitiveKeycaps.push(toJSON(codePointsWithoutKeycap));
} else if (q.diversityBase.indexOf(codePoints.replace(/-1F3F[B-F]$/, '')) >= 0) { } else if (q.diversityBase.indexOf(codePoints.replace(/-1F3F[B-F]$/, '')) >= 0) {
// This is a diversity Emoji with or without a skin tone modifier // This is a diversity Emoji with or without a skin tone modifier
// Add it to the regex if this is the base without the modifier // Add it to the regex if this is the base without the modifier
@ -277,6 +289,11 @@ Queue([
q.re += zwj.join('|') + '|'; q.re += zwj.join('|') + '|';
} }
// Group the variant sensitive keycaps
if (sensitiveKeycaps.length) {
q.re += '(?:' + sensitiveKeycaps.join('|') + ')\\ufe0f?\\u20e3|';
}
// Next, add the diversity enabled Emoji that may include a skin tone suffix // Next, add the diversity enabled Emoji that may include a skin tone suffix
if (diversity.length + diversitySensitive.length) { if (diversity.length + diversitySensitive.length) {
q.re += '(?:'; q.re += '(?:';
@ -299,8 +316,10 @@ Queue([
q.next(); q.next();
// basic utilities to convert codepoints to JSON strings // basic utilities to convert codepoints to JSON strings
function toJSON(point) { function toJSON(codePoints) {
return codePoints.split('-').map(function (point) {
return UTF162JSON(fromCodePoint(point)); return UTF162JSON(fromCodePoint(point));
}).join('');
} }
function fromCodePoint(codepoint) { function fromCodePoint(codepoint) {
var code = typeof codepoint === 'string' ? var code = typeof codepoint === 'string' ?
@ -901,7 +920,6 @@ function createTwemoji(re) {
.replace(/^ /gm, '') .replace(/^ /gm, '')
// add the RegExp in the right place // add the RegExp in the right place
.replace('re = /twemoji/', 're = /' + re + '/g') .replace('re = /twemoji/', 're = /' + re + '/g')
.replace(/(\\u00[2-3][0-9])(\\u20e3)/g, '$1\\ufe0f?$2')
// add the full license // add the full license
.replace('/*! (C) Twitter Inc. */', .replace('/*! (C) Twitter Inc. */',
'/*! (C) Twitter Inc. *//*\n' + '/*! (C) Twitter Inc. *//*\n' +