@@ -15,7 +15,6 @@ var d3 = require('d3');
15
15
16
16
var Lib = require ( '../lib' ) ;
17
17
var xmlnsNamespaces = require ( '../constants/xmlns_namespaces' ) ;
18
- var stringMappings = require ( '../constants/string_mappings' ) ;
19
18
var LINE_SPACING = require ( '../constants/alignment' ) . LINE_SPACING ;
20
19
21
20
// text converter
@@ -223,13 +222,6 @@ var PROTOCOLS = ['http:', 'https:', 'mailto:', '', undefined, ':'];
223
222
224
223
var STRIP_TAGS = new RegExp ( '</?(' + Object . keys ( TAG_STYLES ) . join ( '|' ) + ')( [^>]*)?/?>' , 'g' ) ;
225
224
226
- var ENTITY_TO_UNICODE = Object . keys ( stringMappings . entityToUnicode ) . map ( function ( k ) {
227
- return {
228
- regExp : new RegExp ( '&' + k + ';' , 'g' ) ,
229
- sub : stringMappings . entityToUnicode [ k ]
230
- } ;
231
- } ) ;
232
-
233
225
var NEWLINES = / ( \r \n ? | \n ) / g;
234
226
235
227
var SPLIT_TAGS = / ( < [ ^ < > ] * > ) / ;
@@ -254,6 +246,14 @@ var BR_TAG = /<br(\s+.*)?>/i;
254
246
*
255
247
* Because we hack in other attributes with style (sub & sup), drop any trailing
256
248
* semicolon in user-supplied styles so we can consistently append the tag-dependent style
249
+ *
250
+ * These are for tag attributes; Chrome anyway will convert entities in
251
+ * attribute values, but not in attribute names
252
+ * you can test this by for example:
253
+ * > p = document.createElement('p')
254
+ * > p.innerHTML = '<span styl&#x65;="font-color:red;">Hi</span>'
255
+ * > p.innerHTML
256
+ * <- '<span styl&#x65;="font-color:red;">Hi</span>'
257
257
*/
258
258
var STYLEMATCH = / ( ^ | [ \s " ' ] ) s t y l e \s * = \s * ( " ( [ ^ " ] * ) ; ? " | ' ( [ ^ ' ] * ) ; ? ' ) / i;
259
259
var HREFMATCH = / ( ^ | [ \s " ' ] ) h r e f \s * = \s * ( " ( [ ^ " ] * ) " | ' ( [ ^ ' ] * ) ' ) / i;
@@ -265,7 +265,8 @@ var POPUPMATCH = /(^|[\s"'])popup\s*=\s*("([\w=,]*)"|'([\w=,]*)')/i;
265
265
/*
 * Pull the quoted value of a tag attribute out of `_str` using one of the
 * attribute regexes (STYLEMATCH, HREFMATCH, ...). In those patterns capture
 * group 3 holds a double-quoted value and group 4 a single-quoted one.
 *
 * Returns null when `_str` is falsy or `re` does not match. When neither
 * group holds a non-empty value, that falsy value is returned untouched.
 * Otherwise the value is returned with its HTML entities decoded.
 */
function getQuotedMatch(_str, re) {
    if(!_str) return null;

    var found = _str.match(re);
    if(!found) return found;

    var quoted = found[3] || found[4];
    // only decode real values, so '' / undefined pass through as they are
    return quoted && convertEntities(quoted);
}
270
271
271
272
var COLORMATCH = / ( ^ | ; ) \s * c o l o r : / ;
@@ -276,19 +277,70 @@ exports.plainText = function(_str) {
276
277
return ( _str || '' ) . replace ( STRIP_TAGS , ' ' ) ;
277
278
} ;
278
279
279
- function replaceFromMapObject ( _str , list ) {
280
- if ( ! _str ) return '' ;
280
+ /*
281
+ * N.B. HTML entities are listed without the leading '&' and trailing ';'
282
+ * https://www.freeformatter.com/html-entities.html
283
+ *
284
+ * FWIW if we wanted to support the full set, it has 2261 entries:
285
+ * https://www.w3.org/TR/html5/entities.json
286
+ * though I notice that some of these are duplicates and/or are missing ";"
287
+ * eg: "&amp;", "&amp", "&AMP;", and "&AMP" all map to "&"
288
+ * We no longer need to include numeric entities here, these are now handled
289
+ * by String.fromCodePoint/fromCharCode
290
+ *
291
+ * Anyway the only ones that are really important to allow are the HTML special
292
+ * chars <, >, and &, because these ones can trigger special processing if not
293
+ * replaced by the corresponding entity.
294
+ */
295
var entityToUnicode = {
    mu: 'μ',
    amp: '&',
    lt: '<',
    gt: '>',
    // written as an escape so the no-break space cannot be mistaken for
    // (or silently normalised to) an ordinary space
    nbsp: '\u00a0',
    times: '×',
    plusmn: '±',
    deg: '°'
};

// NOTE: in general entities can contain uppercase too (so [a-zA-Z]) but all the
// ones we support use only lowercase. If we ever change that, update the regex.
var ENTITY_MATCH = /&(#\d+|#x[\da-fA-F]+|[a-z]+);/g;

/*
 * Replace the HTML entities in `_str` with the characters they stand for.
 *
 * Handles decimal (&#960;) and hexadecimal (&#x3c0;) numeric entities, plus
 * the named entities listed in `entityToUnicode`.
 *
 * @param {string} _str: text that may contain entities
 * @return {string} the text with every recognised entity decoded; anything
 *   we cannot decode is left exactly as it was written
 */
function convertEntities(_str) {
    return _str.replace(ENTITY_MATCH, function(fullMatch, innerMatch) {
        var outChar;
        if(innerMatch.charAt(0) === '#') {
            // cannot use String.fromCodePoint in IE
            outChar = fromCodePoint(
                innerMatch.charAt(1) === 'x' ?
                    parseInt(innerMatch.substr(2), 16) :
                    parseInt(innerMatch.substr(1), 10)
            );
        }
        // Own properties only: a bare lookup would also find members inherited
        // from Object.prototype, so '&constructor;' would be replaced by the
        // stringified Object function instead of being left alone.
        else if(Object.prototype.hasOwnProperty.call(entityToUnicode, innerMatch)) {
            outChar = entityToUnicode[innerMatch];
        }

        // as in regular HTML, if we didn't decode the entity just
        // leave the raw text in place.
        return outChar || fullMatch;
    });
}
289
-
290
- function convertEntities ( _str ) {
291
- return replaceFromMapObject ( _str , ENTITY_TO_UNICODE ) ;
327
+ exports . convertEntities = convertEntities ;
328
+
329
/*
 * Turn a numeric Unicode code point into the corresponding string.
 *
 * @param {number} code: the code point
 * @return {string|undefined} the character, or undefined when `code` lies
 *   beyond the last valid code point (0x10FFFF)
 */
function fromCodePoint(code) {
    // Don't allow overflow. Chrome renders such an entity as the U+FFFD
    // replacement character, but it seems more useful to report "not
    // converted" and let the caller keep the raw text.
    if(code > 0x10FFFF) return;

    var nativeFromCodePoint = String.fromCodePoint;
    if(nativeFromCodePoint) return nativeFromCodePoint(code);

    // IE doesn't have String.fromCodePoint, so build the string by hand
    // see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
    var fitsInOneUnit = code <= 0xFFFF;
    if(fitsInOneUnit) return String.fromCharCode(code);

    // anything else needs a UTF-16 surrogate pair
    var leadUnit = (code >> 10) + 0xD7C0;
    var trailUnit = (code % 0x400) + 0xDC00;
    return String.fromCharCode(leadUnit, trailUnit);
}
293
345
294
346
/*
@@ -302,15 +354,14 @@ function convertEntities(_str) {
302
354
* somewhat differently if it does, so just keep track of this when it happens.
303
355
*/
304
356
function buildSVGText ( containerNode , str ) {
305
- str = convertEntities ( str )
306
- /*
307
- * Normalize behavior between IE and others wrt newlines and whitespace:pre
308
- * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
309
- * Chrome and FF display \n, \r, or \r\n as a space in this mode.
310
- * I feel like at some point we turned these into <br> but currently we don't so
311
- * I'm just going to cement what we do now in Chrome and FF
312
- */
313
- . replace ( NEWLINES , ' ' ) ;
357
+ /*
358
+ * Normalize behavior between IE and others wrt newlines and whitespace:pre
359
+ * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
360
+ * Chrome and FF display \n, \r, or \r\n as a space in this mode.
361
+ * I feel like at some point we turned these into <br> but currently we don't so
362
+ * I'm just going to cement what we do now in Chrome and FF
363
+ */
364
+ str = str . replace ( NEWLINES , ' ' ) ;
314
365
315
366
var hasLink = false ;
316
367
@@ -435,7 +486,7 @@ function buildSVGText(containerNode, str) {
435
486
newLine ( ) ;
436
487
}
437
488
else if ( tagStyle === undefined ) {
438
- addTextNode ( currentNode , parti ) ;
489
+ addTextNode ( currentNode , convertEntities ( parti ) ) ;
439
490
}
440
491
else {
441
492
// tag - open or close
0 commit comments