@@ -62,10 +62,10 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit
62
62
// Preprocess: remove all line ends, decode HTML entity,
63
63
// fix ampersand and angle brackets and add body tag for HTML fragments
64
64
$ html = str_replace (array ("\n" , "\r" ), '' , $ html );
65
- $ html = str_replace (array ('< ' , '> ' , '& ' ), array ('_lt_ ' , '_gt_ ' , '_amp_ ' ), $ html );
65
+ $ html = str_replace (array ('< ' , '> ' , '& ' , ' " ' ), array ('_lt_ ' , '_gt_ ' , '_amp_ ' , ' _quot_ ' ), $ html );
66
66
$ html = html_entity_decode ($ html , ENT_QUOTES , 'UTF-8 ' );
67
67
$ html = str_replace ('& ' , '& ' , $ html );
68
- $ html = str_replace (array ('_lt_ ' , '_gt_ ' , '_amp_ ' ), array ('< ' , '> ' , '& ' ), $ html );
68
+ $ html = str_replace (array ('_lt_ ' , '_gt_ ' , '_amp_ ' , ' _quot_ ' ), array ('< ' , '> ' , '& ' , ' " ; ' ), $ html );
69
69
70
70
if (false === $ fullHTML ) {
71
71
$ html = '<body> ' . $ html . '</body> ' ;
@@ -100,15 +100,43 @@ protected static function parseInlineStyle($node, $styles = array())
100
100
$ attributes = $ node ->attributes ; // get all the attributes(eg: id, class)
101
101
102
102
foreach ($ attributes as $ attribute ) {
103
- switch ($ attribute ->name ) {
103
+ $ val = $ attribute ->value ;
104
+ switch (strtolower ($ attribute ->name )) {
104
105
case 'style ' :
105
106
$ styles = self ::parseStyle ($ attribute , $ styles );
106
107
break ;
107
108
case 'align ' :
108
- $ styles ['alignment ' ] = self ::mapAlign ($ attribute -> value );
109
+ $ styles ['alignment ' ] = self ::mapAlign (trim ( $ val ) );
109
110
break ;
110
111
case 'lang ' :
111
- $ styles ['lang ' ] = $ attribute ->value ;
112
+ $ styles ['lang ' ] = $ val ;
113
+ break ;
114
+ case 'width ' :
115
+ // tables, cells
116
+ if (false !== strpos ($ val , '% ' )) {
117
+ // e.g. <table width="100%"> or <td width="50%">
118
+ $ styles ['width ' ] = intval ($ val ) * 50 ;
119
+ $ styles ['unit ' ] = \PhpOffice \PhpWord \SimpleType \TblWidth::PERCENT ;
120
+ } else {
121
+ // e.g. <table width="250"> where "250" = 250px (always pixels)
122
+ $ styles ['width ' ] = Converter::pixelToTwip ($ val );
123
+ $ styles ['unit ' ] = \PhpOffice \PhpWord \SimpleType \TblWidth::TWIP ;
124
+ }
125
+ break ;
126
+ case 'cellspacing ' :
127
+ // tables e.g. <table cellspacing="2">, where "2" = 2px (always pixels)
128
+ $ val = intval ($ val ).'px ' ;
129
+ $ styles ['cellSpacing ' ] = Converter::cssToTwip ($ val );
130
+ break ;
131
+ case 'bgcolor ' :
132
+ // tables, rows, cells e.g. <tr bgColor="#FF0000">
133
+ $ styles ['bgColor ' ] = trim ($ val , '# ' );
134
+ break ;
135
+ case 'valign ' :
136
+ // cells e.g. <td valign="middle">
137
+ if (preg_match ('#(?:top|bottom|middle|baseline)#i ' , $ val , $ matches )) {
138
+ $ styles ['valign ' ] = self ::mapAlignVertical ($ matches [0 ]);
139
+ }
112
140
break ;
113
141
}
114
142
}
@@ -165,6 +193,7 @@ protected static function parseNode($node, $element, $styles = array(), $data =
165
193
'img ' => array ('Image ' , $ node , $ element , $ styles , null , null , null ),
166
194
'br ' => array ('LineBreak ' , null , $ element , $ styles , null , null , null ),
167
195
'a ' => array ('Link ' , $ node , $ element , $ styles , null , null , null ),
196
+ 'hr ' => array ('HorizRule ' , $ node , $ element , $ styles , null , null , null ),
168
197
);
169
198
170
199
$ newElement = null ;
@@ -365,7 +394,11 @@ protected static function parseCell($node, $element, &$styles)
365
394
if (!empty ($ colspan )) {
366
395
$ cellStyles ['gridSpan ' ] = $ colspan - 0 ;
367
396
}
368
- $ cell = $ element ->addCell (null , $ cellStyles );
397
+
398
+ // set cell width to control column widths
399
+ $ width = isset ($ cellStyles ['width ' ]) ? $ cellStyles ['width ' ] : null ;
400
+ unset($ cellStyles ['width ' ]); // would not apply
401
+ $ cell = $ element ->addCell ($ width , $ cellStyles );
369
402
370
403
if (self ::shouldAddTextRun ($ node )) {
371
404
return $ cell ->addTextRun (self ::parseInlineStyle ($ node , $ styles ['paragraph ' ]));
@@ -424,7 +457,32 @@ protected static function parseList($node, $element, &$styles, &$data)
424
457
} else {
425
458
$ data ['listdepth ' ] = 0 ;
426
459
$ styles ['list ' ] = 'listStyle_ ' . self ::$ listIndex ++;
427
- $ element ->getPhpWord ()->addNumberingStyle ($ styles ['list ' ], self ::getListStyle ($ isOrderedList ));
460
+ $ style = $ element ->getPhpWord ()->addNumberingStyle ($ styles ['list ' ], self ::getListStyle ($ isOrderedList ));
461
+
462
+ // extract attributes start & type e.g. <ol type="A" start="3">
463
+ $ start = 0 ;
464
+ $ type = '' ;
465
+ foreach ($ node ->attributes as $ attribute ) {
466
+ switch ($ attribute ->name ) {
467
+ case 'start ' :
468
+ $ start = (int ) $ attribute ->value ;
469
+ break ;
470
+ case 'type ' :
471
+ $ type = $ attribute ->value ;
472
+ break ;
473
+ }
474
+ }
475
+
476
+ $ levels = $ style ->getLevels ();
477
+ /** @var \PhpOffice\PhpWord\Style\NumberingLevel */
478
+ $ level = $ levels [0 ];
479
+ if ($ start > 0 ) {
480
+ $ level ->setStart ($ start );
481
+ }
482
+ $ type = $ type ? self ::mapListType ($ type ) : null ;
483
+ if ($ type ) {
484
+ $ level ->setFormat ($ type );
485
+ }
428
486
}
429
487
if ($ node ->parentNode ->nodeName === 'li ' ) {
430
488
return $ element ->getParent ();
@@ -506,7 +564,8 @@ protected static function parseStyle($attribute, $styles)
506
564
foreach ($ properties as $ property ) {
507
565
list ($ cKey , $ cValue ) = array_pad (explode (': ' , $ property , 2 ), 2 , null );
508
566
$ cValue = trim ($ cValue );
509
- switch (trim ($ cKey )) {
567
+ $ cKey = strtolower (trim ($ cKey ));
568
+ switch ($ cKey ) {
510
569
case 'text-decoration ' :
511
570
switch ($ cValue ) {
512
571
case 'underline ' :
@@ -579,11 +638,18 @@ protected static function parseStyle($attribute, $styles)
579
638
}
580
639
$ styles ['italic ' ] = $ tValue ;
581
640
break ;
641
+ case 'margin ' :
642
+ $ cValue = Converter::cssToTwip ($ cValue );
643
+ $ styles ['spaceBefore ' ] = $ cValue ;
644
+ $ styles ['spaceAfter ' ] = $ cValue ;
645
+ break ;
582
646
case 'margin-top ' :
583
- $ styles ['spaceBefore ' ] = Converter::cssToPoint ($ cValue );
647
+ // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
648
+ $ styles ['spaceBefore ' ] = Converter::cssToTwip ($ cValue );
584
649
break ;
585
650
case 'margin-bottom ' :
586
- $ styles ['spaceAfter ' ] = Converter::cssToPoint ($ cValue );
651
+ // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
652
+ $ styles ['spaceAfter ' ] = Converter::cssToTwip ($ cValue );
587
653
break ;
588
654
case 'border-color ' :
589
655
self ::mapBorderColor ($ styles , $ cValue );
@@ -607,10 +673,37 @@ protected static function parseStyle($attribute, $styles)
607
673
}
608
674
break ;
609
675
case 'border ' :
610
- if (preg_match ('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+)\s+([a-z]+)/ ' , $ cValue , $ matches )) {
611
- $ styles ['borderSize ' ] = Converter::cssToPoint ($ matches [1 ]);
612
- $ styles ['borderColor ' ] = trim ($ matches [2 ], '# ' );
613
- $ styles ['borderStyle ' ] = self ::mapBorderStyle ($ matches [3 ]);
676
+ case 'border-top ' :
677
+ case 'border-bottom ' :
678
+ case 'border-right ' :
679
+ case 'border-left ' :
680
+ // must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid"
681
+ // Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC
682
+ if (preg_match ('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/ ' , $ cValue , $ matches )) {
683
+ if (false !== strpos ($ cKey , '- ' )) {
684
+ $ which = explode ('- ' , $ cKey )[1 ];
685
+ $ which = ucfirst ($ which ); // e.g. bottom -> Bottom
686
+ } else {
687
+ $ which = '' ;
688
+ }
689
+ // Note - border width normalization:
690
+ // Width of border in Word is calculated differently than HTML borders, usually showing up too bold.
691
+ // Smallest 1px (or 1pt) appears in Word like 2-3px/pt in HTML once converted to twips.
692
+ // Therefore we need to normalize the converted twip value to approximately half of its value.
693
+ // This may be adjusted, if better ratio or formula found.
694
+ // BC change: up to ver. 0.17.0 was $size converted to points - Converter::cssToPoint($size)
695
+ $ size = Converter::cssToTwip ($ matches [1 ]);
696
+ $ size = intval ($ size / 2 );
697
+ // valid variants may be e.g. borderSize, borderTopSize, borderLeftColor, etc ..
698
+ $ styles ["border {$ which }Size " ] = $ size ; // twips
699
+ $ styles ["border {$ which }Color " ] = trim ($ matches [2 ], '# ' );
700
+ $ styles ["border {$ which }Style " ] = self ::mapBorderStyle ($ matches [3 ]);
701
+ }
702
+ break ;
703
+ case 'vertical-align ' :
704
+ // https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align
705
+ if (preg_match ('#(?:top|bottom|middle|sub|baseline)#i ' , $ cValue , $ matches )) {
706
+ $ styles ['valign ' ] = self ::mapAlignVertical ($ matches [0 ]);
614
707
}
615
708
break ;
616
709
}
@@ -655,14 +748,14 @@ protected static function parseImage($node, $element)
655
748
case 'float ' :
656
749
if (trim ($ v ) == 'right ' ) {
657
750
$ style ['hPos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RIGHT ;
658
- $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_PAGE ;
751
+ $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_MARGIN ; // inner section area
659
752
$ style ['pos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELATIVE ;
660
753
$ style ['wrap ' ] = \PhpOffice \PhpWord \Style \Image::WRAP_TIGHT ;
661
754
$ style ['overlap ' ] = true ;
662
755
}
663
756
if (trim ($ v ) == 'left ' ) {
664
757
$ style ['hPos ' ] = \PhpOffice \PhpWord \Style \Image::POS_LEFT ;
665
- $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_PAGE ;
758
+ $ style ['hPosRelTo ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELTO_MARGIN ; // inner section area
666
759
$ style ['pos ' ] = \PhpOffice \PhpWord \Style \Image::POS_RELATIVE ;
667
760
$ style ['wrap ' ] = \PhpOffice \PhpWord \Style \Image::WRAP_TIGHT ;
668
761
$ style ['overlap ' ] = true ;
@@ -777,6 +870,58 @@ protected static function mapAlign($cssAlignment)
777
870
}
778
871
}
779
872
873
/**
 * Translates an HTML/CSS vertical alignment keyword into the value stored
 * under the 'valign' style key.
 *
 * Matching is case-insensitive. Mapping:
 *  - top, bottom, baseline => returned as-is (lower-cased)
 *  - middle                => center
 *  - sub                   => bottom
 *  - text-top              => top
 *
 * @param string $alignment HTML "valign" attribute value or CSS "vertical-align" keyword
 * @return string mapped keyword, or an empty string when the keyword is not recognised
 */
protected static function mapAlignVertical($alignment)
{
    $alignment = strtolower($alignment);
    switch ($alignment) {
        case 'top':
        case 'baseline':
        case 'bottom':
            // These keywords are passed through unchanged.
            return $alignment;
        case 'middle':
            return 'center';
        case 'sub':
            return 'bottom';
        case 'text-top':
            // A second "case 'baseline'" used to sit here; it could never be reached
            // because 'baseline' is already handled by the pass-through group above.
            return 'top';
        default:
            // @discuss - which one should apply:
            // - Word uses default vert. alignment: top
            // - all browsers use default vert. alignment: middle
            // An empty string is returned for unknown keywords; per the original note this is
            // meant to leave the attribute unset so that the Word default (top) applies.
            return '';
    }
}
902
+
903
/**
 * Maps the "type" attribute of an HTML ordered list (e.g. <ol type="A">)
 * to a numbering format constant.
 *
 * The lookup is case-sensitive: "a"/"A" and "i"/"I" select different formats.
 *
 * @param string $cssListType value of the type attribute: "a", "A", "i", "I" or "1"
 * @return string NumberFormat constant (assumed to be string-valued - confirm against NumberFormat);
 *                NumberFormat::DECIMAL for "1" and for every unrecognised value
 */
protected static function mapListType($cssListType)
{
    $formats = array(
        'a' => NumberFormat::LOWER_LETTER, // a, b, c, ..
        'A' => NumberFormat::UPPER_LETTER, // A, B, C, ..
        'i' => NumberFormat::LOWER_ROMAN,  // i, ii, iii, iv, ..
        'I' => NumberFormat::UPPER_ROMAN,  // I, II, III, IV, ..
    );

    // Strict string lookup: a switch compares loosely, which would let a non-string
    // value such as true select the lower-letter format by accident.
    if (is_string($cssListType) && isset($formats[$cssListType])) {
        return $formats[$cssListType];
    }

    return NumberFormat::DECIMAL; // 1, 2, 3, ..
}
924
+
780
925
/**
781
926
* Parse line break
782
927
*
@@ -812,4 +957,38 @@ protected static function parseLink($node, $element, &$styles)
812
957
813
958
return $ element ->addLink ($ target , $ node ->textContent , $ styles ['font ' ], $ styles ['paragraph ' ]);
814
959
}
960
+
961
/**
 * Renders an HTML <hr> element.
 *
 * The rule is emitted as an empty text paragraph whose bottom border draws the line,
 * extending 100% inside the section.
 * Note: a Word rule is not the same as HTML's <hr>, since it does not support width
 * and therefore no alignment either.
 *
 * Some properties may be controlled through inline styles, e.g.
 * <hr style="border-bottom: 3px #DDDDDD solid; margin-bottom: 0;">
 *
 * <hr/> deliberately is not rendered as:
 *  - a table - throws error "cannot be inside textruns", e.g. lists
 *  - a line - that is a shape and has different behaviour
 *  - repeated text, e.g. underline "_", because of unpredictable line wrapping
 *
 * @param \DOMNode $node
 * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
 */
protected static function parseHorizRule($node, $element)
{
    $inline = self::parseInlineStyle($node);

    // Fallbacks used for the border when the inline styles carry no usable value.
    // NOTE(review): the other style keys visible in this file are camelCase; confirm that
    // a 'line-height' key is ever produced by the inline style parser.
    $ruleSize = !empty($inline['line-height']) ? $inline['line-height'] : 1;
    $ruleColor = !empty($inline['color']) ? $inline['color'] : '000000';

    $fontDefaults = ['size' => 3];
    $paragraphDefaults = [
        'lineHeight'        => 0.25,     // multiply default line height - e.g. 1, 1.5 etc
        'spacing'           => 0,        // twip
        'spaceBefore'       => 120,      // twip, 240/2 (default line height)
        'spaceAfter'        => 120,      // twip
        'borderBottomSize'  => $ruleSize,
        'borderBottomColor' => $ruleColor,
        'borderBottomStyle' => 'single', // same as "solid"
    ];

    // Array union keeps the left-hand entry on key collisions,
    // so values parsed from the node override the defaults.
    $element->addText('', $inline + $fontDefaults, $inline + $paragraphDefaults);
}
815
994
}
0 commit comments