Skip to content

Commit ab3a2c0

Browse files
committed
HTML Reader : Add basic support for CSS Style Tag
1 parent 8521612 commit ab3a2c0

File tree

4 files changed

+229
-38
lines changed

4 files changed

+229
-38
lines changed

src/PhpWord/Shared/Css.php

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
<?php
2+
/**
3+
* This file is part of PHPWord - A pure PHP library for reading and writing
4+
* word processing documents.
5+
*
6+
* PHPWord is free software distributed under the terms of the GNU Lesser
7+
* General Public License version 3 as published by the Free Software Foundation.
8+
*
9+
* For the full copyright and license information, please read the LICENSE
10+
* file that was distributed with this source code. For the full list of
11+
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12+
*
13+
* @see https://github.com/PHPOffice/PHPWord
14+
*
15+
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16+
*/
17+
declare(strict_types=1);
18+
19+
namespace PhpOffice\PhpWord\Shared;
20+
21+
/**
 * Minimal CSS parser: turns the text of a stylesheet into a map of
 * selector => [property => value] that can be queried per selector.
 *
 * Only flat `selector { property: value; ... }` rules are recognised; the
 * selector text is kept verbatim (apart from sanitizing), so grouped or
 * nested selectors are not expanded.
 */
class Css
{
    /**
     * Raw CSS source as handed to the constructor.
     *
     * @var string
     */
    private $cssContent;

    /**
     * Parsed declarations, indexed by sanitized selector and then by
     * sanitized, lower-cased property name.
     *
     * @var array<string, array<string, string>>
     */
    private $styles = [];

    /**
     * @param string $cssContent Raw stylesheet text (e.g. the content of a <style> tag)
     */
    public function __construct(string $cssContent)
    {
        $this->cssContent = $cssContent;
    }

    /**
     * Parse the stylesheet and populate the internal style map.
     *
     * Malformed declarations (no colon, or an empty property name) are
     * skipped instead of aborting the whole parse.
     */
    public function process(): void
    {
        // Rules are matched on a single line, so line breaks are dropped first.
        $cssContent = str_replace(["\r", "\n"], '', $this->cssContent);

        // Group 1 holds the selectors, group 2 the matching declaration lists.
        // A falsy result means either a PCRE failure or no rule at all.
        if (!preg_match_all('/(.+?)\s?\{\s?(.+?)\s?\}/', $cssContent, $cssExtracted)) {
            return;
        }

        foreach ($cssExtracted[1] as $index => $rawSelector) {
            $selector = $this->sanitize($rawSelector);

            foreach (explode(';', $cssExtracted[2][$index]) as $declaration) {
                // Limit of 2: only the first colon separates property from
                // value, so values such as "url(http://...)" stay intact.
                $parts = explode(':', $declaration, 2);
                if (count($parts) !== 2) {
                    // Empty fragment or declaration without a colon.
                    continue;
                }

                // CSS property names are case-insensitive; normalise them the
                // same way inline style attributes are normalised.
                $property = strtolower($this->sanitize($parts[0]));
                if ($property === '') {
                    continue;
                }

                $this->styles[$selector][$property] = $this->sanitize($parts[1]);
            }
        }
    }

    /**
     * Return every parsed rule.
     *
     * @return array<string, array<string, string>> selector => [property => value]
     */
    public function getStyles(): array
    {
        return $this->styles;
    }

    /**
     * Return the declarations stored for one selector.
     *
     * @param string $selector Selector exactly as written in the stylesheet (e.g. ".foo", "#bar")
     *
     * @return array<string, string> property => value, or an empty array when the selector is unknown
     */
    public function getStyle(string $selector): array
    {
        $selector = $this->sanitize($selector);

        return $this->styles[$selector] ?? [];
    }

    /**
     * Trim surrounding whitespace and backslash-escape quotes, backslashes and NUL bytes.
     *
     * Applied to selectors, property names and values on both write and
     * lookup, so keys stay consistent.
     */
    private function sanitize(string $value): string
    {
        return addslashes(trim($value));
    }
}

src/PhpWord/Shared/Html.php

Lines changed: 68 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ class Html
4343

4444
protected static $options;
4545

46+
/**
47+
* @var Css
48+
*/
49+
protected static $css;
50+
4651
/**
4752
* Add HTML parts.
4853
*
@@ -149,6 +154,16 @@ protected static function parseInlineStyle($node, $styles = [])
149154
}
150155
}
151156

157+
$attributeIdentifier = $attributes->getNamedItem('id');
158+
if ($attributeIdentifier && self::$css) {
159+
$styles = self::parseStyleDeclarations(self::$css->getStyle('#' . $attributeIdentifier->value), $styles);
160+
}
161+
162+
$attributeClass = $attributes->getNamedItem('class');
163+
if ($attributeClass && self::$css) {
164+
$styles = self::parseStyleDeclarations(self::$css->getStyle('.' . $attributeClass->value), $styles);
165+
}
166+
152167
$attributeStyle = $attributes->getNamedItem('style');
153168
if ($attributeStyle) {
154169
$styles = self::parseStyle($attributeStyle, $styles);
@@ -168,6 +183,13 @@ protected static function parseInlineStyle($node, $styles = [])
168183
*/
169184
protected static function parseNode($node, $element, $styles = [], $data = []): void
170185
{
186+
if ($node->nodeName == 'style') {
187+
self::$css = new Css($node->textContent);
188+
self::$css->process();
189+
190+
return;
191+
}
192+
171193
// Populate styles array
172194
$styleTypes = ['font', 'paragraph', 'list', 'table', 'row', 'cell'];
173195
foreach ($styleTypes as $styleType) {
@@ -635,13 +657,21 @@ protected static function parseStyle($attribute, $styles)
635657
{
636658
$properties = explode(';', trim($attribute->value, " \t\n\r\0\x0B;"));
637659

660+
$selectors = [];
638661
foreach ($properties as $property) {
639662
[$cKey, $cValue] = array_pad(explode(':', $property, 2), 2, null);
640-
$cValue = trim($cValue ?? '');
641-
$cKey = strtolower(trim($cKey));
642-
switch ($cKey) {
663+
$selectors[strtolower(trim($cKey))] = trim($cValue ?? '');
664+
}
665+
666+
return self::parseStyleDeclarations($selectors, $styles);
667+
}
668+
669+
protected static function parseStyleDeclarations(array $selectors, array $styles)
670+
{
671+
foreach ($selectors as $property => $value) {
672+
switch ($property) {
643673
case 'text-decoration':
644-
switch ($cValue) {
674+
switch ($value) {
645675
case 'underline':
646676
$styles['underline'] = 'single';
647677

@@ -654,44 +684,44 @@ protected static function parseStyle($attribute, $styles)
654684

655685
break;
656686
case 'text-align':
657-
$styles['alignment'] = self::mapAlign($cValue);
687+
$styles['alignment'] = self::mapAlign($value);
658688

659689
break;
660690
case 'display':
661-
$styles['hidden'] = $cValue === 'none' || $cValue === 'hidden';
691+
$styles['hidden'] = $value === 'none' || $value === 'hidden';
662692

663693
break;
664694
case 'direction':
665-
$styles['rtl'] = $cValue === 'rtl';
695+
$styles['rtl'] = $value === 'rtl';
666696

667697
break;
668698
case 'font-size':
669-
$styles['size'] = Converter::cssToPoint($cValue);
699+
$styles['size'] = Converter::cssToPoint($value);
670700

671701
break;
672702
case 'font-family':
673-
$cValue = array_map('trim', explode(',', $cValue));
674-
$styles['name'] = ucwords($cValue[0]);
703+
$value = array_map('trim', explode(',', $value));
704+
$styles['name'] = ucwords($value[0]);
675705

676706
break;
677707
case 'color':
678-
$styles['color'] = trim($cValue, '#');
708+
$styles['color'] = trim($value, '#');
679709

680710
break;
681711
case 'background-color':
682-
$styles['bgColor'] = trim($cValue, '#');
712+
$styles['bgColor'] = trim($value, '#');
683713

684714
break;
685715
case 'line-height':
686716
$matches = [];
687-
if ($cValue === 'normal') {
717+
if ($value === 'normal') {
688718
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
689719
$spacing = 0;
690-
} elseif (preg_match('/([0-9]+\.?[0-9]*[a-z]+)/', $cValue, $matches)) {
720+
} elseif (preg_match('/([0-9]+\.?[0-9]*[a-z]+)/', $value, $matches)) {
691721
//matches number with a unit, e.g. 12px, 15pt, 20mm, ...
692722
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::EXACT;
693723
$spacing = Converter::cssToTwip($matches[1]);
694-
} elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
724+
} elseif (preg_match('/([0-9]+)%/', $value, $matches)) {
695725
//matches percentages
696726
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
697727
//we are subtracting 1 line height because the Spacing writer is adding one line
@@ -700,72 +730,72 @@ protected static function parseStyle($attribute, $styles)
700730
//any other, which is a multiplier. E.g. 1.2
701731
$spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
702732
//we are subtracting 1 line height because the Spacing writer is adding one line
703-
$spacing = ($cValue * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
733+
$spacing = ($value * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
704734
}
705735
$styles['spacingLineRule'] = $spacingLineRule;
706736
$styles['line-spacing'] = $spacing;
707737

708738
break;
709739
case 'letter-spacing':
710-
$styles['letter-spacing'] = Converter::cssToTwip($cValue);
740+
$styles['letter-spacing'] = Converter::cssToTwip($value);
711741

712742
break;
713743
case 'text-indent':
714-
$styles['indentation']['firstLine'] = Converter::cssToTwip($cValue);
744+
$styles['indentation']['firstLine'] = Converter::cssToTwip($value);
715745

716746
break;
717747
case 'font-weight':
718748
$tValue = false;
719-
if (preg_match('#bold#', $cValue)) {
749+
if (preg_match('#bold#', $value)) {
720750
$tValue = true; // also match bolder
721751
}
722752
$styles['bold'] = $tValue;
723753

724754
break;
725755
case 'font-style':
726756
$tValue = false;
727-
if (preg_match('#(?:italic|oblique)#', $cValue)) {
757+
if (preg_match('#(?:italic|oblique)#', $value)) {
728758
$tValue = true;
729759
}
730760
$styles['italic'] = $tValue;
731761

732762
break;
733763
case 'margin':
734-
$cValue = Converter::cssToTwip($cValue);
735-
$styles['spaceBefore'] = $cValue;
736-
$styles['spaceAfter'] = $cValue;
764+
$value = Converter::cssToTwip($value);
765+
$styles['spaceBefore'] = $value;
766+
$styles['spaceAfter'] = $value;
737767

738768
break;
739769
case 'margin-top':
740-
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
741-
$styles['spaceBefore'] = Converter::cssToTwip($cValue);
770+
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($value)
771+
$styles['spaceBefore'] = Converter::cssToTwip($value);
742772

743773
break;
744774
case 'margin-bottom':
745-
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
746-
$styles['spaceAfter'] = Converter::cssToTwip($cValue);
775+
// BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($value)
776+
$styles['spaceAfter'] = Converter::cssToTwip($value);
747777

748778
break;
749779
case 'border-color':
750-
self::mapBorderColor($styles, $cValue);
780+
self::mapBorderColor($styles, $value);
751781

752782
break;
753783
case 'border-width':
754-
$styles['borderSize'] = Converter::cssToPoint($cValue);
784+
$styles['borderSize'] = Converter::cssToPoint($value);
755785

756786
break;
757787
case 'border-style':
758-
$styles['borderStyle'] = self::mapBorderStyle($cValue);
788+
$styles['borderStyle'] = self::mapBorderStyle($value);
759789

760790
break;
761791
case 'width':
762-
if (preg_match('/([0-9]+[a-z]+)/', $cValue, $matches)) {
792+
if (preg_match('/([0-9]+[a-z]+)/', $value, $matches)) {
763793
$styles['width'] = Converter::cssToTwip($matches[1]);
764794
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::TWIP;
765-
} elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
795+
} elseif (preg_match('/([0-9]+)%/', $value, $matches)) {
766796
$styles['width'] = $matches[1] * 50;
767797
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT;
768-
} elseif (preg_match('/([0-9]+)/', $cValue, $matches)) {
798+
} elseif (preg_match('/([0-9]+)/', $value, $matches)) {
769799
$styles['width'] = $matches[1];
770800
$styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::AUTO;
771801
}
@@ -778,9 +808,9 @@ protected static function parseStyle($attribute, $styles)
778808
case 'border-left':
779809
// must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid"
780810
// Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC
781-
if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $cValue, $matches)) {
782-
if (false !== strpos($cKey, '-')) {
783-
$tmp = explode('-', $cKey);
811+
if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $value, $matches)) {
812+
if (false !== strpos($property, '-')) {
813+
$tmp = explode('-', $property);
784814
$which = $tmp[1];
785815
$which = ucfirst($which); // e.g. bottom -> Bottom
786816
} else {
@@ -803,13 +833,13 @@ protected static function parseStyle($attribute, $styles)
803833
break;
804834
case 'vertical-align':
805835
// https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align
806-
if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $cValue, $matches)) {
836+
if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $value, $matches)) {
807837
$styles['valign'] = self::mapAlignVertical($matches[0]);
808838
}
809839

810840
break;
811841
case 'page-break-after':
812-
if ($cValue == 'always') {
842+
if ($value == 'always') {
813843
$styles['isPageBreak'] = true;
814844
}
815845

tests/PhpWordTests/Shared/CssTest.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
/**
3+
* This file is part of PHPWord - A pure PHP library for reading and writing
4+
* word processing documents.
5+
*
6+
* PHPWord is free software distributed under the terms of the GNU Lesser
7+
* General Public License version 3 as published by the Free Software Foundation.
8+
*
9+
* For the full copyright and license information, please read the LICENSE
10+
* file that was distributed with this source code. For the full list of
11+
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12+
*
13+
* @see https://github.com/PHPOffice/PHPWord
14+
*
15+
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16+
*/
17+
18+
namespace PhpOffice\PhpWordTests\Shared;
19+
20+
use PhpOffice\PhpWord\Shared\Css;
21+
use PHPUnit\Framework\TestCase;
22+
23+
/**
24+
* Test class for PhpOffice\PhpWord\Shared\Css.
25+
*/
26+
class CssTest extends TestCase
{
    /**
     * An empty stylesheet must produce an empty style map.
     */
    public function testEmptyCss(): void
    {
        $css = new Css('');
        $css->process();

        // assertSame: strict comparison, so the result must be exactly an empty array.
        self::assertSame([], $css->getStyles());
    }

    /**
     * A single class rule is exposed both through the full style map and
     * through a lookup by selector.
     */
    public function testBasicCss(): void
    {
        $cssContent = '.pStyle {
            font-size:15px;
        }';

        $css = new Css($cssContent);
        $css->process();

        // Strict comparison pins keys and values as strings.
        self::assertSame([
            '.pStyle' => [
                'font-size' => '15px',
            ],
        ], $css->getStyles());
        self::assertSame([
            'font-size' => '15px',
        ], $css->getStyle('.pStyle'));
    }
}

0 commit comments

Comments
 (0)