Skip to content

Commit ec85d7d

Browse files
committed
#80: Basic HTML reader
1 parent 0164e37 commit ec85d7d

File tree

8 files changed

+160
-6
lines changed

8 files changed

+160
-6
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ This is the changelog between releases of PHPWord. Releases are listed in revers
44

55
## 0.11.0 - Not yet released
66

7-
This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. Ability to add elements to PHPWord object via HTML were implemeted. RTF reader were initiated.
7+
This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. The ability to add elements to a PHPWord object via HTML was implemented. RTF and HTML readers were initiated.
88

99
### Features
1010

@@ -33,6 +33,7 @@ This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four
3333
- RTF Reader: Basic RTF reader - @ivanlanin GH-72 GH-252
3434
- Element: New `Line` element - @basjan GH-253
3535
- Title: Ability to apply numbering in heading - @ivanlanin GH-193
36+
- HTML Reader: Basic HTML reader - @ivanlanin GH-80
3637

3738
### Bugfixes
3839

samples/Sample_30_ReadHTML.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
include_once 'Sample_Header.php';
3+
4+
// Read contents
5+
$name = basename(__FILE__, '.php');
6+
$source = realpath(__DIR__ . "/resources/{$name}.html");
7+
8+
echo date('H:i:s'), " Reading contents from `{$source}`", EOL;
9+
$phpWord = \PhpOffice\PhpWord\IOFactory::load($source, 'HTML');
10+
11+
// Save file
12+
echo write($phpWord, basename(__FILE__, '.php'), $writers);
13+
if (!CLI) {
14+
include_once 'Sample_Footer.php';
15+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<html>
2+
<head>
3+
<meta charset="UTF-8" />
4+
<title>PHPWord</title>
5+
</head>
6+
<body>
7+
<h1>Adding element via HTML</h1>
8+
<p>Some well formed HTML snippet needs to be used</p>
9+
<p>With for example <strong>some<sup>1</sup> <em>inline</em> formatting</strong><sub>1</sub></p>
10+
<p>Unordered (bulleted) list:</p>
11+
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>
12+
<p>Ordered (numbered) list:</p>
13+
<ol><li>Item 1</li><li>Item 2</li></ol>
14+
</body>
15+
</html>

src/PhpWord/IOFactory.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public static function createWriter(PhpWord $phpWord, $name = 'Word2007')
5151
*/
5252
public static function createReader($name = 'Word2007')
5353
{
54-
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF'))) {
54+
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF', 'HTML'))) {
5555
throw new Exception("\"{$name}\" is not a valid reader.");
5656
}
5757

src/PhpWord/Reader/HTML.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?php
2+
/**
3+
* This file is part of PHPWord - A pure PHP library for reading and writing
4+
* word processing documents.
5+
*
6+
* PHPWord is free software distributed under the terms of the GNU Lesser
7+
* General Public License version 3 as published by the Free Software Foundation.
8+
*
9+
* For the full copyright and license information, please read the LICENSE
10+
* file that was distributed with this source code. For the full list of
11+
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12+
*
13+
* @link https://github.com/PHPOffice/PHPWord
14+
* @copyright 2010-2014 PHPWord contributors
15+
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16+
*/
17+
18+
namespace PhpOffice\PhpWord\Reader;
19+
20+
use PhpOffice\PhpWord\PhpWord;
21+
use PhpOffice\PhpWord\Shared\Html as HTMLParser;
22+
23+
/**
24+
* HTML Reader class
25+
*
26+
* @since 0.11.0
27+
*/
28+
class HTML extends AbstractReader implements ReaderInterface
29+
{
30+
/**
31+
* Loads PhpWord from file
32+
*
33+
* @param string $docFile
34+
* @throws \Exception
35+
* @return \PhpOffice\PhpWord\PhpWord
36+
*/
37+
public function load($docFile)
38+
{
39+
$phpWord = new PhpWord();
40+
41+
if ($this->canRead($docFile)) {
42+
$section = $phpWord->addSection();
43+
HTMLParser::addHtml($section, file_get_contents($docFile), true);
44+
} else {
45+
throw new \Exception("Cannot read {$docFile}.");
46+
}
47+
48+
return $phpWord;
49+
}
50+
}

src/PhpWord/Shared/Html.php

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,27 @@ class Html
3232
* Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter
3333
*
3434
* @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added
35-
* @param string $html the code to parse
35+
* @param string $html The code to parse
36+
* @param bool $fullHTML Whether $html is a full HTML document; when false, the fragment is wrapped in a 'body' tag
3637
*/
37-
public static function addHtml($element, $html)
38+
public static function addHtml($element, $html, $fullHTML = false)
3839
{
3940
/*
4041
* @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
4142
* which could be applied when such an element occurs in the parseNode function.
4243
*/
44+
45+
// Preprocess: remove all line endings, decode HTML entities, and wrap HTML fragments in a body tag
4346
$html = str_replace(array("\n", "\r"), '', $html);
47+
$html = html_entity_decode($html);
48+
if ($fullHTML === false) {
49+
$html = '<body>' . $html . '</body>';
50+
}
4451

52+
// Load DOM
4553
$dom = new \DOMDocument();
4654
$dom->preserveWhiteSpace = true;
47-
$dom->loadXML('<body>' . html_entity_decode($html) . '</body>');
48-
55+
$dom->loadXML($html);
4956
$node = $dom->getElementsByTagName('body');
5057

5158
self::parseNode($node->item(0), $element);
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?php
2+
/**
3+
* This file is part of PHPWord - A pure PHP library for reading and writing
4+
* word processing documents.
5+
*
6+
* PHPWord is free software distributed under the terms of the GNU Lesser
7+
* General Public License version 3 as published by the Free Software Foundation.
8+
*
9+
* For the full copyright and license information, please read the LICENSE
10+
* file that was distributed with this source code. For the full list of
11+
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12+
*
13+
* @link https://github.com/PHPOffice/PHPWord
14+
* @copyright 2010-2014 PHPWord contributors
15+
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16+
*/
17+
18+
namespace PhpOffice\PhpWord\Tests\Reader;
19+
20+
use PhpOffice\PhpWord\IOFactory;
21+
22+
/**
23+
* Test class for PhpOffice\PhpWord\Reader\HTML
24+
*
25+
* @coversDefaultClass \PhpOffice\PhpWord\Reader\HTML
26+
* @runTestsInSeparateProcesses
27+
*/
28+
class HTMLTest extends \PHPUnit_Framework_TestCase
29+
{
30+
/**
31+
* Test load
32+
*/
33+
public function testLoad()
34+
{
35+
$filename = __DIR__ . '/../_files/documents/reader.html';
36+
$phpWord = IOFactory::load($filename, 'HTML');
37+
$this->assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord);
38+
}
39+
40+
/**
41+
* Test load exception
42+
*
43+
* @expectedException \Exception
44+
* @expectedExceptionMessage Cannot read
45+
*/
46+
public function testLoadException()
47+
{
48+
$filename = __DIR__ . '/../_files/documents/foo.html';
49+
IOFactory::load($filename, 'HTML');
50+
}
51+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<html>
2+
<head>
3+
<meta charset="UTF-8" />
4+
<title>PHPWord</title>
5+
</head>
6+
<body>
7+
<h1>Adding element via HTML</h1>
8+
<p>Some well formed HTML snippet needs to be used</p>
9+
<p>With for example <strong>some<sup>1</sup> <em>inline</em> formatting</strong><sub>1</sub></p>
10+
<p>Unordered (bulleted) list:</p>
11+
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>
12+
<p>Ordered (numbered) list:</p>
13+
<ol><li>Item 1</li><li>Item 2</li></ol>
14+
</body>
15+
</html>

0 commit comments

Comments
 (0)