-
Notifications
You must be signed in to change notification settings - Fork 144
Increase performance up to 15000% #98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,22 +26,25 @@ | |
*/ | ||
class Parser { | ||
|
||
private $sText; | ||
private $aText; | ||
private $iCurrentPosition; | ||
private $oParserSettings; | ||
private $sCharset; | ||
private $iLength; | ||
private $peekCache = null; | ||
private $blockRules; | ||
private $aSizeUnits; | ||
|
||
public function __construct($sText, Settings $oParserSettings = null) { | ||
$this->sText = $sText; | ||
$this->iCurrentPosition = 0; | ||
if ($oParserSettings === null) { | ||
$oParserSettings = Settings::create(); | ||
} | ||
$this->oParserSettings = $oParserSettings; | ||
if ($this->oParserSettings->bMultibyteSupport) { | ||
$this->aText = preg_split('//u', $sText, null, PREG_SPLIT_NO_EMPTY); | ||
} else { | ||
$this->aText = str_split($sText); | ||
} | ||
$this->blockRules = explode('/', AtRule::BLOCK_RULES); | ||
|
||
foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) { | ||
|
@@ -56,7 +59,7 @@ public function __construct($sText, Settings $oParserSettings = null) { | |
|
||
public function setCharset($sCharset) { | ||
$this->sCharset = $sCharset; | ||
$this->iLength = $this->strlen($this->sText); | ||
$this->iLength = count($this->aText); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see a problem here: splitting should only happen after the charset is known (because it depends on the charset used). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it so, why is |
||
} | ||
|
||
public function getCharset() { | ||
|
@@ -284,9 +287,8 @@ private function parseRuleSet($oRuleSet) { | |
try { | ||
$sConsume = $this->consumeUntil(array("\n", ";", '}'), true); | ||
// We need to “unfind” the matches to the end of the ruleSet as this will be matched later | ||
if($this->streql($this->substr($sConsume, $this->strlen($sConsume)-1, 1), '}')) { | ||
if($this->streql(substr($sConsume, -1), '}')) { | ||
--$this->iCurrentPosition; | ||
$this->peekCache = null; | ||
} else { | ||
$this->consumeWhiteSpace(); | ||
while ($this->comes(';')) { | ||
|
@@ -469,8 +471,8 @@ private function parseURLValue() { | |
} | ||
|
||
/** | ||
* Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too. | ||
*/ | ||
* Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too. | ||
*/ | ||
private function identifierIs($sIdentifier, $sMatch) { | ||
return (strcasecmp($sIdentifier, $sMatch) === 0) | ||
?: preg_match("/^(-\\w+-)?$sMatch$/i", $sIdentifier) === 1; | ||
|
@@ -484,38 +486,28 @@ private function comes($sString, $bCaseInsensitive = false) { | |
} | ||
|
||
private function peek($iLength = 1, $iOffset = 0) { | ||
if (($peek = (!$iOffset && ($iLength === 1))) && | ||
!is_null($this->peekCache)) { | ||
return $this->peekCache; | ||
} | ||
$iOffset += $this->iCurrentPosition; | ||
if ($iOffset >= $this->iLength) { | ||
return ''; | ||
} | ||
$iLength = min($iLength, $this->iLength-$iOffset); | ||
$out = $this->substr($this->sText, $iOffset, $iLength); | ||
if ($peek) { | ||
$this->peekCache = $out; | ||
} | ||
$out = $this->substr($iOffset, $iLength); | ||
return $out; | ||
} | ||
|
||
private function consume($mValue = 1) { | ||
if (is_string($mValue)) { | ||
$iLength = $this->strlen($mValue); | ||
if (!$this->streql($this->substr($this->sText, $this->iCurrentPosition, $iLength), $mValue)) { | ||
if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) { | ||
throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5))); | ||
} | ||
$this->iCurrentPosition += $this->strlen($mValue); | ||
$this->peekCache = null; | ||
return $mValue; | ||
} else { | ||
if ($this->iCurrentPosition + $mValue > $this->iLength) { | ||
throw new UnexpectedTokenException($mValue, $this->peek(5), 'count'); | ||
} | ||
$sResult = $this->substr($this->sText, $this->iCurrentPosition, $mValue); | ||
$sResult = $this->substr($this->iCurrentPosition, $mValue); | ||
$this->iCurrentPosition += $mValue; | ||
$this->peekCache = null; | ||
return $sResult; | ||
} | ||
} | ||
|
@@ -587,15 +579,23 @@ private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false) | |
} | ||
|
||
private function inputLeft() { | ||
return $this->substr($this->sText, $this->iCurrentPosition, -1); | ||
return $this->substr($this->iCurrentPosition, -1); | ||
} | ||
|
||
private function substr($sString, $iStart, $iLength) { | ||
if ($this->oParserSettings->bMultibyteSupport) { | ||
return mb_substr($sString, $iStart, $iLength, $this->sCharset); | ||
} else { | ||
return substr($sString, $iStart, $iLength); | ||
private function substr($iStart, $iLength) { | ||
if ($iLength < 0) { | ||
$iLength = $this->iLength - $iStart + $iLength; | ||
} | ||
if ($iStart + $iLength > $this->iLength) { | ||
$iLength = $this->iLength - $iStart; | ||
} | ||
$out = ''; | ||
while ($iLength > 0) { | ||
$out .= $this->aText[$iStart]; | ||
$iStart++; | ||
$iLength--; | ||
} | ||
return $out; | ||
} | ||
|
||
private function strlen($sString) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m not sure
preg_split
is multibyte-safe. Sure it supports utf-8 (if compiled in) but I don’t think it copes well with other encodings like utf-16, shift-jis. Also there is no way to pass the encoding used. I’ve never added unit tests for other encodings, though, because the identifier parsing code wasn’t actually able to match any encoding where the identifiers weren’t ASCII. Maybe I’ll just make that my next big rework project…