-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: escape unicode symbols from string before transfer to XML
- Loading branch information
Vitalii Shtykhno
committed
Oct 30, 2024
1 parent
2a135d3
commit ff02fe6
Showing
2 changed files
with
115 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
/** | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
|
@@ -15,9 +17,9 @@ | |
* along with this program; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
* | ||
* Copyright (c) 2013-2020 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT); | ||
* Copyright (c) 2013-2024 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT); | ||
* | ||
* @author Jérôme Bogaerts <[email protected]> | ||
* @author Jérôme Bogaerts <[email protected]> | ||
* @license GPLv2 | ||
*/ | ||
|
||
|
@@ -42,11 +44,11 @@ class Utils | |
* This utility method enables you to know what is the location of an XML Schema Definition | ||
* file to be used to validate a $document for a given target namespace. | ||
* | ||
* @param DOMDocument $document A DOMDocument object. | ||
* @param string $namespaceUri A Namespace URI you want to know the related XSD file location. | ||
* @param DOMDocument $document A DOMDocument object. | ||
* @param string $namespaceUri A Namespace URI you want to know the related XSD file location. | ||
* @return bool|string False if no location can be found for $namespaceUri, otherwise the location of the XSD file. | ||
*/ | ||
public static function getXsdLocation(DOMDocument $document, $namespaceUri) | ||
public static function getXsdLocation(DOMDocument $document, $namespaceUri): bool|string | ||
{ | ||
$root = $document->documentElement; | ||
$location = false; | ||
|
@@ -76,7 +78,7 @@ public static function getXsdLocation(DOMDocument $document, $namespaceUri) | |
* Change the name of $element into $name. | ||
* | ||
* @param DOMElement $element A DOMElement object you want to change the name. | ||
* @param string $name The new name of $element. | ||
* @param string $name The new name of $element. | ||
* | ||
* @return DOMElement | ||
*/ | ||
|
@@ -109,7 +111,7 @@ public static function changeElementName(DOMElement $element, $name): DOMElement | |
* For instance, <m:math display="inline"><m:mi>x</m:mi></m:math> becomes | ||
* <math display="inline"><mi>x</mi></math>. | ||
* | ||
* @param DOMElement $element The DOMElement to be anonimized. | ||
* @param DOMElement $element The DOMElement to be anonimized. | ||
* @return DOMElement The anonimized DOMElement copy of $element. | ||
*/ | ||
public static function anonimizeElement(DOMElement $element): DOMElement | ||
|
@@ -158,7 +160,7 @@ public static function anonimizeElement(DOMElement $element): DOMElement | |
* | ||
* @param DOMElement $from The source DOMElement. | ||
* @param DOMElement $into The target DOMElement. | ||
* @param bool $deep Whether or not to import the whole node hierarchy. | ||
* @param bool $deep Whether or not to import the whole node hierarchy. | ||
*/ | ||
public static function importChildNodes(DOMElement $from, DOMElement $into, $deep = true): void | ||
{ | ||
|
@@ -201,8 +203,8 @@ public static function importAttributes(DOMElement $from, DOMElement $into): voi | |
* * > --> &gt; | ||
* * & --> &amp; | ||
* | ||
* @param string $string An input string. | ||
* @param bool $isAttribute Whether or not to escape ', >, < which do not have to be escaped in attributes. | ||
* @param string $string An input string. | ||
* @param bool $isAttribute Whether or not to escape ', >, < which do not have to be escaped in attributes. | ||
* @return string An escaped string. | ||
*/ | ||
public static function escapeXmlSpecialChars($string, $isAttribute = false): string | ||
|
@@ -224,7 +226,7 @@ public static function escapeXmlSpecialChars($string, $isAttribute = false): str | |
* | ||
* Example: "minChoices" becomes "min-choices". | ||
* | ||
* @param string $qtiName | ||
* @param string $qtiName | ||
* @return string | ||
*/ | ||
public static function webComponentFriendlyAttributeName($qtiName): string | ||
|
@@ -239,7 +241,7 @@ public static function webComponentFriendlyAttributeName($qtiName): string | |
* | ||
* Example: "choiceInteraction" becomes "qti-choice-interaction". | ||
* | ||
* @param string $qtiName | ||
* @param string $qtiName | ||
* @return string | ||
*/ | ||
public static function webComponentFriendlyClassName($qtiName): string | ||
|
@@ -254,7 +256,7 @@ public static function webComponentFriendlyClassName($qtiName): string | |
* | ||
* Example: "qti-choice-interaction" becomes "choiceInteraction". | ||
* | ||
* @param string $wcName | ||
* @param string $wcName | ||
* @return string | ||
*/ | ||
public static function qtiFriendlyName($wcName): string | ||
|
@@ -268,9 +270,9 @@ public static function qtiFriendlyName($wcName): string | |
/** | ||
* Get the attribute value of a given DOMElement object, cast in a given datatype. | ||
* | ||
* @param DOMElement $element The element the attribute you want to retrieve the value is bound to. | ||
* @param string $attribute The attribute name. | ||
* @param string $datatype The returned datatype. Accepted values are 'string', 'integer', 'float', 'double' and 'boolean'. | ||
* @param DOMElement $element The element the attribute you want to retrieve the value is bound to. | ||
* @param string $attribute The attribute name. | ||
* @param string $datatype The returned datatype. Accepted values are 'string', 'integer', 'float', 'double' and 'boolean'. | ||
* @return mixed The attribute value with the provided $datatype, or null if the attribute does not exist in $element. | ||
* @throws InvalidArgumentException If $datatype is not in the range of possible values. | ||
*/ | ||
|
@@ -284,22 +286,24 @@ public static function getDOMElementAttributeAs(DOMElement $element, string $att | |
} | ||
|
||
switch ($datatype) { | ||
case 'string': | ||
return $attr; | ||
case 'string': | ||
return $attr; | ||
|
||
case 'integer': | ||
return (int)$attr; | ||
case 'integer': | ||
return (int)$attr; | ||
|
||
case 'double': | ||
case 'float': | ||
return (float)$attr; | ||
case 'double': | ||
case 'float': | ||
return (float)$attr; | ||
|
||
case 'boolean': | ||
return $attr === 'true'; | ||
case 'boolean': | ||
return $attr === 'true'; | ||
} | ||
|
||
if (in_array(Enumeration::class, class_implements($datatype), true)) { | ||
/** @var Enumeration $datatype */ | ||
/** | ||
* @var Enumeration $datatype | ||
*/ | ||
if ($attr !== null) { | ||
$constant = $datatype::getConstantByName($attr); | ||
// Returns the original value when it's unknown in the enumeration. | ||
|
@@ -318,9 +322,9 @@ public static function getDOMElementAttributeAs(DOMElement $element, string $att | |
/** | ||
* Set the attribute value of a given DOMElement object. Boolean values will be transformed into the strings 'true' or 'false'. | ||
* | ||
* @param DOMElement $element A DOMElement object. | ||
* @param string $attribute An XML attribute name. | ||
* @param mixed $value A given value. | ||
* @param DOMElement $element A DOMElement object. | ||
* @param string $attribute An XML attribute name. | ||
* @param mixed $value A given value. | ||
*/ | ||
public static function setDOMElementAttribute(DOMElement $element, string $attribute, $value): void | ||
{ | ||
|
@@ -331,7 +335,7 @@ public static function setDOMElementAttribute(DOMElement $element, string $attri | |
* Set the node value of a given DOMElement object. Boolean values will be transformed as 'true'|'false'. | ||
* | ||
* @param DOMElement $element A DOMElement object. | ||
* @param mixed $value A given value. | ||
* @param mixed $value A given value. | ||
*/ | ||
public static function setDOMElementValue(DOMElement $element, $value): void | ||
{ | ||
|
@@ -343,8 +347,8 @@ public static function setDOMElementValue(DOMElement $element, $value): void | |
* Boolean is converted to either 'true' or 'false' string. | ||
* Other variable types are optionally using string conversion. | ||
* | ||
* @param mixed $value | ||
* @param bool $encode | ||
* @param mixed $value | ||
* @param bool $encode | ||
* @return string | ||
*/ | ||
public static function valueAsString($value, $encode = true): string | ||
|
@@ -358,15 +362,26 @@ public static function valueAsString($value, $encode = true): string | |
return (string)$value; | ||
} | ||
|
||
/**
 * Replace every character outside of printable ASCII (tab, line feed and
 * carriage return are kept as-is) by its hexadecimal XML character
 * reference, e.g. "é" becomes "&#x00E9;".
 *
 * The input is processed as UTF-8 so that a multi-byte character yields a
 * single reference to its Unicode code point rather than one reference per
 * byte. When the input is not valid UTF-8, the method falls back to a
 * byte-wise replacement where each non-ASCII byte is referenced by its
 * own value.
 *
 * @param string $string An input string, expected to be UTF-8 encoded.
 * @return string The string with non-ASCII characters turned into character references.
 */
public static function escapeUnicodeForXML($string): string
{
    $toReference = static function (array $matches): string {
        $char = $matches[0];
        $length = strlen($char);
        $lead = ord($char[0]);

        // Decode the UTF-8 sequence by hand (no ext-mbstring dependency):
        // drop the length marker bits of the lead byte, then append the
        // 6 payload bits carried by each continuation byte.
        $codePoint = match ($length) {
            1 => $lead,
            2 => $lead & 0x1F,
            3 => $lead & 0x0F,
            default => $lead & 0x07,
        };

        for ($i = 1; $i < $length; $i++) {
            $codePoint = ($codePoint << 6) | (ord($char[$i]) & 0x3F);
        }

        return sprintf('&#x%04X;', $codePoint);
    };

    // The 'u' modifier makes PCRE match whole code points instead of bytes.
    $escaped = preg_replace_callback('/[^\x09\x0A\x0D\x20-\x7F]/u', $toReference, $string);

    if ($escaped === null) {
        // In UTF-8 mode PCRE returns null on malformed input: fall back to
        // the byte-wise replacement so such input is still escaped.
        $escaped = preg_replace_callback('/[^\x09\x0A\x0D\x20-\x7F]/', $toReference, $string);
    }

    return $escaped ?? $string;
}
|
||
/** | ||
* Get the child elements of a given element by tag name. This method does | ||
* not behave like DOMElement::getElementsByTagName. It only returns the direct | ||
* child elements that matches $tagName but does not go recursive. | ||
* | ||
* @param DOMElement $element A DOMElement object. | ||
* @param mixed $tagName The name of the tags you would like to retrieve or an array of tags to match. | ||
* @param bool $exclude (optional) Whether the $tagName parameter must be considered as a blacklist. | ||
* @param bool $withText (optional) Whether text nodes must be returned or not. | ||
* @param DOMElement $element A DOMElement object. | ||
* @param mixed $tagName The name of the tags you would like to retrieve or an array of tags to match. | ||
* @param bool $exclude (optional) Whether the $tagName parameter must be considered as a blacklist. | ||
* @param bool $withText (optional) Whether text nodes must be returned or not. | ||
* @return array An array of DOMElement objects. | ||
*/ | ||
public static function getChildElementsByTagName($element, $tagName, $exclude = false, $withText = false): array | ||
|
@@ -390,8 +405,8 @@ public static function getChildElementsByTagName($element, $tagName, $exclude = | |
/** | ||
* Get the children DOM Nodes with nodeType attribute equals to XML_ELEMENT_NODE. | ||
* | ||
* @param DOMElement $element A DOMElement object. | ||
* @param bool $withText Whether text nodes must be returned or not. | ||
* @param DOMElement $element A DOMElement object. | ||
* @param bool $withText Whether text nodes must be returned or not. | ||
* @return array An array of DOMNode objects. | ||
*/ | ||
public static function getChildElements($element, $withText = false): array | ||
|
@@ -412,8 +427,8 @@ public static function getChildElements($element, $withText = false): array | |
* Removes namespaces defined on non-root element when they are already | ||
* defined on the root element. | ||
* | ||
* @param string $subject | ||
* @param array $redundantNamespaces | ||
* @param string $subject | ||
* @param array $redundantNamespaces | ||
* @return string | ||
*/ | ||
public static function cleanRedundantNamespaces(string $subject, array $redundantNamespaces): string | ||
|
@@ -427,8 +442,8 @@ public static function cleanRedundantNamespaces(string $subject, array $redundan | |
/** | ||
* Removes all but first occurrences of a string within a string. | ||
* | ||
* @param string $subject | ||
* @param string $toRemove | ||
* @param string $subject | ||
* @param string $toRemove | ||
* @return string | ||
*/ | ||
public static function removeAllButFirstOccurrence(string $subject, string $toRemove): string | ||
|
@@ -445,12 +460,12 @@ public static function removeAllButFirstOccurrence(string $subject, string $toRe | |
/** | ||
* Finds all the custom namespaces defined in the xml payload. | ||
* | ||
* @param string $xml | ||
* @param string $xml | ||
* @return array | ||
*/ | ||
public static function findExternalNamespaces(string $xml): array | ||
{ | ||
$doc = new SimpleXMLElement($xml); | ||
$doc = new SimpleXMLElement(self::escapeUnicodeForXML($xml)); | ||
return array_filter( | ||
$doc->getDocNamespaces(), | ||
static function ($key) { | ||
|
@@ -461,9 +476,9 @@ static function ($key) { | |
} | ||
|
||
/** | ||
* @param callable $command | ||
* @param string $exceptionMessage | ||
* @param int $exceptionCode | ||
* @param callable $command | ||
* @param string $exceptionMessage | ||
* @param int $exceptionCode | ||
* @throws XmlStorageException | ||
*/ | ||
public static function executeSafeXmlCommand( | ||
|
@@ -495,7 +510,7 @@ public static function executeSafeXmlCommand( | |
/** | ||
* Format some $libXmlErrors into an array of strings instead of an array of arrays. | ||
* | ||
* @param LibXMLError[] $libXmlErrors | ||
* @param LibXMLError[] $libXmlErrors | ||
* @return string | ||
*/ | ||
protected static function formatLibXmlErrors(array $libXmlErrors): string | ||
|
@@ -504,22 +519,22 @@ protected static function formatLibXmlErrors(array $libXmlErrors): string | |
|
||
foreach ($libXmlErrors as $error) { | ||
switch ($error->level) { | ||
case LIBXML_ERR_WARNING: | ||
// Since QTI 2.2, some schemas are imported multiple times. | ||
// Xerces does not produce errors, but libxml does... | ||
if (preg_match('/Skipping import of schema located/ui', $error->message) === 0) { | ||
$formattedErrors[] = 'Warning: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
} | ||
case LIBXML_ERR_WARNING: | ||
// Since QTI 2.2, some schemas are imported multiple times. | ||
// Xerces does not produce errors, but libxml does... | ||
if (preg_match('/Skipping import of schema located/ui', $error->message) === 0) { | ||
$formattedErrors[] = 'Warning: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
} | ||
|
||
break; | ||
break; | ||
|
||
case LIBXML_ERR_ERROR: | ||
$formattedErrors[] = 'Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
break; | ||
case LIBXML_ERR_ERROR: | ||
$formattedErrors[] = 'Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
break; | ||
|
||
case LIBXML_ERR_FATAL: | ||
$formattedErrors[] = 'Fatal Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
break; | ||
case LIBXML_ERR_FATAL: | ||
$formattedErrors[] = 'Fatal Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.'; | ||
break; | ||
} | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters