Skip to content

Commit

Permalink
fix: escape unicode symbols from string before transfer to XML
Browse files Browse the repository at this point in the history
  • Loading branch information
Vitalii Shtykhno committed Oct 30, 2024
1 parent 2a135d3 commit ff02fe6
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 61 deletions.
137 changes: 76 additions & 61 deletions src/qtism/data/storage/xml/Utils.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<?php

declare(strict_types=1);

/**
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
Expand All @@ -15,9 +17,9 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (c) 2013-2020 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT);
* Copyright (c) 2013-2024 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT);
*
* @author Jérôme Bogaerts <[email protected]>
* @author Jérôme Bogaerts <[email protected]>
* @license GPLv2
*/

Expand All @@ -42,11 +44,11 @@ class Utils
* This utility method enables you to know what is the location of an XML Schema Definition
* file to be used to validate a $document for a given target namespace.
*
* @param DOMDocument $document A DOMDocument object.
* @param string $namespaceUri A Namespace URI you want to know the related XSD file location.
* @param DOMDocument $document A DOMDocument object.
* @param string $namespaceUri A Namespace URI you want to know the related XSD file location.
* @return bool|string False if no location can be found for $namespaceUri, otherwise the location of the XSD file.
*/
public static function getXsdLocation(DOMDocument $document, $namespaceUri)
public static function getXsdLocation(DOMDocument $document, $namespaceUri): bool|string
{
$root = $document->documentElement;
$location = false;
Expand Down Expand Up @@ -76,7 +78,7 @@ public static function getXsdLocation(DOMDocument $document, $namespaceUri)
* Change the name of $element into $name.
*
* @param DOMElement $element A DOMElement object you want to change the name.
* @param string $name The new name of $element.
* @param string $name The new name of $element.
*
* @return DOMElement
*/
Expand Down Expand Up @@ -109,7 +111,7 @@ public static function changeElementName(DOMElement $element, $name): DOMElement
* For instance, <m:math display="inline"><m:mi>x</m:mi></m:math> becomes
* <math display="inline"><mi>x</mi></math>.
*
* @param DOMElement $element The DOMElement to be anonimized.
* @param DOMElement $element The DOMElement to be anonimized.
* @return DOMElement The anonimized DOMElement copy of $element.
*/
public static function anonimizeElement(DOMElement $element): DOMElement
Expand Down Expand Up @@ -158,7 +160,7 @@ public static function anonimizeElement(DOMElement $element): DOMElement
*
* @param DOMElement $from The source DOMElement.
* @param DOMElement $into The target DOMElement.
* @param bool $deep Whether or not to import the whole node hierarchy.
* @param bool $deep Whether or not to import the whole node hierarchy.
*/
public static function importChildNodes(DOMElement $from, DOMElement $into, $deep = true): void
{
Expand Down Expand Up @@ -201,8 +203,8 @@ public static function importAttributes(DOMElement $from, DOMElement $into): voi
* * > --> &gt;
* * & --> &amp;
*
* @param string $string An input string.
* @param bool $isAttribute Whether or not to escape ', >, < which do not have to be escaped in attributes.
* @param string $string An input string.
* @param bool $isAttribute Whether or not to escape ', >, < which do not have to be escaped in attributes.
* @return string An escaped string.
*/
public static function escapeXmlSpecialChars($string, $isAttribute = false): string
Expand All @@ -224,7 +226,7 @@ public static function escapeXmlSpecialChars($string, $isAttribute = false): str
*
* Example: "minChoices" becomes "min-choices".
*
* @param string $qtiName
* @param string $qtiName
* @return string
*/
public static function webComponentFriendlyAttributeName($qtiName): string
Expand All @@ -239,7 +241,7 @@ public static function webComponentFriendlyAttributeName($qtiName): string
*
* Example: "choiceInteraction" becomes "qti-choice-interaction".
*
* @param string $qtiName
* @param string $qtiName
* @return string
*/
public static function webComponentFriendlyClassName($qtiName): string
Expand All @@ -254,7 +256,7 @@ public static function webComponentFriendlyClassName($qtiName): string
*
* Example: "qti-choice-interaction" becomes "choiceInteraction".
*
* @param string $wcName
* @param string $wcName
* @return string
*/
public static function qtiFriendlyName($wcName): string
Expand All @@ -268,9 +270,9 @@ public static function qtiFriendlyName($wcName): string
/**
* Get the attribute value of a given DOMElement object, cast in a given datatype.
*
* @param DOMElement $element The element the attribute you want to retrieve the value is bound to.
* @param string $attribute The attribute name.
* @param string $datatype The returned datatype. Accepted values are 'string', 'integer', 'float', 'double' and 'boolean'.
* @param DOMElement $element The element the attribute you want to retrieve the value is bound to.
* @param string $attribute The attribute name.
* @param string $datatype The returned datatype. Accepted values are 'string', 'integer', 'float', 'double' and 'boolean'.
* @return mixed The attribute value with the provided $datatype, or null if the attribute does not exist in $element.
* @throws InvalidArgumentException If $datatype is not in the range of possible values.
*/
Expand All @@ -284,22 +286,24 @@ public static function getDOMElementAttributeAs(DOMElement $element, string $att
}

switch ($datatype) {
case 'string':
return $attr;
case 'string':
return $attr;

case 'integer':
return (int)$attr;
case 'integer':
return (int)$attr;

case 'double':
case 'float':
return (float)$attr;
case 'double':
case 'float':
return (float)$attr;

case 'boolean':
return $attr === 'true';
case 'boolean':
return $attr === 'true';
}

if (in_array(Enumeration::class, class_implements($datatype), true)) {
/** @var Enumeration $datatype */
/**
* @var Enumeration $datatype
*/
if ($attr !== null) {
$constant = $datatype::getConstantByName($attr);
// Returns the original value when it's unknown in the enumeration.
Expand All @@ -318,9 +322,9 @@ public static function getDOMElementAttributeAs(DOMElement $element, string $att
/**
* Set the attribute value of a given DOMElement object. Boolean values will be transformed as 'true'|'false'.
*
* @param DOMElement $element A DOMElement object.
* @param string $attribute An XML attribute name.
* @param mixed $value A given value.
* @param DOMElement $element A DOMElement object.
* @param string $attribute An XML attribute name.
* @param mixed $value A given value.
*/
public static function setDOMElementAttribute(DOMElement $element, string $attribute, $value): void
{
Expand All @@ -331,7 +335,7 @@ public static function setDOMElementAttribute(DOMElement $element, string $attri
* Set the node value of a given DOMElement object. Boolean values will be transformed as 'true'|'false'.
*
* @param DOMElement $element A DOMElement object.
* @param mixed $value A given value.
* @param mixed $value A given value.
*/
public static function setDOMElementValue(DOMElement $element, $value): void
{
Expand All @@ -343,8 +347,8 @@ public static function setDOMElementValue(DOMElement $element, $value): void
* Boolean is converted to either 'true' or 'false' string.
* Other variable types are optionally using string conversion.
*
* @param mixed $value
* @param bool $encode
* @param mixed $value
* @param bool $encode
* @return string
*/
public static function valueAsString($value, $encode = true): string
Expand All @@ -358,15 +362,26 @@ public static function valueAsString($value, $encode = true): string
return (string)$value;
}

/**
 * Replace every character outside the kept ASCII range with an XML numeric
 * character reference.
 *
 * Tab (0x09), line feed (0x0A), carriage return (0x0D) and the range 0x20-0x7F
 * are left untouched. Anything else becomes "&#xHHHH;" (upper-case hexadecimal,
 * padded to at least four digits).
 *
 * When $string is valid UTF-8, each character is replaced by a single reference
 * carrying its Unicode code point, e.g. "°" (bytes C2 B0) becomes "&#x00B0;".
 * When $string is not valid UTF-8, each offending byte is replaced by a
 * reference carrying that byte's value.
 *
 * NOTE(review): the remaining ASCII control characters (0x00-0x08, 0x0B, 0x0C,
 * 0x0E-0x1F) are also turned into references; XML 1.0 parsers are expected to
 * reject such references, so confirm whether callers can receive them.
 *
 * @param string $string The text to escape.
 * @return string The escaped text.
 */
public static function escapeUnicodeForXML($string): string
{
    // The "u" modifier makes PCRE match whole UTF-8 characters instead of
    // single bytes, so the callback receives one complete character per call.
    $escaped = preg_replace_callback(
        '/[^\x09\x0A\x0D\x20-\x7F]/u',
        static function (array $matches): string {
            $char = $matches[0];
            $length = strlen($char);
            $codePoint = ord($char[0]);

            if ($length > 1) {
                // Drop the length-marker bits of the lead byte, then append the
                // six payload bits carried by each continuation byte.
                $codePoint &= 0xFF >> ($length + 1);
                for ($i = 1; $i < $length; $i++) {
                    $codePoint = ($codePoint << 6) | (ord($char[$i]) & 0x3F);
                }
            }

            return sprintf('&#x%04X;', $codePoint);
        },
        $string
    );

    if ($escaped !== null) {
        return $escaped;
    }

    // In "u" mode preg_replace_callback() returns null for subjects that are not
    // valid UTF-8. Fall back to escaping byte by byte so such input is still
    // escaped rather than rejected.
    return preg_replace_callback(
        '/[^\x09\x0A\x0D\x20-\x7F]/',
        static function (array $matches): string {
            return sprintf('&#x%04X;', ord($matches[0]));
        },
        $string
    );
}

/**
* Get the child elements of a given element by tag name. This method does
* not behave like DOMElement::getElementsByTagName. It only returns the direct
* child elements that matches $tagName but does not go recursive.
*
* @param DOMElement $element A DOMElement object.
* @param mixed $tagName The name of the tags you would like to retrieve or an array of tags to match.
* @param bool $exclude (optional) Whether the $tagName parameter must be considered as a blacklist.
* @param bool $withText (optional) Whether text nodes must be returned or not.
* @param DOMElement $element A DOMElement object.
* @param mixed $tagName The name of the tags you would like to retrieve or an array of tags to match.
* @param bool $exclude (optional) Whether the $tagName parameter must be considered as a blacklist.
* @param bool $withText (optional) Whether text nodes must be returned or not.
* @return array An array of DOMElement objects.
*/
public static function getChildElementsByTagName($element, $tagName, $exclude = false, $withText = false): array
Expand All @@ -390,8 +405,8 @@ public static function getChildElementsByTagName($element, $tagName, $exclude =
/**
* Get the children DOM Nodes with nodeType attribute equals to XML_ELEMENT_NODE.
*
* @param DOMElement $element A DOMElement object.
* @param bool $withText Whether text nodes must be returned or not.
* @param DOMElement $element A DOMElement object.
* @param bool $withText Whether text nodes must be returned or not.
* @return array An array of DOMNode objects.
*/
public static function getChildElements($element, $withText = false): array
Expand All @@ -412,8 +427,8 @@ public static function getChildElements($element, $withText = false): array
* Removes namespaces defined on non-root element when they are already
* defined on the root element.
*
* @param string $subject
* @param array $redundantNamespaces
* @param string $subject
* @param array $redundantNamespaces
* @return string
*/
public static function cleanRedundantNamespaces(string $subject, array $redundantNamespaces): string
Expand All @@ -427,8 +442,8 @@ public static function cleanRedundantNamespaces(string $subject, array $redundan
/**
* Removes all but first occurrences of a string within a string.
*
* @param string $subject
* @param string $toRemove
* @param string $subject
* @param string $toRemove
* @return string
*/
public static function removeAllButFirstOccurrence(string $subject, string $toRemove): string
Expand All @@ -445,12 +460,12 @@ public static function removeAllButFirstOccurrence(string $subject, string $toRe
/**
* Finds all the custom namespaces defined in the xml payload.
*
* @param string $xml
* @param string $xml
* @return array
*/
public static function findExternalNamespaces(string $xml): array
{
$doc = new SimpleXMLElement($xml);
$doc = new SimpleXMLElement(self::escapeUnicodeForXML($xml));
return array_filter(
$doc->getDocNamespaces(),
static function ($key) {
Expand All @@ -461,9 +476,9 @@ static function ($key) {
}

/**
* @param callable $command
* @param string $exceptionMessage
* @param int $exceptionCode
* @param callable $command
* @param string $exceptionMessage
* @param int $exceptionCode
* @throws XmlStorageException
*/
public static function executeSafeXmlCommand(
Expand Down Expand Up @@ -495,7 +510,7 @@ public static function executeSafeXmlCommand(
/**
* Format some $libXmlErrors into an array of strings instead of an array of arrays.
*
* @param LibXMLError[] $libXmlErrors
* @param LibXMLError[] $libXmlErrors
* @return string
*/
protected static function formatLibXmlErrors(array $libXmlErrors): string
Expand All @@ -504,22 +519,22 @@ protected static function formatLibXmlErrors(array $libXmlErrors): string

foreach ($libXmlErrors as $error) {
switch ($error->level) {
case LIBXML_ERR_WARNING:
// Since QTI 2.2, some schemas are imported multiple times.
// Xerces does not produce errors, but libxml does...
if (preg_match('/Skipping import of schema located/ui', $error->message) === 0) {
$formattedErrors[] = 'Warning: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
}
case LIBXML_ERR_WARNING:
// Since QTI 2.2, some schemas are imported multiple times.
// Xerces does not produce errors, but libxml does...
if (preg_match('/Skipping import of schema located/ui', $error->message) === 0) {
$formattedErrors[] = 'Warning: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
}

break;
break;

case LIBXML_ERR_ERROR:
$formattedErrors[] = 'Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
break;
case LIBXML_ERR_ERROR:
$formattedErrors[] = 'Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
break;

case LIBXML_ERR_FATAL:
$formattedErrors[] = 'Fatal Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
break;
case LIBXML_ERR_FATAL:
$formattedErrors[] = 'Fatal Error: ' . trim($error->message) . ' at ' . $error->line . ':' . $error->column . '.';
break;
}
}

Expand Down
39 changes: 39 additions & 0 deletions test/qtismtest/data/storage/xml/XmlUtilsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,45 @@ public function testFindCustomNamespaces(): void
);
}

/**
 * Feeds a QTI result document to Utils::findExternalNamespaces() and checks
 * that a non-null value comes back.
 *
 * The RESPONSE value of the fixture holds the text `160\u00b4\b°`: the
 * backslashes are literal characters (they are written as `\\` inside the
 * single-quoted PHP string) and the last character is a raw, non-ASCII
 * degree sign.
 */
public function testProcessSpecialCharsetWithoutError(): void
{
$xml = ('<?xml version="1.0" encoding="UTF-8"?>
<assessmentResult
xmlns="http://www.imsglobal.org/xsd/imsqti_result_v2p1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<context/>
<testResult identifier="44127db28512-suomynona#903756e974e7#e94025be336b1f89159af64b1f6eda5d470ac8d61#local-dev-acc.nextgen-stack-local" datestamp="2024-10-30T12:56:32+00:00"/>
<itemResult identifier="item-1" datestamp="2024-10-30T12:56:32+00:00" sessionStatus="final">
<responseVariable identifier="numAttempts" cardinality="single" baseType="integer">
<candidateResponse>
<value>1</value>
</candidateResponse>
</responseVariable>
<responseVariable identifier="duration" cardinality="single" baseType="duration">
<candidateResponse>
<value>PT22S</value>
</candidateResponse>
</responseVariable>
<outcomeVariable identifier="completionStatus" cardinality="single" baseType="identifier">
<value>completed</value>
</outcomeVariable>
<outcomeVariable identifier="SCORE" cardinality="single" baseType="float">
<value>0</value>
</outcomeVariable>
<outcomeVariable identifier="MAXSCORE" cardinality="single" baseType="float">
<value>1</value>
</outcomeVariable>
<responseVariable identifier="RESPONSE" cardinality="single" baseType="string">
<candidateResponse>
<value>160\\u00b4\\b°</value>
</candidateResponse>
</responseVariable>
</itemResult>
</assessmentResult>
');
// Only the absence of a null result is asserted here; the namespaces
// actually returned are not inspected.
$this->assertNotNull(Utils::findExternalNamespaces($xml));
}

public function testremoveAllButFirstOccurrence(): void
{
$subject = 'abc 12 abc 345abc678abc';
Expand Down

0 comments on commit ff02fe6

Please sign in to comment.