/**
 * Check if the file is a binary WPS file.
 *
 * Only the first 1024 bytes are read and tested against
 * self::WPS_MAGIC_PATTERN.
 *
 * @param string $docFile
 *
 * @return bool True when the magic pattern matches; false when it does not
 *              or when the file content could not be read as a string
 */
private function isBinaryWpsFile($docFile)
{
    $fileContent = file_get_contents($docFile, false, null, 0, 1024);
    if (!is_string($fileContent)) {
        return false;
    }

    return preg_match(self::WPS_MAGIC_PATTERN, $fileContent) === 1;
}

/**
 * Load a binary WPS file.
 *
 * The text extracted by WPSBinaryReader is added to a new section as a
 * single text element. No section is created when nothing was extracted.
 *
 * @param string $docFile
 *
 * @return PhpWord The same instance that was passed in
 */
private function loadBinaryWps($docFile, PhpWord $phpWord)
{
    $reader = new WPSBinaryReader();
    $text = $reader->extractText($docFile);

    // Compare against '' explicitly: empty() is also true for the string "0",
    // which would silently drop a document whose entire text is "0".
    if (is_string($text) && $text !== '') {
        $section = $phpWord->addSection();
        $section->addText($text);
    }

    return $phpWord;
}
/**
 * Read document part.
 *
 * The part reader class is looked up by name under the Reader\WPS
 * namespace. A part whose class does not exist is skipped without error.
 */
private function readPart(PhpWord $phpWord, array $relationships, string $partName, string $docFile, string $xmlFile): void
{
    $partClass = "PhpOffice\\PhpWord\\Reader\\WPS\\{$partName}";
    if (!class_exists($partClass)) {
        return;
    }

    /** @var WPS\AbstractPart $part Type hint */
    $part = new $partClass($docFile, $xmlFile);
    $part->setRels($relationships);
    $part->read($phpWord);
}

/**
 * Read all relationship files.
 *
 * @return array One ['type' => media type, 'target' => full path] entry per
 *               manifest:file-entry element of META-INF/manifest.xml
 */
private function readRelationships(string $docFile): array
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($docFile, 'META-INF/manifest.xml');

    $rels = [];
    foreach ($xmlReader->getElements('manifest:file-entry') as $entry) {
        $mediaType = $xmlReader->getAttribute('manifest:media-type', $entry);
        $fullPath = $xmlReader->getAttribute('manifest:full-path', $entry);
        $rels[] = ['type' => $mediaType, 'target' => $fullPath];
    }

    return $rels;
}
/**
 * Read paragraph.
 *
 * Walks the paragraph's child nodes in document order so that direct text,
 * text:span content and text:line-break elements end up in the text run in
 * the same sequence as in the source XML.
 *
 * @param XMLReader $xmlReader Reader used to resolve span values
 * @param DOMElement $domNode The text:p element
 * @param mixed $parent Container that provides addTextRun()
 * @param string $docPart Not used by this implementation
 */
protected function readParagraph(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
{
    $textRun = $parent->addTextRun();

    foreach ($domNode->childNodes as $child) {
        if ($child->nodeType === XML_TEXT_NODE) {
            // Skip whitespace-only nodes, but keep the untrimmed value of
            // real text so the space separating it from a following span
            // survives.
            $text = (string) $child->nodeValue;
            if (trim($text) !== '') {
                $textRun->addText($text);
            }

            continue;
        }

        if (!$child instanceof DOMElement) {
            continue;
        }

        if ($child->nodeName === 'text:span') {
            $text = $xmlReader->getValue('.', $child);
            // Explicit comparison: empty() would also drop the string "0".
            if ($text !== null && $text !== '') {
                $textRun->addText($text);
            }
        } elseif ($child->nodeName === 'text:line-break') {
            $textRun->addTextBreak();
        }
    }
}

/**
 * Read heading.
 *
 * Adds a title whose depth comes from the text:outline-level attribute.
 * A missing or empty level falls back to 1.
 */
private function readHeading(XMLReader $xmlReader, DOMElement $node, \PhpOffice\PhpWord\Element\Section $parent): void
{
    $text = $xmlReader->getValue('.', $node);
    $level = $xmlReader->getAttribute('text:outline-level', $node);
    if (empty($level)) {
        $level = 1;
    }
    $parent->addTitle($text, (int) $level);
}
/**
 * Get direct text content of a node, excluding child element content.
 *
 * Only the node's own text children are concatenated; the result is trimmed.
 */
private function getDirectTextContent(DOMElement $node): string
{
    $pieces = [];
    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType !== XML_TEXT_NODE) {
            continue;
        }
        $pieces[] = $childNode->nodeValue;
    }

    return trim(implode('', $pieces));
}
PREG_OFFSET_CAPTURE)) { + throw new Exception("No 'Magic' block: not a valid WPS file"); + } + + $magicType = $matches[1][0]; + $headersStart = $matches[0][1]; + + if ($magicType === 'CHNKINK') { + // Check WPS version + $versionData = unpack('v', substr($fileContent, $headersStart - 2, 2)); + if (!is_array($versionData) || !isset($versionData[1])) { + throw new Exception('Unable to read WPS version'); + } + $version = $versionData[1]; + if ($version < 8) { + throw new Exception('Unable to convert a WPS file prior to version 8'); + } + } + + // Get entries position and total entries + $entriesPos = $headersStart + 24; + if (strlen($fileContent) < $headersStart + 14) { + throw new Exception('File corrupt: not enough data for total entries'); + } + $data = unpack('x12/vTotalEntries', substr($fileContent, $headersStart, 14)); + if (!is_array($data) || !isset($data['TotalEntries'])) { // Ensure valid unpacked data + throw new Exception('File corrupt: unable to retrieve total entries'); + } + $totalEntries = $data['TotalEntries']; + + // Process entries to find TEXT section + $textData = ''; + while (true) { + [$entries, $nextOffset, $textHeaderOffset, $textSize] = + $this->processEntries(substr($fileContent, $entriesPos)); + + if ($textSize > 0) { + // TEXT section found + $textOffset = $textHeaderOffset + $headersStart; + + // Get text from main block + $blockSize = min(self::TEXT_BLOCK, $textSize); + $textData = substr($fileContent, $textOffset, (int) $blockSize); + $textSize -= $blockSize; + + // Handle additional blocks if present + if ($textSize > 0) { + $textOffset = 0x800; // Second block location + $blockSize = min(self::TEXT_BLOCK, $textSize); + $textData .= substr($fileContent, $textOffset, (int) $blockSize); + $textSize -= $blockSize; + } + + // Handle any remaining text + if ($textSize > 0) { + $textOffset = $textHeaderOffset + $headersStart + self::TEXT_BLOCK; + $textData .= substr($fileContent, $textOffset, (int) $textSize); + } + + break; + } + + 
/**
 * Extract text from an OLE Compound Document formatted WPS file.
 *
 * This is a heuristic, not an OLE parser: it collects every run of four or
 * more printable ASCII characters stored as two-byte units with a zero high
 * byte, converts each run to UTF-8 and joins them with newlines.
 *
 * TODO: Implement proper OLE Compound Document reader
 *
 * @param string $fileName Path to the WPS file
 *
 * @return string Extracted text content ('' when the file cannot be read)
 */
private function extractTextFromOleDocument($fileName)
{
    $fileContent = file_get_contents($fileName);
    if (!is_string($fileContent)) {
        return '';
    }

    // Look for UTF-16 encoded text blocks (common in WPS files)
    preg_match_all('/(?:[\x20-\x7E]\x00){4,}/', $fileContent, $matches);

    $text = '';
    foreach ($matches[0] ?? [] as $run) {
        $text .= $this->convertToUtf8($run) . "\n";
    }

    return $text;
}
/**
 * Process entries in the WPS file to find the TEXT section.
 *
 * The buffer starts with an 8-byte header (magic word, number of entries in
 * this block, offset of the next block) followed by variable-size entries.
 * Each entry begins with its own 16-bit size.
 *
 * @param string $entryBuff The buffer containing entries
 *
 * @return array Array with entries count, next offset, text header offset and text size.
 *               Next offset is 0 when TEXT was found; text offset and size are 0 when it was not.
 */
private function processEntries($entryBuff)
{
    // Check if the buffer has enough data
    if (strlen($entryBuff) < 8) {
        throw new Exception('Invalid format - Entry buffer too short');
    }

    // Unpack entry header. "V" is an unsigned 32-bit little-endian integer;
    // the previous "I" code has machine-dependent size and byte order, while
    // the neighbouring 16-bit fields are already read as little-endian ("v").
    $data = unpack('vmagic/vlocal/Vnext_offset', substr($entryBuff, 0, 8));
    if (!is_array($data) || !isset($data['magic'], $data['local'], $data['next_offset'])) { // Ensure valid unpacked data
        throw new Exception('Invalid format - Entry header unpacking failed');
    }

    if ($data['magic'] != self::ENTRY_MAGIC) {
        throw new Exception('Invalid format - Entry magic tag incorrect');
    }

    $local = $data['local'];
    $nextOffset = $data['next_offset'];
    $entryPos = 0x08; // 2 WORDs & 1 DWORD

    // Bytes consumed by the entry format below: x2 + a4 + x10 + V + V.
    $minEntrySize = 24;

    // Process each entry
    for ($i = 0; $i < $local; ++$i) {
        // Get entry size
        if (strlen($entryBuff) < $entryPos + 2) {
            throw new Exception('Invalid format - Entry buffer too short');
        }
        $sizeData = unpack('v', substr($entryBuff, $entryPos, 2));
        if (!is_array($sizeData) || count($sizeData) === 0) {
            throw new Exception('Invalid format - Unable to unpack entry size');
        }
        $size = $sizeData[1];

        // Get name, offset and size
        if (strlen($entryBuff) < $entryPos + $size) {
            throw new Exception('Invalid format - Entry buffer too short');
        }
        if ($size < $minEntrySize) {
            // unpack() would only emit a warning and return false here;
            // fail with the same exception without the warning.
            throw new Exception('Invalid format - Entry data unpacking failed');
        }
        $entryData = substr($entryBuff, $entryPos, $size);
        $entryInfo = unpack('x2/a4name/x10/Voffset/Vsize', $entryData);
        if (!is_array($entryInfo) || !isset($entryInfo['name'], $entryInfo['offset'], $entryInfo['size'])) { // Ensure valid unpacked data
            throw new Exception('Invalid format - Entry data unpacking failed');
        }

        if ($entryInfo['name'] === 'TEXT') {
            // Success! Found TEXT section
            return [$local, 0, $entryInfo['offset'], $entryInfo['size']];
        }

        $entryPos += $size;
    }

    // No TEXT section found in this block, need to continue to next block
    return [$local, $nextOffset, 0, 0];
}

/**
 * Convert UTF-16 text to UTF-8.
 *
 * The byte order is taken from a leading BOM when present. Without a BOM,
 * UTF-16LE is preferred and UTF-16BE is used only when the bytes are not
 * valid UTF-16LE but are valid UTF-16BE. Line endings are normalised after
 * decoding: a lone carriage return becomes CRLF.
 *
 * @param string $text Text in UTF-16 format
 *
 * @return string Text in UTF-8 format
 */
private function convertToUtf8($text)
{
    $text = (string) $text;
    $bom = substr($text, 0, 2);

    if ($bom === "\xFF\xFE") {
        $encoding = 'UTF-16LE';
        $text = substr($text, 2);
    } elseif ($bom === "\xFE\xFF") {
        $encoding = 'UTF-16BE';
        $text = substr($text, 2);
    } elseif (mb_check_encoding($text, 'UTF-16LE') || !mb_check_encoding($text, 'UTF-16BE')) {
        // Most WPS files use little-endian; also the fallback when neither validates.
        $encoding = 'UTF-16LE';
    } else {
        $encoding = 'UTF-16BE';
    }

    $decoded = mb_convert_encoding($text, 'UTF-8', $encoding);
    if (!is_string($decoded)) {
        return '';
    }

    // Normalise line endings only AFTER decoding. Inserting a one-byte "\n"
    // into the raw UTF-16 stream shifts every following two-byte code unit
    // and corrupts the rest of the text.
    $normalised = preg_replace('/\r(?!\n)/', "\r\n", $decoded);

    return is_string($normalised) ? $normalised : $decoded;
}
/**
 * Add new media element.
 *
 * The element is stored under $mediaType, which must be exactly one of
 * 'header', 'footer' or 'section'; any other value is ignored.
 *
 * @param mixed $container Stored as the element's 'target'
 * @param mixed $mediaType Bucket the element is stored in
 * @param mixed $source Stored as the element's 'source'
 * @param null|\PhpOffice\PhpWord\Element\Image $image Stored as the element's 'type'
 */
public static function addElement($container, $mediaType, $source, ?\PhpOffice\PhpWord\Element\Image $image = null): void
{
    // Strict comparison: with loose matching a boolean true (and, on PHP 7,
    // the integer 0) equals every string in the whitelist and would create
    // an unintended bucket.
    if (!in_array($mediaType, ['header', 'footer', 'section'], true)) {
        return;
    }

    self::$elements[$mediaType][] = ['source' => $source, 'target' => $container, 'type' => $image];
}

/**
 * Get media elements.
 *
 * NOTE(review): $container is never read. With $type omitted the complete
 * registry (all three buckets) is returned, not one container's elements.
 * Confirm whether callers passing only a container name expect that.
 *
 * @param mixed $container Currently unused
 * @param null|string $type Bucket name; null returns every bucket
 *
 * @return array Elements of the requested bucket ([] when unknown), or all buckets
 */
public static function getElements($container, $type = null): array
{
    if ($type !== null) {
        return self::$elements[$type] ?? [];
    }

    return self::$elements;
}
/**
 * Get parent writer.
 */
public function getParentWriter(): AbstractWriter
{
    return $this->parentWriter;
}

/**
 * Get XML Writer.
 *
 * The writer is created on first use and then reused for this part.
 *
 * NOTE(review): assumes XMLWriter is PhpOffice\PhpWord\Shared\XMLWriter (the
 * use statement is outside this view). Its constructor takes the storage
 * mode first, the temporary directory second and the compatibility flag
 * third. Passing the compatibility flag first selected a storage backend
 * and never enabled compatibility.
 */
protected function getXmlWriter(): XMLWriter
{
    if (!$this->xmlWriter instanceof XMLWriter) {
        $this->xmlWriter = new XMLWriter(XMLWriter::STORAGE_MEMORY, './', Settings::hasCompatibility());
    }

    return $this->xmlWriter;
}

/**
 * Write part.
 *
 * @return string The serialised XML of this part
 */
abstract public function write(): string;
$xmlWriter->writeAttribute('xmlns:xlink', 'http://www.w3.org/1999/xlink'); + $xmlWriter->writeAttribute('xmlns:dc', 'http://purl.org/dc/elements/1.1/'); + $xmlWriter->writeAttribute('xmlns:meta', 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'); + $xmlWriter->writeAttribute('xmlns:number', 'urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0'); + $xmlWriter->writeAttribute('xmlns:svg', 'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0'); + $xmlWriter->writeAttribute('xmlns:chart', 'urn:oasis:names:tc:opendocument:xmlns:chart:1.0'); + $xmlWriter->writeAttribute('xmlns:dr3d', 'urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0'); + $xmlWriter->writeAttribute('xmlns:math', 'http://www.w3.org/1998/Math/MathML'); + $xmlWriter->writeAttribute('xmlns:form', 'urn:oasis:names:tc:opendocument:xmlns:form:1.0'); + $xmlWriter->writeAttribute('xmlns:script', 'urn:oasis:names:tc:opendocument:xmlns:script:1.0'); + $xmlWriter->writeAttribute('xmlns:ooo', 'http://openoffice.org/2004/office'); + $xmlWriter->writeAttribute('xmlns:ooow', 'http://openoffice.org/2004/writer'); + $xmlWriter->writeAttribute('xmlns:oooc', 'http://openoffice.org/2004/calc'); + $xmlWriter->writeAttribute('xmlns:dom', 'http://www.w3.org/2001/xml-events'); + $xmlWriter->writeAttribute('xmlns:xforms', 'http://www.w3.org/2002/xforms'); + $xmlWriter->writeAttribute('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema'); + $xmlWriter->writeAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'); + $xmlWriter->writeAttribute('xmlns:rpt', 'http://openoffice.org/2005/report'); + $xmlWriter->writeAttribute('xmlns:of', 'urn:oasis:names:tc:opendocument:xmlns:of:1.2'); + $xmlWriter->writeAttribute('office:version', '1.2'); + + // office:scripts + $xmlWriter->startElement('office:scripts'); + $xmlWriter->endElement(); + + // office:font-face-decls + $xmlWriter->startElement('office:font-face-decls'); + $xmlWriter->startElement('style:font-face'); + $xmlWriter->writeAttribute('style:name', 'Arial'); + 
/**
 * Write section.
 *
 * Emits a text:section element named after the section id and writes the
 * section's elements inside it.
 */
private function writeSection(XMLWriter $xmlWriter, Section $section): void
{
    $xmlWriter->startElement('text:section');
    $xmlWriter->writeAttribute('text:style-name', 'Sect' . $section->getSectionId());
    $xmlWriter->writeAttribute('text:name', 'Section' . $section->getSectionId());

    // Process all elements
    $this->writeElements($xmlWriter, $section->getElements());

    $xmlWriter->endElement(); // text:section
}

/**
 * Write elements.
 *
 * Text runs, plain text and tables have dedicated writers. Any other
 * container is flattened by writing its children. Remaining element types
 * are skipped.
 */
private function writeElements(XMLWriter $xmlWriter, array $elements): void
{
    foreach ($elements as $element) {
        if ($element instanceof TextRun) {
            $this->writeTextRun($xmlWriter, $element);
        } elseif ($element instanceof Text) {
            $this->writeText($xmlWriter, $element);
        } elseif ($element instanceof Table) {
            $this->writeTable($xmlWriter, $element);
        } elseif ($element instanceof AbstractContainer) {
            $this->writeElements($xmlWriter, $element->getElements());
        }
    }
}

/**
 * Write text element.
 *
 * The text is XML-escaped unless output escaping is disabled in Settings,
 * in which case it is written raw because the caller has escaped it.
 * Writing raw unconditionally produced malformed XML for text containing
 * "&" or "<".
 */
private function writeText(XMLWriter $xmlWriter, Text $text): void
{
    $content = (string) $text->getText();

    $xmlWriter->startElement('text:p');
    if (\PhpOffice\PhpWord\Settings::isOutputEscapingEnabled()) {
        $xmlWriter->text($content);
    } else {
        $xmlWriter->writeRaw($content);
    }
    $xmlWriter->endElement();
}
/**
 * Write text run element.
 *
 * All Text children are written into a single text:p; other child element
 * types are skipped. Text is XML-escaped unless output escaping is disabled
 * in Settings, in which case it is written raw because the caller has
 * escaped it.
 */
private function writeTextRun(XMLWriter $xmlWriter, TextRun $textrun): void
{
    $escape = \PhpOffice\PhpWord\Settings::isOutputEscapingEnabled();

    $xmlWriter->startElement('text:p');

    foreach ($textrun->getElements() as $element) {
        if (!$element instanceof Text) {
            continue;
        }

        $content = (string) $element->getText();
        if ($escape) {
            // Raw output would break the document on "&" or "<".
            $xmlWriter->text($content);
        } else {
            $xmlWriter->writeRaw($content);
        }
    }

    $xmlWriter->endElement();
}

/**
 * Write table element.
 *
 * Emits table:table > table:table-row > table:table-cell and writes each
 * cell's elements inside the cell.
 */
private function writeTable(XMLWriter $xmlWriter, Table $table): void
{
    $xmlWriter->startElement('table:table');
    $xmlWriter->writeAttribute('table:name', 'Table' . $table->getElementId());

    foreach ($table->getRows() as $row) {
        $xmlWriter->startElement('table:table-row');

        foreach ($row->getCells() as $cell) {
            $xmlWriter->startElement('table:table-cell');
            $this->writeElements($xmlWriter, $cell->getElements());
            $xmlWriter->endElement(); // table:table-cell
        }

        $xmlWriter->endElement(); // table:table-row
    }

    $xmlWriter->endElement(); // table:table
}
/**
 * Write manifest item.
 *
 * @param string $href Value of manifest:full-path
 * @param string $mediaType Value of manifest:media-type
 */
private function writeManifestItem(XMLWriter $xmlWriter, string $href, string $mediaType): void
{
    $xmlWriter->startElement('manifest:file-entry');
    $xmlWriter->writeAttribute('manifest:media-type', $mediaType);
    $xmlWriter->writeAttribute('manifest:full-path', $href);
    $xmlWriter->endElement();
}

/**
 * Write media files.
 *
 * Adds one manifest entry under Pictures/ for every section media element
 * whose type is the string 'image'.
 */
private function writeMediaFiles(XMLWriter $xmlWriter): void
{
    // Document media
    foreach (Media::getElements('section') as $medium) {
        // Skip entries that lack the expected keys instead of raising
        // undefined-index notices, and compare the type strictly.
        if (!is_array($medium) || !isset($medium['type'], $medium['target'])) {
            continue;
        }
        if ($medium['type'] !== 'image') {
            continue;
        }

        $target = (string) $medium['target'];
        $this->writeManifestItem($xmlWriter, 'Pictures/' . $target, $this->getMediaType($target));
    }
}

/**
 * Get media type from file extension.
 *
 * The extension is matched case-insensitively; unknown extensions map to
 * application/octet-stream.
 */
private function getMediaType(string $filename): string
{
    $mediaTypes = [
        'jpeg' => 'image/jpeg',
        'jpg' => 'image/jpeg',
        'png' => 'image/png',
        'gif' => 'image/gif',
        'bmp' => 'image/bmp',
        'tiff' => 'image/tiff',
        'tif' => 'image/tiff',
        'svg' => 'image/svg+xml',
    ];

    $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));

    return $mediaTypes[$extension] ?? 'application/octet-stream';
}
'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'); + $xmlWriter->writeAttribute('xmlns:wps', 'http://wps.kdanmobile.com/2017/office'); + + $xmlWriter->startElement('office:meta'); + + // Creator + $creator = $docInfo->getCreator(); + if ($creator !== null) { + $xmlWriter->writeElement('meta:initial-creator', $creator); + $xmlWriter->writeElement('dc:creator', $creator); + } + + // Creation date + $createdDate = $docInfo->getCreated(); + if ($createdDate !== null) { + $xmlWriter->startElement('meta:creation-date'); + $xmlWriter->writeRaw((string) $createdDate); + $xmlWriter->endElement(); + } + + // Modification date + $modifiedDate = $docInfo->getModified(); + if ($modifiedDate !== null) { + $xmlWriter->startElement('dc:date'); + $xmlWriter->writeRaw((string) $modifiedDate); + $xmlWriter->endElement(); + } + + // Title + $title = $docInfo->getTitle(); + if ($title !== null) { + $xmlWriter->writeElement('dc:title', $title); + } + + // Description + $description = $docInfo->getDescription(); + if ($description !== null) { + $xmlWriter->writeElement('dc:description', $description); + } + + // Subject + $subject = $docInfo->getSubject(); + if ($subject !== null) { + $xmlWriter->writeElement('dc:subject', $subject); + } + + // Keywords + $keywords = $docInfo->getKeywords(); + if ($keywords !== null) { + $xmlWriter->writeElement('meta:keyword', $keywords); + } + + // Category + $category = $docInfo->getCategory(); + if ($category !== null) { + $this->writeUserDefined($xmlWriter, 'Category', $category); + } + + // Company + $company = $docInfo->getCompany(); + if ($company !== null) { + $this->writeUserDefined($xmlWriter, 'Company', $company); + } + + $xmlWriter->endElement(); // office:meta + $xmlWriter->endElement(); // office:document-meta + + return $xmlWriter->getData(); + } + + /** + * Write user defined value. 
/**
 * Write user defined value.
 *
 * Emits a meta:user-defined element with the given meta:name. The value is
 * written as escaped text, so "&" or "<" in a category or company name
 * cannot break the XML.
 *
 * @param string $name Value of the meta:name attribute
 * @param string $value Element content
 */
private function writeUserDefined(XMLWriter $xmlWriter, string $name, string $value): void
{
    $xmlWriter->startElement('meta:user-defined');
    $xmlWriter->writeAttribute('meta:name', $name);
    $xmlWriter->text($value);
    $xmlWriter->endElement();
}
$heading->getDepth()); + + // Test simple paragraph + $paragraph1 = $elements[1]; + self::assertInstanceOf('PhpOffice\\PhpWord\\Element\\TextRun', $paragraph1); + $paragraphElements = $paragraph1->getElements(); + self::assertCount(1, $paragraphElements); + if ($paragraphElements[0] instanceof \PhpOffice\PhpWord\Element\Text) { + self::assertEquals('Simple paragraph', $paragraphElements[0]->getText()); + } + + // Test paragraph with styled text + $paragraph2 = $elements[2]; + self::assertInstanceOf('PhpOffice\\PhpWord\\Element\\TextRun', $paragraph2); + $paragraphElements = $paragraph2->getElements(); + self::assertCount(2, $paragraphElements); + if ($paragraphElements[0] instanceof \PhpOffice\PhpWord\Element\Text) { + self::assertEquals('Paragraph with ', $paragraphElements[0]->getText()); + } + if ($paragraphElements[1] instanceof \PhpOffice\PhpWord\Element\Text) { + self::assertEquals('styled text', $paragraphElements[1]->getText()); + } + + // Test paragraph with line break + $paragraph3 = $elements[3]; + self::assertInstanceOf('PhpOffice\\PhpWord\\Element\\TextRun', $paragraph3); + $paragraphElements = $paragraph3->getElements(); + self::assertCount(3, $paragraphElements); + if ($paragraphElements[0] instanceof \PhpOffice\PhpWord\Element\Text) { + self::assertEquals('Paragraph with ', $paragraphElements[0]->getText()); + } + self::assertInstanceOf('PhpOffice\\PhpWord\\Element\\TextBreak', $paragraphElements[1]); + if ($paragraphElements[2] instanceof \PhpOffice\PhpWord\Element\Text) { + self::assertEquals('line break', $paragraphElements[2]->getText()); + } + } + + public function testReadEmptyContent(): void + { + // Create empty content file + $emptyFile = tempnam(sys_get_temp_dir(), 'wps'); + $zip = new ZipArchive(); + $zip->open($emptyFile, ZipArchive::CREATE); + $contentXml = ' + + + + + + '; + $zip->addFromString('content.xml', $contentXml); + $zip->close(); + + $phpWord = new PhpWord(); + $content = new Content($emptyFile, 'content.xml'); + 
$content->read($phpWord); + + // Verify that no elements were added to the section + $sections = $phpWord->getSections(); + self::assertCount(0, $sections); + + unlink($emptyFile); + } +} diff --git a/tests/PhpWordTests/Reader/WPS/MetaTest.php b/tests/PhpWordTests/Reader/WPS/MetaTest.php new file mode 100644 index 0000000000..19f464e48e --- /dev/null +++ b/tests/PhpWordTests/Reader/WPS/MetaTest.php @@ -0,0 +1,98 @@ +tempFile = tempnam(sys_get_temp_dir(), 'wps'); + $zip = new ZipArchive(); + $zip->open($this->tempFile, ZipArchive::CREATE); + + // Using a minimal meta.xml with sample data + $metaXml = ' + + + PHPWord + + '; + $zip->addFromString('meta.xml', $metaXml); + $zip->close(); + } + + protected function tearDown(): void + { + if (file_exists($this->tempFile)) { + unlink($this->tempFile); + } + } + + public function testRead(): void + { + $phpWord = new PhpWord(); + $meta = new Meta($this->tempFile, 'meta.xml'); + $meta->read($phpWord); + + $docInfo = $phpWord->getDocInfo(); + + // Verify all metadata properties were correctly extracted + self::assertEquals('Test Creator', $docInfo->getCreator()); + self::assertEquals('Test Document Title', $docInfo->getTitle()); + self::assertEquals('Test Document Subject', $docInfo->getSubject()); + self::assertEquals('Test Document Description', $docInfo->getDescription()); + self::assertEquals('test, keywords, phpword', $docInfo->getKeywords()); + self::assertEquals('Test Category', $docInfo->getCategory()); + self::assertEquals('Test Company', $docInfo->getCompany()); + } + + public function testReadWithMissingProperties(): void + { + // Create a file with minimal metadata + $minimalFile = tempnam(sys_get_temp_dir(), 'wps'); + $zip = new ZipArchive(); + $zip->open($minimalFile, ZipArchive::CREATE); + + $minimalMetaXml = ' + + + Only Title + + '; + $zip->addFromString('meta.xml', $minimalMetaXml); + $zip->close(); + + $phpWord = new PhpWord(); + $meta = new Meta($minimalFile, 'meta.xml'); + $meta->read($phpWord); + + 
$docInfo = $phpWord->getDocInfo(); + + // Verify only the title was set, other properties should have default values + self::assertEquals('Only Title', $docInfo->getTitle()); + self::assertEquals('', $docInfo->getCreator()); + self::assertEquals('', $docInfo->getSubject()); + self::assertEquals('', $docInfo->getDescription()); + self::assertEquals('', $docInfo->getKeywords()); + self::assertEquals('', $docInfo->getCategory()); + self::assertEquals('', $docInfo->getCompany()); + + unlink($minimalFile); + } +} diff --git a/tests/PhpWordTests/Reader/WPSTest.php b/tests/PhpWordTests/Reader/WPSTest.php new file mode 100644 index 0000000000..2b6fb93a27 --- /dev/null +++ b/tests/PhpWordTests/Reader/WPSTest.php @@ -0,0 +1,137 @@ +xmlWpsFile = tempnam(sys_get_temp_dir(), 'wps'); + $zip = new ZipArchive(); + $zip->open($this->xmlWpsFile, ZipArchive::CREATE); + + // Add content.xml + $contentXml = ' + + + + Test paragraph in content.xml + + + '; + $zip->addFromString('content.xml', $contentXml); + + // Add meta.xml + $metaXml = ' + + + Test Document Title + Test Author + + '; + $zip->addFromString('meta.xml', $metaXml); + + // Add manifest.xml + $manifestXml = ' + + + + + '; + $zip->addEmptyDir('META-INF'); + $zip->addFromString('META-INF/manifest.xml', $manifestXml); + + $zip->close(); + + // Create a temporary binary WPS file with magic pattern + $this->binaryWpsFile = tempnam(sys_get_temp_dir(), 'wps'); + file_put_contents($this->binaryWpsFile, 'CHNKWKS' . str_repeat(' ', 100) . 
'Test text content'); + } + + protected function tearDown(): void + { + if (file_exists($this->xmlWpsFile)) { + unlink($this->xmlWpsFile); + } + + if (file_exists($this->binaryWpsFile)) { + unlink($this->binaryWpsFile); + } + } + + public function testLoadXmlBasedWpsFile(): void + { + $reader = new WPS(); + $phpWord = $reader->load($this->xmlWpsFile); + + self::assertInstanceOf(PhpWord::class, $phpWord); + + // Check that document info was read from meta.xml + $docInfo = $phpWord->getDocInfo(); + self::assertEquals('Test Document Title', $docInfo->getTitle()); + self::assertEquals('Test Author', $docInfo->getCreator()); + } + + public function testLoadBinaryWpsFile(): void + { + $reader = new WPS(); + $phpWord = $reader->load($this->binaryWpsFile); + + self::assertInstanceOf(PhpWord::class, $phpWord); + + // Binary WPS should have created a section with the extracted text + $sections = $phpWord->getSections(); + self::assertCount(1, $sections); + } + + public function testCanReadFlag(): void + { + $reader = new WPS(); + + // XML-based WPS file + self::assertTrue($reader->canRead($this->xmlWpsFile)); + + // Binary WPS file + self::assertTrue($reader->canRead($this->binaryWpsFile)); + + // Non-WPS file + $invalidFile = tempnam(sys_get_temp_dir(), 'txt'); + file_put_contents($invalidFile, 'Not a WPS file'); + self::assertFalse($reader->canRead($invalidFile)); + unlink($invalidFile); + } + + public function testInvalidFile(): void + { + $this->expectException(Exception::class); + + $reader = new WPS(); + $reader->load('/path/to/non/existing/file.wps'); + + // The exception should be thrown before this line + self::fail('Expected exception not thrown'); + } +} diff --git a/tests/PhpWordTests/Writer/WPS/MediaTest.php b/tests/PhpWordTests/Writer/WPS/MediaTest.php new file mode 100644 index 0000000000..f45308eb2b --- /dev/null +++ b/tests/PhpWordTests/Writer/WPS/MediaTest.php @@ -0,0 +1,88 @@ +write(); + + // Assert that the result is a string + 
self::assertIsString($result); + + // Assert that the result contains expected XML structure + self::assertStringContainsString('', $result); + self::assertStringContainsString('', $result); + self::assertStringContainsString('', $result); + self::assertStringContainsString('', $result); + self::assertStringContainsString('', $result); + } +} diff --git a/tests/PhpWordTests/Writer/WPS/Part/ManifestTest.php b/tests/PhpWordTests/Writer/WPS/Part/ManifestTest.php new file mode 100644 index 0000000000..1103a71ef8 --- /dev/null +++ b/tests/PhpWordTests/Writer/WPS/Part/ManifestTest.php @@ -0,0 +1,25 @@ +write(); + + // Assert that the result is a string + self::assertIsString($result); + + // Assert that the result contains expected XML structure + self::assertStringContainsString('write(); + + // Assert that the result is a string + self::assertIsString($result); + + // Assert that the result contains expected XML structure + self::assertStringContainsString('', $result); + } +} diff --git a/tests/PhpWordTests/Writer/WPSTest.php b/tests/PhpWordTests/Writer/WPSTest.php new file mode 100644 index 0000000000..766d4635c5 --- /dev/null +++ b/tests/PhpWordTests/Writer/WPSTest.php @@ -0,0 +1,111 @@ +addSection(); + $section->addText('Hello, WPS!'); + + $writer = new WPS($phpWord); + $tempFile = tempnam(sys_get_temp_dir(), 'wps'); + $writer->save($tempFile); + + self::assertFileExists($tempFile); + + // Test ZIP archive content + $zip = new ZipArchive(); + $zip->open($tempFile); + + // Verify required files exist + self::assertTrue($zip->locateName('content.xml') !== false); + self::assertTrue($zip->locateName('meta.xml') !== false); + self::assertTrue($zip->locateName('META-INF/manifest.xml') !== false); + + $zip->close(); + + $content = file_get_contents($tempFile); + if (is_string($content)) { + self::assertEquals('PK', substr($content, 0, 2)); + } + + unlink($tempFile); + } + + public function testWriterParts(): void + { + $phpWord = new PhpWord(); + $writer = new 
WPS($phpWord); + + // Test the writer parts are initialized correctly + self::assertInstanceOf('PhpOffice\\PhpWord\\Writer\\WPS\\Part\\Content', $writer->getWriterPart('content')); + self::assertInstanceOf('PhpOffice\\PhpWord\\Writer\\WPS\\Part\\Meta', $writer->getWriterPart('meta')); + self::assertInstanceOf('PhpOffice\\PhpWord\\Writer\\WPS\\Part\\Manifest', $writer->getWriterPart('manifest')); + } + + public function testWithMedia(): void + { + $phpWord = new PhpWord(); + $section = $phpWord->addSection(); + + // Add an image to the document; __DIR__ has no trailing slash, so the fixture path is resolved relative to this test directory (tests/PhpWordTests/Writer) + $imagePath = __DIR__ . '/../_files/images/earth.jpg'; + $section->addImage($imagePath); + + // Create header and add an image to it + $header = $section->addHeader(); + $header->addImage($imagePath); + + // Create footer and add an image to it + $footer = $section->addFooter(); + $footer->addImage($imagePath); + + $writer = new WPS($phpWord); + $tempFile = tempnam(sys_get_temp_dir(), 'wps'); + $writer->save($tempFile); + + // Test ZIP archive contains images + $zip = new ZipArchive(); + $zip->open($tempFile); + + // The exact path to images depends on the media handler implementation + // Just verify the Pictures directory exists + self::assertTrue($zip->locateName('Pictures/') !== false); + + $zip->close(); + unlink($tempFile); + } + + public function testSaveToOutput(): void + { + $phpWord = new PhpWord(); + $section = $phpWord->addSection(); + $section->addText('Hello, WPS!'); + + $writer = new WPS($phpWord); + + ob_start(); + $writer->save('php://output'); + $content = ob_get_clean(); + + // Check that the output starts with the ZIP file signature (PK header) + if (is_string($content)) { + self::assertEquals('PK', substr($content, 0, 2)); + } + } +} diff --git a/tests/PhpWordTests/Writer/Word2007/Element/TOCTest.php b/tests/PhpWordTests/Writer/Word2007/Element/TOCTest.php index 95e79114aa..d250382370 100644 --- a/tests/PhpWordTests/Writer/Word2007/Element/TOCTest.php +++ 
b/tests/PhpWordTests/Writer/Word2007/Element/TOCTest.php @@ -66,8 +66,14 @@ public function testWriteTitleWithoutpageNumber(): void //more than one title and random text for create more than one page for ($i = 1; $i <= 10; ++$i) { $section->addTitle('Title ' . $i, 1); - $content = file_get_contents('https://loripsum.net/api/10/long'); - \PhpOffice\PhpWord\Shared\Html::addHtml($section, $content ? $content : '', false, false); + // Using static content instead of making a network request + $content = '

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed non risus. + Suspendisse lectus tortor, dignissim sit amet, adipiscing nec, ultricies sed, dolor. + Cras elementum ultrices diam. Maecenas ligula massa, varius a, semper congue, + euismod non, mi.

Proin porttitor, orci nec nonummy molestie, enim est eleifend mi, + non fermentum diam nisl sit amet erat. Duis semper. Duis arcu massa, scelerisque vitae, + consequat in, pretium a, enim.

'; + \PhpOffice\PhpWord\Shared\Html::addHtml($section, $content, false, false); $section->addPageBreak(); }