Refactor text splitting logic into dedicated commands and introduce PartText DTO for improved modularity and maintainability.

This commit is contained in:
2026-06-20 16:05:31 +05:00
parent 92fd2cab1b
commit fe90500e44
6 changed files with 441 additions and 105 deletions
+40
View File
@@ -0,0 +1,40 @@
<?php declare(strict_types=1);
namespace KorElf\TranslateLaravel\DTO;
/**
 * Ordered collection of text fragments that together form one translation part.
 *
 * Every fragment may carry a literal prefix and suffix (for example the HTML
 * tags that surrounded it in the source). Those are not returned for
 * translation; they are glued back around the fragment when the result is
 * reassembled.
 */
final class PartText
{
    /** @var array<int, string> Fragments handed to the translator, keyed by insertion index. */
    private array $texts = [];

    /** @var array<int, string|null> Literal text emitted before the fragment with the same key. */
    private array $beforeTexts = [];

    /** @var array<int, string|null> Literal text emitted after the fragment with the same key. */
    private array $afterTexts = [];

    /** Index that the next added fragment will receive. */
    private int $part = 0;

    /**
     * Appends a fragment together with its optional surrounding literals.
     *
     * @param string      $text       Fragment to be translated.
     * @param string|null $beforeText Literal emitted before the fragment, or null for none.
     * @param string|null $afterText  Literal emitted after the fragment, or null for none.
     */
    public function add(string $text, ?string $beforeText = null, ?string $afterText = null): void
    {
        $this->texts[$this->part] = $text;
        $this->beforeTexts[$this->part] = $beforeText;
        $this->afterTexts[$this->part] = $afterText;
        $this->part++;
    }

    /**
     * Returns the fragments in insertion order, keyed by their index.
     *
     * @return array<int, string>
     */
    public function getTextsForTranslation(): array
    {
        return $this->texts;
    }

    /**
     * Reassembles the part from (translated) fragments.
     *
     * The given array is walked in its own order; each value is wrapped in the
     * prefix and suffix registered under the same key. A key this part does not
     * know is emitted without a wrapper instead of triggering an
     * "undefined array key" warning.
     *
     * @param array<int, string> $texts Fragments keyed like getTextsForTranslation().
     */
    public function getTextsAfterTranslation(array $texts): string
    {
        $result = '';

        foreach ($texts as $key => $text) {
            // `??` covers both a stored null and a key that was never added.
            $result .= ($this->beforeTexts[$key] ?? '') . $text . ($this->afterTexts[$key] ?? '');
        }

        return $result;
    }
}
+2 -2
View File
@@ -10,7 +10,7 @@ final readonly class ProcessTranslateDto
private string $groupName, private string $groupName,
private string $key, private string $key,
private int $part, private int $part,
private string $text, private PartText $text,
private TextType $textType, private TextType $textType,
private string $targetLanguageCode, private string $targetLanguageCode,
private ?string $sourceLanguageCode = null, private ?string $sourceLanguageCode = null,
@@ -32,7 +32,7 @@ final readonly class ProcessTranslateDto
return $this->part; return $this->part;
} }
public function getText(): string public function getText(): PartText
{ {
return $this->text; return $this->text;
} }
+49 -15
View File
@@ -59,28 +59,62 @@ final class ProcessTranslate implements ShouldQueue, ShouldBeEncrypted, ProcessT
{ {
$param = $this->param; $param = $this->param;
$groupName = $param->getGroupName(); $groupName = $param->getGroupName();
$key = $param->getKey();
$part = $param->getPart();
$translated = Cache::get($groupName, []); $translated = Cache::get($groupName, []);
if (!isset($translated[$param->getKey()])) { if (!isset($translated[$param->getKey()])) {
$translated[$param->getKey()] = []; $translated[$param->getKey()] = [];
} }
$translate = Translate::service($param->getDriver()); $translated[$key][$part] = $this->translate();
$function = $param->getTextType()->functionName();
$key = $param->getKey();
$part = $param->getPart();
$translated[$key][$part] = $param->getText();
if (\trim($param->getText()) !== '') {
$translated[$key][$part] = $translate->{$function}(
$param->getText(),
$param->getTargetLanguageCode(),
$param->getSourceLanguageCode()
);
}
Cache::put($groupName, $translated, 86400); Cache::put($groupName, $translated, 86400);
} }
/**
 * Translates the fragments of the job's PartText and reassembles the part.
 *
 * Fragments that are empty after trimming are not sent to the driver and are
 * passed through as they are. The remaining fragments are sent as one
 * zero-based list; every returned entry is written back to the key its source
 * fragment had, and entries with an unknown index are ignored.
 */
private function translate(): string
{
    $param = $this->param;
    $service = Translate::service($param->getDriver());
    $method = $param->getTextType()->functionName();

    $partText = $param->getText();
    $fragments = $partText->getTextsForTranslation();

    // array_filter keeps the original keys, which are needed to map results back.
    $translatable = array_filter(
        $fragments,
        static fn (string $fragment): bool => \trim($fragment) !== ''
    );

    if ($translatable === []) {
        return $partText->getTextsAfterTranslation($fragments);
    }

    $fragmentKeys = array_keys($translatable);

    $translatedTexts = $service->{$method}(
        array_values($translatable),
        $param->getTargetLanguageCode(),
        $param->getSourceLanguageCode()
    );

    foreach ($translatedTexts as $index => $translatedText) {
        if (isset($fragmentKeys[$index])) {
            $fragments[$fragmentKeys[$index]] = $translatedText;
        }
    }

    return $partText->getTextsAfterTranslation($fragments);
}
} }
@@ -0,0 +1,270 @@
<?php declare(strict_types=1);
namespace KorElf\TranslateLaravel\Translate\SplitTextIntoParts;
use DOMNodeList;
use KorElf\TranslateLaravel\DTO\PartText;
use DOMNode;
use DOMDocument;
/**
 * Splits an HTML fragment into PartText chunks that respect a length budget.
 *
 * The fragment is parsed into a DOM and walked in document order. Content that
 * fits is accumulated as one text entry. An element that is too large is opened
 * up: its opening and closing tags are recorded as the "before" and "after"
 * literals of PartText entries, and its children are processed recursively.
 * Oversized text nodes are split on sentence boundaries.
 *
 * Only text and serialized child markup count towards the budget; the wrapper
 * tags stored as before/after literals do not.
 */
final class SplitHTMLCommand
{
    /** Table children that only group rows. */
    private const TABLE_SECTION_TAGS = ['thead', 'tbody', 'tfoot'];

    /** Row children that hold cell content. */
    private const TABLE_CELL_TAGS = ['td', 'th'];

    /** Elements whose text content is raw and must not be entity-escaped. */
    private const RAW_TEXT_TAGS = ['script', 'style'];

    /** @var array<int, PartText> Parts produced so far, keyed by part index. */
    private array $parts = [];

    /** Index of the part currently being filled. */
    private int $currentPart = 0;

    /** Number of characters accounted to the current part. */
    private int $currentLength = 0;

    /** Text or markup collected for the next PartText entry. */
    private string $currentText = '';

    /** Opening tags that must precede the next PartText entry. */
    private string $currentBeforeText = '';

    /** Closing tags that must follow the next PartText entry. */
    private string $currentAfterText = '';

    /**
     * @param int $maxLength Maximum number of characters accumulated per part.
     */
    public function __construct(private readonly int $maxLength)
    {
    }

    /**
     * Splits the given HTML into parts.
     *
     * @param string $html UTF-8 HTML fragment.
     * @return array<int, PartText>
     */
    public function execute(string $html): array
    {
        // Reset every buffer so that a reused instance cannot leak state between runs.
        $this->currentPart = 0;
        $this->currentLength = 0;
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
        $this->parts = [
            0 => new PartText(),
        ];

        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new DOMDocument();
            // Non-ASCII characters are passed as numeric entities so libxml does not
            // misread the UTF-8 input. This replaces the 'HTML-ENTITIES' pseudo
            // encoding of mb_convert_encoding(), deprecated since PHP 8.2.
            $dom->loadHTML(
                mb_encode_numericentity('<body>' . $html . '</body>', [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );

            if ($dom->documentElement !== null) {
                $this->processNodes($dom, $dom->documentElement->childNodes);
            }

            if ($this->hasPendingContent()) {
                $this->addTextToPart();
            }
            $this->currentLength = 0;
        } finally {
            libxml_clear_errors();
            // Give the caller back the libxml error mode it had before.
            libxml_use_internal_errors($previousErrorMode);
        }

        return $this->parts;
    }

    /**
     * Dispatches every text and element node of the list; other node types are ignored.
     */
    private function processNodes(DOMDocument $dom, DOMNodeList $nodes): void
    {
        /** @var DOMNode $node */
        foreach ($nodes as $node) {
            if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
                $this->processText($this->textNodeToHtml($node));
                continue;
            }

            if ($node->nodeType === XML_ELEMENT_NODE) {
                $this->processHtml($dom, $node);
            }
        }
    }

    /**
     * Returns the content of a text node as HTML.
     *
     * textContent is entity-decoded, while the collected text is later
     * concatenated with raw markup. It therefore has to be escaped again, or an
     * escaped "&lt;" from the source would turn into a live "<".
     */
    private function textNodeToHtml(DOMNode $node): string
    {
        $parentTag = strtolower($node->parentNode?->nodeName ?? '');

        if (in_array($parentTag, self::RAW_TEXT_TAGS, true)) {
            return $node->textContent;
        }

        return htmlspecialchars($node->textContent, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Adds text to the current part, starting a new part or splitting the text when needed.
     */
    private function processText(string $text): void
    {
        $textLength = mb_strlen($text);

        if ($this->currentLength + $textLength <= $this->maxLength) {
            $this->currentText .= $text;
            $this->currentLength += $textLength;

            return;
        }

        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        if ($textLength <= $this->maxLength) {
            $this->currentText .= $text;
            $this->currentLength += $textLength;

            return;
        }

        $this->splitLongText($text);
    }

    /**
     * Distributes a text that exceeds the budget over several parts, sentence by sentence.
     *
     * A single sentence longer than the budget is kept whole in a part of its own.
     */
    private function splitLongText(string $text): void
    {
        // A sentence ends with ., ? or ! followed by blanks and an upper-case Latin/Cyrillic letter.
        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';

        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($sentences === false) {
            // The pattern could not be applied (e.g. malformed UTF-8): keep the text in one piece.
            $sentences = [$text];
        }

        foreach ($sentences as $sentence) {
            // Strict comparison: empty() would also treat the text "0" as empty.
            $separator = $this->currentText === '' ? '' : ' ';
            $sentenceLength = mb_strlen($sentence);
            $addedLength = $sentenceLength + mb_strlen($separator);

            if ($this->currentLength + $addedLength <= $this->maxLength) {
                $this->currentText .= $separator . $sentence;
                $this->currentLength += $addedLength;
                continue;
            }

            // Flush only real content; otherwise an oversized first sentence would
            // leave behind a part that holds nothing but an empty entry.
            if ($this->hasPendingContent()) {
                $this->addTextToPart();
                $this->newPart();
            }

            $this->currentText = $sentence;
            $this->currentLength = $sentenceLength;
        }
    }

    /**
     * Adds an element to the current part, or opens it up when it exceeds the budget.
     */
    private function processHtml(DOMDocument $dom, DOMNode $node): void
    {
        $html = $dom->saveHTML($node);
        $htmlLength = mb_strlen($html);

        if ($this->currentLength + $htmlLength <= $this->maxLength) {
            $this->currentText .= $html;
            $this->currentLength += $htmlLength;

            return;
        }

        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        if ($htmlLength <= $this->maxLength) {
            $this->currentText .= $html;
            $this->currentLength += $htmlLength;

            return;
        }

        if ($node->hasChildNodes()) {
            $tag = strtolower($node->nodeName);

            if ($tag === 'table') {
                $this->splitTable($dom, $node);

                return;
            }

            $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
            $this->processNodes($dom, $node->childNodes);
            $this->currentAfterText .= '</' . $tag . '>';
            $this->addTextToPart();
            $this->newPart();

            return;
        }

        // An oversized element without children cannot be opened up. It is kept
        // as one over-budget chunk rather than being dropped from the output.
        $this->currentText .= $html;
        $this->currentLength += $htmlLength;
    }

    /**
     * Opens up an oversized table and processes its sections and rows one by one.
     */
    private function splitTable(DOMDocument $dom, DOMNode $node): void
    {
        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        $this->currentBeforeText .= '<table' . $this->attributesToString($node) . '>';

        foreach ($node->childNodes as $childNode) {
            // Whitespace text and comments between table children carry no content.
            if ($childNode->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $this->processTableSection($dom, $childNode);
        }

        $this->currentAfterText .= '</table>';
        $this->addTextToPart();
        $this->newPart();
    }

    /**
     * Processes one direct child element of a table.
     */
    private function processTableSection(DOMDocument $dom, DOMNode $node): void
    {
        $tag = strtolower($node->nodeName);

        if ($tag === 'tr') {
            $this->processTableTr($dom, $node);

            return;
        }

        if (in_array($tag, self::TABLE_SECTION_TAGS, true)) {
            $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';

            foreach ($node->childNodes as $childNode) {
                if (strtolower($childNode->nodeName) === 'tr') {
                    $this->processTableTr($dom, $childNode);
                }
            }

            $this->currentAfterText .= '</' . $tag . '>';
            // Flush now: a pending closing tag would otherwise be emitted after
            // the first cell of the following section.
            $this->addTextToPart();

            return;
        }

        // caption, colgroup, etc.: handled like any other element so they are not lost.
        $this->processHtml($dom, $node);
        if ($this->hasPendingContent()) {
            // Keep the element ahead of the tags opened by the next section.
            $this->addTextToPart();
        }
    }

    /**
     * Processes a table row and its td/th cells.
     */
    private function processTableTr(DOMDocument $dom, DOMNode $node): void
    {
        $this->currentBeforeText .= '<tr' . $this->attributesToString($node) . '>';

        foreach ($node->childNodes as $childNode) {
            if (in_array(strtolower($childNode->nodeName), self::TABLE_CELL_TAGS, true)) {
                $this->processTableTd($dom, $childNode);
            }
        }

        $this->currentAfterText .= '</tr>';
        $this->addTextToPart();
    }

    /**
     * Processes a table cell (td or th), preserving its tag name.
     */
    private function processTableTd(DOMDocument $dom, DOMNode $node): void
    {
        $tag = strtolower($node->nodeName);

        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
        $this->processNodes($dom, $node->childNodes);
        $this->currentAfterText .= '</' . $tag . '>';
        $this->addTextToPart();
    }

    /**
     * Serializes the attributes of a node as ` name="value"` pairs with escaped values.
     */
    private function attributesToString(DOMNode $node): string
    {
        if (!$node->hasAttributes()) {
            return '';
        }

        $attributes = '';
        foreach ($node->attributes as $attr) {
            $attributes .= sprintf(
                ' %s="%s"',
                $attr->nodeName,
                htmlspecialchars($attr->nodeValue ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            );
        }

        return $attributes;
    }

    /**
     * Tells whether any text or wrapper tag is waiting to be written to a part.
     */
    private function hasPendingContent(): bool
    {
        return $this->currentText !== ''
            || $this->currentBeforeText !== ''
            || $this->currentAfterText !== '';
    }

    /**
     * Writes the collected text and wrapper tags to the current part and clears the buffers.
     *
     * The length counter is left untouched: the entry still belongs to the same part.
     */
    private function addTextToPart(): void
    {
        $this->parts[$this->currentPart]->add($this->currentText, $this->currentBeforeText, $this->currentAfterText);
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
    }

    /**
     * Starts a new, empty part and resets all buffers and the length counter.
     */
    private function newPart(): void
    {
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
        $this->currentLength = 0;
        $this->currentPart++;
        $this->parts[$this->currentPart] = new PartText();
    }
}
@@ -0,0 +1,50 @@
<?php declare(strict_types=1);
namespace KorElf\TranslateLaravel\Translate\SplitTextIntoParts;
use KorElf\TranslateLaravel\DTO\PartText;
/**
 * Splits plain text into parts of at most a given length, on sentence boundaries.
 */
final readonly class SplitTextCommand
{
    /**
     * Groups consecutive sentences into parts no longer than $maxLength characters.
     *
     * Sentences inside one part are joined with a single space. A single
     * sentence longer than $maxLength is not broken up and forms an oversized
     * part of its own.
     *
     * @param string $text      Text to split.
     * @param int    $maxLength Maximum number of characters per part.
     * @return array<int, PartText> Parts in text order; empty when $text is empty.
     */
    public function execute(string $text, int $maxLength): array
    {
        // A sentence ends with ., ? or ! followed by blanks and an upper-case Latin/Cyrillic letter.
        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';

        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($sentences === false) {
            // The pattern could not be applied (e.g. malformed UTF-8): keep the text in one piece.
            $sentences = $text === '' ? [] : [$text];
        }

        $parts = [];
        $currentPart = '';

        foreach ($sentences as $sentence) {
            // Strict comparisons throughout: empty() would treat the text "0" as empty and drop it.
            $candidate = $currentPart === '' ? $sentence : $currentPart . ' ' . $sentence;

            if (mb_strlen($candidate) <= $maxLength) {
                $currentPart = $candidate;
                continue;
            }

            // The sentence does not fit: close the current part and start a new one with it.
            if ($currentPart !== '') {
                $parts[] = $this->createPart($currentPart);
            }

            $currentPart = $sentence;
        }

        if ($currentPart !== '') {
            $parts[] = $this->createPart($currentPart);
        }

        return $parts;
    }

    /**
     * Wraps a text in a PartText that holds it as its only fragment.
     */
    private function createPart(string $text): PartText
    {
        $part = new PartText();
        $part->add($text);

        return $part;
    }
}
+30 -88
View File
@@ -2,114 +2,56 @@
namespace KorElf\TranslateLaravel\Translate; namespace KorElf\TranslateLaravel\Translate;
use KorElf\TranslateLaravel\DTO\PartText;
use KorElf\TranslateLaravel\Enums\TextType; use KorElf\TranslateLaravel\Enums\TextType;
use KorElf\TranslateLaravel\Facades\Translate; use KorElf\TranslateLaravel\Facades\Translate;
use DOMDocument; use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitHTMLCommand;
use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitTextCommand;
final readonly class SplitTextIntoPartsCommand final readonly class SplitTextIntoPartsCommand
{ {
/**
* @param string $text
* @param TextType $textType
* @param string|null $driver
* @return array<int, PartText>
*/
public function execute(string $text, TextType $textType, ?string $driver): array public function execute(string $text, TextType $textType, ?string $driver): array
{ {
$maxLength = Translate::getLimit($driver); $maxLength = Translate::getLimit($driver);
$maxLength = $maxLength['max_symbols'] ?? null; $maxLength = $maxLength['max_symbols'] ?? null;
if ($maxLength === null || $maxLength > mb_strlen($text)) { if ($maxLength === null || $maxLength > mb_strlen($text)) {
return [$text]; $part = new PartText();
$part->add($text);
return [$part];
} }
if ($textType === TextType::Html) { if ($textType === TextType::Html) {
return $this->splitHtmlText($text, $maxLength); return $this->splitHtml($text, $maxLength);
} }
return $this->splitTextBySentences($text, $maxLength); return $this->splitText($text, $maxLength);
} }
private function splitTextBySentences(string $text, int $maxLength): array /**
* @param string $text
* @param int $maxLength
* @return array<int, PartText>
*/
private function splitText(string $text, int $maxLength): array
{ {
// Common expression for searching for sentences. $splitTextCommand = new SplitTextCommand();
$sentenceEndings = '/(?<=[.?!])\s+(?=[A-ZА-Я])/u'; return $splitTextCommand->execute($text, $maxLength);
//Dividing the text into sentences
$sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
$parts = [];
$currentPart = '';
foreach ($sentences as $sentence) {
// If adding a sentence does not exceed the limit, add it to the current part.
if (mb_strlen($currentPart . ' ' . $sentence) <= $maxLength) {
$currentPart .= (empty($currentPart) ? '' : ' ') . $sentence;
continue;
}
// Otherwise, save the current part and start a new one.
if (!empty($currentPart)) {
$parts[] = $currentPart;
$currentPart = '';
}
$currentPart = $sentence;
}
if (!empty($currentPart)) {
$parts[] = $currentPart;
}
return $parts;
} }
private function splitHtmlText(string $html, int $maxLength): array /**
* @param string $html
* @param int $maxLength
* @return array<int, PartText>
*/
private function splitHtml(string $html, int $maxLength): array
{ {
libxml_use_internal_errors(true); $splitHTMLCommand = new SplitHTMLCommand($maxLength);
return $splitHTMLCommand->execute($html);
$dom = new DOMDocument();
$dom->loadHTML(mb_convert_encoding('<body>' . $html . '</body>', 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$parts = [];
$currentPart = new DOMDocument();
$currentLength = 0;
// Iterate through all child nodes of the root element
foreach ($dom->documentElement->childNodes as $node) {
if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
// Dividing text nodes into sentences
$sentences = preg_split('/(?<=[.?!])\s+(?=[A-ZА-Я])/', $node->textContent, -1, PREG_SPLIT_NO_EMPTY);
foreach ($sentences as $sentence) {
$sentenceHtml = htmlspecialchars($sentence);
if ($currentLength + mb_strlen($sentenceHtml) > $maxLength) {
$parts[] = $currentPart->saveHTML();
// Start new part
$currentPart = new DOMDocument();
$currentLength = 0;
}
$textNode = $currentPart->createTextNode($sentence . ' ');
$currentPart->appendChild($textNode);
$currentLength += mb_strlen($sentenceHtml);
}
continue;
}
$nodeHtml = $dom->saveHTML($node);
if ($currentLength + mb_strlen($nodeHtml) > $maxLength) {
$parts[] = $currentPart->saveHTML();
// Start new part
$currentPart = new DOMDocument();
$currentLength = mb_strlen($nodeHtml);
}
$currentPart->appendChild($currentPart->importNode($node, true));
$currentLength += mb_strlen($nodeHtml);
}
if ($currentPart->hasChildNodes()) {
$parts[] = $currentPart->saveHTML();
}
libxml_clear_errors();
return $parts;
} }
} }