From e2eb80f14051189081aa1e37ab7ae5784e352562 Mon Sep 17 00:00:00 2001
From: Leonid Nikitin
Date: Sat, 20 Jun 2026 16:04:21 +0500
Subject: [PATCH 1/3] Update Yandex Translate documentation link in README.
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 20d636f..9e444d1 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
### TRANSLATE_SERVICE
По умолчанию через какой сервис обращаться за переводом. На данный момент доступен только сервисы - **yandex**, **log**.
-**yandex** - https://yandex.cloud/ru/services/translate
+**yandex** - https://aistudio.yandex.ru/docs/ru/translate/concepts/
**log** - для проверки и внедрения в свой продукт.
### TRANSLATE_YANDEX_FOLDER_ID
From 92fd2cab1b6c5bd2d9b4daf9ee0af43e242373da Mon Sep 17 00:00:00 2001
From: Leonid Nikitin
Date: Sat, 20 Jun 2026 16:05:07 +0500
Subject: [PATCH 2/3] Expand `illuminate/support` version constraints and add
`ext-mbstring` dependency in composer.json.
---
composer.json | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/composer.json b/composer.json
index 3427dc0..a63e37f 100644
--- a/composer.json
+++ b/composer.json
@@ -19,11 +19,12 @@
],
"require": {
"php": "^8.2",
- "illuminate/support": "^10.0|^11.0",
+ "illuminate/support": "^10.0|^11.0|^12.0|^13.0",
"guzzlehttp/guzzle": "^7.0.1",
"web-token/jwt-framework": "^3.0",
"ext-libxml": "*",
- "ext-dom": "*"
+ "ext-dom": "*",
+ "ext-mbstring": "*"
},
"extra": {
"laravel": {
From fe90500e441c91a76bfca98b48cce8e8af60df82 Mon Sep 17 00:00:00 2001
From: Leonid Nikitin
Date: Sat, 20 Jun 2026 16:05:31 +0500
Subject: [PATCH 3/3] Refactor text splitting logic into dedicated commands and
introduce `PartText` DTO for improved modularity and maintainability.
---
src/DTO/PartText.php | 40 +++
src/DTO/ProcessTranslateDto.php | 4 +-
src/Jobs/ProcessTranslate.php | 64 ++++-
.../SplitTextIntoParts/SplitHTMLCommand.php | 270 ++++++++++++++++++
.../SplitTextIntoParts/SplitTextCommand.php | 50 ++++
src/Translate/SplitTextIntoPartsCommand.php | 118 ++------
6 files changed, 441 insertions(+), 105 deletions(-)
create mode 100644 src/DTO/PartText.php
create mode 100644 src/Translate/SplitTextIntoParts/SplitHTMLCommand.php
create mode 100644 src/Translate/SplitTextIntoParts/SplitTextCommand.php
diff --git a/src/DTO/PartText.php b/src/DTO/PartText.php
new file mode 100644
index 0000000..18f55fc
--- /dev/null
+++ b/src/DTO/PartText.php
@@ -0,0 +1,40 @@
+texts[$this->part] = $text;
+ $this->beforeTexts[$this->part] = $beforeText;
+ $this->afterTexts[$this->part] = $afterText;
+
+ $this->part++;
+ }
+
+    /**
+     * Returns the stored text fragments, without the before/after texts that
+     * are kept alongside them in separate arrays.
+     *
+     * @return array the fragments under the keys they were stored with
+     */
+    public function getTextsForTranslation(): array
+    {
+        return $this->texts;
+    }
+
+    /**
+     * Glues translated fragments back together with the before/after texts
+     * stored under the same keys.
+     *
+     * Fragments are concatenated in the iteration order of $texts. A key that
+     * has no stored before/after text contributes only the fragment itself
+     * instead of triggering an "Undefined array key" warning.
+     *
+     * @param array $texts translated fragments, keyed like getTextsForTranslation()
+     * @return string the reassembled text
+     */
+    public function getTextsAfterTranslation(array $texts): string
+    {
+        $result = '';
+        foreach ($texts as $key => $text) {
+            // A missing or null entry simply means there is nothing to wrap with.
+            $result .= ($this->beforeTexts[$key] ?? '') . $text . ($this->afterTexts[$key] ?? '');
+        }
+
+        return $result;
+    }
+}
\ No newline at end of file
diff --git a/src/DTO/ProcessTranslateDto.php b/src/DTO/ProcessTranslateDto.php
index 811b2da..6e9d0aa 100644
--- a/src/DTO/ProcessTranslateDto.php
+++ b/src/DTO/ProcessTranslateDto.php
@@ -10,7 +10,7 @@ final readonly class ProcessTranslateDto
private string $groupName,
private string $key,
private int $part,
- private string $text,
+ private PartText $text,
private TextType $textType,
private string $targetLanguageCode,
private ?string $sourceLanguageCode = null,
@@ -32,7 +32,7 @@ final readonly class ProcessTranslateDto
return $this->part;
}
- public function getText(): string
+ public function getText(): PartText
{
return $this->text;
}
diff --git a/src/Jobs/ProcessTranslate.php b/src/Jobs/ProcessTranslate.php
index d605868..0fd8473 100644
--- a/src/Jobs/ProcessTranslate.php
+++ b/src/Jobs/ProcessTranslate.php
@@ -59,28 +59,62 @@ final class ProcessTranslate implements ShouldQueue, ShouldBeEncrypted, ProcessT
{
$param = $this->param;
$groupName = $param->getGroupName();
+ $key = $param->getKey();
+ $part = $param->getPart();
$translated = Cache::get($groupName, []);
if (!isset($translated[$param->getKey()])) {
$translated[$param->getKey()] = [];
}
- $translate = Translate::service($param->getDriver());
- $function = $param->getTextType()->functionName();
- $key = $param->getKey();
- $part = $param->getPart();
-
- $translated[$key][$part] = $param->getText();
- if (\trim($param->getText()) !== '') {
-
- $translated[$key][$part] = $translate->{$function}(
- $param->getText(),
- $param->getTargetLanguageCode(),
- $param->getSourceLanguageCode()
- );
-
- }
+ $translated[$key][$part] = $this->translate();
Cache::put($groupName, $translated, 86400);
}
+
+    /**
+     * Translates the non-blank fragments of the job's PartText and returns the
+     * reassembled string.
+     *
+     * Fragments that are empty after trimming are not sent to the driver and are
+     * put back unchanged. Translations are matched to their source fragments by
+     * their position in the list handed to the driver; positions the driver
+     * returns that were never sent are ignored.
+     */
+    private function translate(): string
+    {
+        $dto = $this->param;
+
+        $service = Translate::service($dto->getDriver());
+        $method = $dto->getTextType()->functionName();
+
+        $partText = $dto->getText();
+        $fragments = $partText->getTextsForTranslation();
+
+        // array_filter() keeps the original keys, so they can be restored later.
+        $translatable = array_filter(
+            $fragments,
+            static fn ($fragment): bool => \trim($fragment) !== '',
+        );
+
+        if ($translatable !== []) {
+            $fragmentKeys = array_keys($translatable);
+
+            $translations = $service->{$method}(
+                array_values($translatable),
+                $dto->getTargetLanguageCode(),
+                $dto->getSourceLanguageCode(),
+            );
+
+            foreach ($translations as $position => $translation) {
+                if (isset($fragmentKeys[$position])) {
+                    $fragments[$fragmentKeys[$position]] = $translation;
+                }
+            }
+        }
+
+        return $partText->getTextsAfterTranslation($fragments);
+    }
}
\ No newline at end of file
diff --git a/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php b/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php
new file mode 100644
index 0000000..2eb7841
--- /dev/null
+++ b/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php
@@ -0,0 +1,270 @@
+
+    /**
+     * Splits an HTML fragment into parts for translation.
+     *
+     * Resets the splitter state, parses $html with DOMDocument, walks the
+     * children of the document element and finally flushes whatever text is
+     * still buffered.
+     *
+     * @param string $html HTML fragment; converted from UTF-8 before parsing
+     * @return array<int, PartText> the collected parts (presumably keyed by part index — confirm against newPart())
+     */
+    public function execute(string $html): array
+    {
+        // NOTE(review): currentBeforeText/currentAfterText are not reset here, only
+        // in the final flush below — confirm they cannot leak between calls.
+        $this->currentPart = 0;
+        $this->currentLength = 0;
+        $this->currentText = '';
+        $this->parts = [
+            0 => new PartText(),
+        ];
+
+        // Collect libxml parse errors internally instead of emitting PHP warnings.
+        libxml_use_internal_errors(true);
+
+        $dom = new DOMDocument();
+        // NOTE(review): the literals concatenated around $html are empty in this
+        // copy; the following line reads documentElement->childNodes, so a wrapper
+        // element may have been intended — confirm against the real file.
+        // NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
+        // deprecated as of PHP 8.2, which composer.json requires.
+        $dom->loadHTML(
+            mb_convert_encoding('' . $html . '', 'HTML-ENTITIES', 'UTF-8'),
+            // Do not add implied <html>/<body> elements or a default doctype.
+            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
+        );
+
+        $this->processNodes($dom, $dom->documentElement->childNodes);
+        // Flush the text that is still buffered after the last node.
+        if ($this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->currentText = '';
+            $this->currentBeforeText = '';
+            $this->currentAfterText = '';
+            $this->currentLength = 0;
+            $this->currentPart++;
+        }
+
+        // Drop the parse errors collected while loading the fragment.
+        libxml_clear_errors();
+
+        return $this->parts;
+    }
+
+    /**
+     * Dispatches every node of the list to its handler: text and CDATA nodes go
+     * to processText(), element nodes go to processHtml(); any other node type
+     * is ignored.
+     */
+    private function processNodes(DOMDocument $dom, DOMNodeList $nodes): void
+    {
+        /** @var DOMNode $child */
+        foreach ($nodes as $child) {
+            match ($child->nodeType) {
+                XML_TEXT_NODE, XML_CDATA_SECTION_NODE => $this->processText($child->textContent),
+                XML_ELEMENT_NODE => $this->processHtml($dom, $child),
+                default => null,
+            };
+        }
+    }
+
+    /**
+     * Adds the content of a text node to the buffer of the current part.
+     *
+     * Text that fits next to the buffered content is appended. Otherwise a
+     * non-empty buffer is flushed and a new part is started; the text is then
+     * buffered whole when it fits the limit by itself, or handed to
+     * splitLongText() when it does not.
+     */
+    private function processText(string $text): void
+    {
+        $textLength = mb_strlen($text);
+        $fitsCurrentPart = $this->currentLength + $textLength <= $this->maxLength;
+
+        if (!$fitsCurrentPart && $this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        if (!$fitsCurrentPart && $textLength > $this->maxLength) {
+            $this->splitLongText($text);
+            return;
+        }
+
+        $this->currentText .= $text;
+        $this->currentLength += $textLength;
+    }
+
+    /**
+     * Splits a text that is longer than the part limit into sentences and packs
+     * them into consecutive parts, joining the sentences inside a part with a
+     * single space.
+     *
+     * A sentence boundary is a run of spaces/tabs that follows ".", "?" or "!"
+     * and precedes an uppercase letter in the ranges A-Z or А-Я.
+     *
+     * @param string $text text to distribute over parts
+     */
+    private function splitLongText(string $text): void
+    {
+        // Common expression for searching for sentences.
+        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';
+
+        // Dividing the text into sentences. preg_split() returns false on a PCRE
+        // error (e.g. malformed UTF-8); keep the unsplit text so nothing is lost.
+        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
+        if ($sentences === false) {
+            $sentences = [$text];
+        }
+
+        foreach ($sentences as $sentence) {
+            $sentenceLength = mb_strlen($sentence);
+            // One extra character for the joining space when the part already has content.
+            $separatorLength = $this->currentLength > 0 ? 1 : 0;
+
+            // If adding a sentence does not exceed the limit, add it to the current part.
+            if ($this->currentLength + $separatorLength + $sentenceLength <= $this->maxLength) {
+                // Compare with '' explicitly: empty() would also treat the text "0" as empty.
+                $this->currentText .= ($this->currentText === '' ? '' : ' ') . $sentence;
+                $this->currentLength += $separatorLength + $sentenceLength;
+                continue;
+            }
+
+            // NOTE(review): this also runs when nothing is buffered yet (a first
+            // sentence longer than the limit) — confirm an empty flush is harmless.
+            $this->addTextToPart();
+            $this->newPart();
+
+            // The sentence opens a fresh part, so no separator is counted for it.
+            $this->currentText = $sentence;
+            $this->currentLength = $sentenceLength;
+        }
+    }
+
+    /**
+     * Adds an element node to the current part, descending into it when its
+     * serialized markup is longer than the part limit.
+     *
+     * - Markup that fits next to the buffered content is appended as is.
+     * - Otherwise a non-empty buffer is flushed and a new part is started.
+     * - Markup that fits a part by itself, or that has no child nodes and so
+     *   cannot be broken down any further, is buffered whole rather than dropped.
+     * - An oversized <table> is delegated to splitTable().
+     * - Any other oversized element has its opening tag recorded as "before"
+     *   text and its closing tag as "after" text, with its children processed
+     *   in between.
+     */
+    private function processHtml(DOMDocument $dom, DOMNode $node): void
+    {
+        $html = $dom->saveHTML($node);
+        $htmlLength = mb_strlen($html);
+
+        if ($this->currentLength + $htmlLength <= $this->maxLength) {
+            $this->currentText .= $html;
+            $this->currentLength += $htmlLength;
+            return;
+        }
+
+        if ($this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        // A childless element cannot be split, so it is kept whole even when it
+        // exceeds the limit; silently losing the markup would corrupt the result.
+        if ($htmlLength <= $this->maxLength || !$node->hasChildNodes()) {
+            $this->currentText .= $html;
+            $this->currentLength += $htmlLength;
+            return;
+        }
+
+        $tag = strtolower($node->nodeName);
+        if ($tag === 'table') {
+            $this->splitTable($dom, $node);
+            return;
+        }
+
+        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
+
+        $this->processNodes($dom, $node->childNodes);
+
+        // The closing tag must mirror the opening tag recorded above.
+        $this->currentAfterText .= '</' . $tag . '>';
+        $this->addTextToPart();
+        $this->newPart();
+    }
+
+    /**
+     * Splits a <table> element: the opening tag is recorded as "before" text,
+     * every element child is passed to processTableSection(), and the closing
+     * tag is recorded as "after" text before the part is closed.
+     *
+     * Text, CDATA and other non-element children of the table are skipped.
+     */
+    private function splitTable(DOMDocument $dom, DOMNode $node): void
+    {
+        if ($this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        $this->currentBeforeText .= '<table' . $this->attributesToString($node) . '>';
+
+        // Iterating an empty child list is a no-op, so no hasChildNodes() guard is needed.
+        foreach ($node->childNodes as $childNode) {
+            if ($childNode->nodeType === XML_ELEMENT_NODE) {
+                $this->processTableSection($dom, $childNode);
+            }
+        }
+
+        $this->currentAfterText .= '</table>';
+        $this->addTextToPart();
+        $this->newPart();
+    }
+
+    /**
+     * Handles a direct element child of a table.
+     *
+     * A <tr> is processed as a row. A <thead>, <tbody> or <tfoot> has its
+     * opening tag recorded as "before" text, each of its <tr> children
+     * processed as a row, and its closing tag recorded as "after" text.
+     *
+     * NOTE(review): any other child (e.g. <caption>, <colgroup>) is ignored
+     * here — confirm that dropping it is intended.
+     */
+    private function processTableSection(DOMDocument $dom, DOMNode $node): void
+    {
+        $tag = strtolower($node->nodeName);
+        if ($tag === 'tr') {
+            $this->processTableTr($dom, $node);
+            return;
+        }
+
+        if (!in_array($tag, ['thead', 'tbody', 'tfoot'], true)) {
+            return;
+        }
+
+        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
+        foreach ($node->childNodes as $childNode) {
+            if (strtolower($childNode->nodeName) === 'tr') {
+                $this->processTableTr($dom, $childNode);
+            }
+        }
+        // The closing tag must mirror the opening tag recorded above.
+        $this->currentAfterText .= '</' . $tag . '>';
+    }
+
+    /**
+     * Handles one table row: records the opening <tr> tag as "before" text,
+     * passes every <td> child to processTableTd(), records the closing </tr>
+     * tag as "after" text and adds the buffered text to the current part.
+     *
+     * NOTE(review): only <td> children are processed; <th> cells are skipped —
+     * confirm whether header cells should be handled as well.
+     */
+    private function processTableTr(DOMDocument $dom, DOMNode $node): void
+    {
+        $this->currentBeforeText .= '<tr' . $this->attributesToString($node) . '>';
+        foreach ($node->childNodes as $childNode) {
+            if (strtolower($childNode->nodeName) === 'td') {
+                $this->processTableTd($dom, $childNode);
+            }
+        }
+        $this->currentAfterText .= '</tr>';
+        $this->addTextToPart();
+    }
+
+ private function processTableTd(DOMDocument $dom, DOMNode $node): void
+ {
+ $this->currentBeforeText .= '