diff --git a/README.md b/README.md
index 20d636f..9e444d1 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
### TRANSLATE_SERVICE
Сервис, через который по умолчанию выполняется перевод. На данный момент доступны только сервисы **yandex** и **log**.
-**yandex** - https://yandex.cloud/ru/services/translate
+**yandex** - https://aistudio.yandex.ru/docs/ru/translate/concepts/
**log** - для проверки и внедрения в свой продукт.
### TRANSLATE_YANDEX_FOLDER_ID
diff --git a/composer.json b/composer.json
index 3427dc0..a63e37f 100644
--- a/composer.json
+++ b/composer.json
@@ -19,11 +19,12 @@
],
"require": {
"php": "^8.2",
- "illuminate/support": "^10.0|^11.0",
+ "illuminate/support": "^10.0|^11.0|^12.0|^13.0",
"guzzlehttp/guzzle": "^7.0.1",
"web-token/jwt-framework": "^3.0",
"ext-libxml": "*",
- "ext-dom": "*"
+ "ext-dom": "*",
+ "ext-mbstring": "*"
},
"extra": {
"laravel": {
diff --git a/src/DTO/PartText.php b/src/DTO/PartText.php
new file mode 100644
index 0000000..18f55fc
--- /dev/null
+++ b/src/DTO/PartText.php
@@ -0,0 +1,40 @@
+texts[$this->part] = $text;
+ $this->beforeTexts[$this->part] = $beforeText;
+ $this->afterTexts[$this->part] = $afterText;
+
+ $this->part++;
+ }
+
+ /**
+ * Returns the stored text fragments exactly as they are held in $this->texts.
+ *
+ * @return array
+ */
+ public function getTextsForTranslation(): array
+ {
+ return $this->texts;
+ }
+
+    /**
+     * Reassembles one string from the given fragments.
+     *
+     * Every fragment is wrapped in the "before" and "after" text stored under the same key,
+     * and the results are concatenated in the iteration order of $texts.
+     *
+     * @param array $texts Fragments (usually translated), keyed like getTextsForTranslation().
+     * @return string
+     */
+    public function getTextsAfterTranslation(array $texts): string
+    {
+        $result = '';
+        foreach ($texts as $key => $text) {
+            // A key with no (or null) wrappers contributes only its own text instead of
+            // triggering an "undefined array key" warning.
+            $result .= ($this->beforeTexts[$key] ?? '') . $text . ($this->afterTexts[$key] ?? '');
+        }
+
+        return $result;
+    }
+}
\ No newline at end of file
diff --git a/src/DTO/ProcessTranslateDto.php b/src/DTO/ProcessTranslateDto.php
index 811b2da..6e9d0aa 100644
--- a/src/DTO/ProcessTranslateDto.php
+++ b/src/DTO/ProcessTranslateDto.php
@@ -10,7 +10,7 @@ final readonly class ProcessTranslateDto
private string $groupName,
private string $key,
private int $part,
- private string $text,
+ private PartText $text,
private TextType $textType,
private string $targetLanguageCode,
private ?string $sourceLanguageCode = null,
@@ -32,7 +32,7 @@ final readonly class ProcessTranslateDto
return $this->part;
}
- public function getText(): string
+ public function getText(): PartText
{
return $this->text;
}
diff --git a/src/Jobs/ProcessTranslate.php b/src/Jobs/ProcessTranslate.php
index d605868..0fd8473 100644
--- a/src/Jobs/ProcessTranslate.php
+++ b/src/Jobs/ProcessTranslate.php
@@ -59,28 +59,62 @@ final class ProcessTranslate implements ShouldQueue, ShouldBeEncrypted, ProcessT
{
$param = $this->param;
$groupName = $param->getGroupName();
+ $key = $param->getKey();
+ $part = $param->getPart();
$translated = Cache::get($groupName, []);
if (!isset($translated[$param->getKey()])) {
$translated[$param->getKey()] = [];
}
- $translate = Translate::service($param->getDriver());
- $function = $param->getTextType()->functionName();
- $key = $param->getKey();
- $part = $param->getPart();
-
- $translated[$key][$part] = $param->getText();
- if (\trim($param->getText()) !== '') {
-
- $translated[$key][$part] = $translate->{$function}(
- $param->getText(),
- $param->getTargetLanguageCode(),
- $param->getSourceLanguageCode()
- );
-
- }
+ $translated[$key][$part] = $this->translate();
Cache::put($groupName, $translated, 86400);
}
+
+    /**
+     * Translates the non-blank fragments of the job's PartText and returns the reassembled text.
+     *
+     * Blank fragments are never sent to the translation service; they keep their original
+     * value and position. Translated fragments are written back under their original keys.
+     *
+     * @return string
+     */
+    private function translate(): string
+    {
+        $param = $this->param;
+
+        $service = Translate::service($param->getDriver());
+        $method = $param->getTextType()->functionName();
+
+        $partText = $param->getText();
+        $texts = $partText->getTextsForTranslation();
+
+        // Keys are preserved here so results can be mapped back afterwards.
+        $nonBlank = \array_filter($texts, static fn ($text): bool => \trim($text) !== '');
+
+        if ($nonBlank === []) {
+            return $partText->getTextsAfterTranslation($texts);
+        }
+
+        // Position in the request list => key of the fragment in the original array.
+        $originalKeys = \array_keys($nonBlank);
+
+        $translated = $service->{$method}(
+            \array_values($nonBlank),
+            $param->getTargetLanguageCode(),
+            $param->getSourceLanguageCode()
+        );
+
+        foreach ($translated as $position => $translatedText) {
+            if (isset($originalKeys[$position])) {
+                $texts[$originalKeys[$position]] = $translatedText;
+            }
+        }
+
+        return $partText->getTextsAfterTranslation($texts);
+    }
}
\ No newline at end of file
diff --git a/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php b/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php
new file mode 100644
index 0000000..2eb7841
--- /dev/null
+++ b/src/Translate/SplitTextIntoParts/SplitHTMLCommand.php
@@ -0,0 +1,270 @@
+
+ */
+    /**
+     * Splits an HTML fragment into PartText objects.
+     *
+     * The fragment is parsed inside a temporary wrapper element; only the wrapper's
+     * children are processed, so the wrapper never appears in the result.
+     *
+     * @param string $html UTF-8 HTML fragment.
+     * @return array<int, PartText> Parts keyed by their sequential index, starting at 0.
+     */
+    public function execute(string $html): array
+    {
+        // Reset every per-run buffer so a reused instance cannot leak state between calls.
+        $this->currentPart = 0;
+        $this->currentLength = 0;
+        $this->currentText = '';
+        $this->currentBeforeText = '';
+        $this->currentAfterText = '';
+        $this->parts = [
+            0 => new PartText(),
+        ];
+
+        $previousErrorMode = libxml_use_internal_errors(true);
+
+        try {
+            $dom = new DOMDocument();
+            // Non-ASCII characters are passed as numeric entities so the parser does not
+            // misread the encoding. mb_convert_encoding(..., 'HTML-ENTITIES') is deprecated
+            // since PHP 8.2, hence mb_encode_numericentity().
+            $dom->loadHTML(
+                '<div>' . mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8') . '</div>',
+                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
+            );
+
+            if ($dom->documentElement !== null) {
+                $this->processNodes($dom, $dom->documentElement->childNodes);
+            }
+
+            // Flush whatever is still buffered after the last node.
+            if ($this->currentLength > 0) {
+                $this->addTextToPart();
+                $this->currentLength = 0;
+                $this->currentPart++;
+            }
+        } finally {
+            libxml_clear_errors();
+            // Restore the caller's libxml error handling mode.
+            libxml_use_internal_errors($previousErrorMode);
+        }
+
+        return $this->parts;
+    }
+
+    /**
+     * Feeds the given nodes to the splitter in document order.
+     *
+     * Text and CDATA nodes go to processText(), element nodes go to processHtml();
+     * every other node type is ignored.
+     *
+     * @param DOMDocument $dom Document the nodes belong to.
+     * @param DOMNodeList $nodes Nodes to process.
+     * @return void
+     */
+    private function processNodes(DOMDocument $dom, DOMNodeList $nodes): void
+    {
+        /** @var DOMNode $node */
+        foreach ($nodes as $node) {
+            switch ($node->nodeType) {
+                case XML_TEXT_NODE:
+                    // textContent is entity-decoded. It is appended to an HTML buffer, so it has
+                    // to be escaped again; otherwise "&lt;b&gt;" would turn into live markup.
+                    $this->processText(
+                        htmlspecialchars($node->textContent, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
+                    );
+                    break;
+
+                case XML_CDATA_SECTION_NODE:
+                    // Passed through unchanged (presumably raw script/style content - confirm).
+                    $this->processText($node->textContent);
+                    break;
+
+                case XML_ELEMENT_NODE:
+                    $this->processHtml($dom, $node);
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Appends a text chunk to the part being built.
+     *
+     * When the chunk does not fit next to the buffered content, the buffer is flushed and a
+     * new part is opened. A chunk longer than the limit on its own goes to splitLongText().
+     *
+     * @param string $text
+     * @return void
+     */
+    private function processText(string $text): void
+    {
+        $textLength = mb_strlen($text);
+        $overflows = $this->currentLength + $textLength > $this->maxLength;
+
+        // Close the current part only if it holds something and the chunk does not fit beside it.
+        if ($overflows && $this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        // Still too long for an empty part: it has to be broken into sentences.
+        if ($this->currentLength + $textLength > $this->maxLength) {
+            $this->splitLongText($text);
+            return;
+        }
+
+        $this->currentText .= $text;
+        $this->currentLength += $textLength;
+    }
+
+    /**
+     * Distributes a text that exceeds the limit over several parts, sentence by sentence.
+     *
+     * Sentences are joined with a single space. A sentence that is longer than the limit
+     * on its own is kept whole, because there is no safe place to cut it.
+     *
+     * @param string $text
+     * @return void
+     */
+    private function splitLongText(string $text): void
+    {
+        // Common expression for searching for sentences.
+        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';
+
+        // Dividing the text into sentences.
+        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
+        if ($sentences === false) {
+            // The pattern could not be applied (e.g. malformed UTF-8): keep the text whole
+            // rather than lose it.
+            $sentences = [$text];
+        }
+
+        foreach ($sentences as $sentence) {
+            $sentenceLength = mb_strlen($sentence);
+            // The separator is counted only when one is actually inserted.
+            $separator = $this->currentText === '' ? '' : ' ';
+            $requiredLength = $sentenceLength + mb_strlen($separator);
+
+            // If adding a sentence does not exceed the limit, add it to the current part.
+            if ($this->currentLength + $requiredLength <= $this->maxLength) {
+                $this->currentText .= $separator . $sentence;
+                $this->currentLength += $requiredLength;
+                continue;
+            }
+
+            // Flush only when something is buffered, so no empty part is produced.
+            if ($this->currentLength > 0) {
+                $this->addTextToPart();
+                $this->newPart();
+            }
+
+            // A fresh part starts with the bare sentence: no separator in its length.
+            $this->currentText = $sentence;
+            $this->currentLength = $sentenceLength;
+        }
+    }
+
+    /**
+     * Adds an element to the part being built, splitting it when it exceeds the limit.
+     *
+     * An element that fits is appended as serialized HTML. An oversized element with
+     * children is opened, its children are processed recursively, and it is closed again;
+     * tables are delegated to splitTable(). An oversized element without children cannot
+     * be split and is kept whole.
+     *
+     * @param DOMDocument $dom Document used to serialize the node.
+     * @param DOMNode $node Element node to process.
+     * @return void
+     */
+    private function processHtml(DOMDocument $dom, DOMNode $node): void
+    {
+        $html = $dom->saveHTML($node);
+        if ($html === false) {
+            // Nothing can be emitted for a node that cannot be serialized.
+            return;
+        }
+        $htmlLength = mb_strlen($html);
+
+        // Close the current part when it holds something and the element does not fit beside it.
+        if ($this->currentLength > 0 && $this->currentLength + $htmlLength > $this->maxLength) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        // Either the element fits, or it has no children to split on and must stay whole
+        // (otherwise its markup would be lost).
+        if ($this->currentLength + $htmlLength <= $this->maxLength || !$node->hasChildNodes()) {
+            $this->currentText .= $html;
+            $this->currentLength += $htmlLength;
+            return;
+        }
+
+        $tag = strtolower($node->nodeName);
+        if ($tag === 'table') {
+            $this->splitTable($dom, $node);
+            return;
+        }
+
+        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
+
+        $this->processNodes($dom, $node->childNodes);
+
+        $this->currentAfterText .= '</' . $tag . '>';
+        $this->addTextToPart();
+        $this->newPart();
+    }
+
+    /**
+     * Splits an oversized table.
+     *
+     * Buffered content is flushed first so the table starts in its own part. The opening
+     * and closing table tags are recorded as before/after text, and every element child
+     * is handed to processTableSection(); non-element children are skipped.
+     *
+     * @param DOMDocument $dom Document the table belongs to.
+     * @param DOMNode $node The table element.
+     * @return void
+     */
+    private function splitTable(DOMDocument $dom, DOMNode $node): void
+    {
+        if ($this->currentLength > 0) {
+            $this->addTextToPart();
+            $this->newPart();
+        }
+
+        $this->currentBeforeText .= '<table' . $this->attributesToString($node) . '>';
+
+        foreach ($node->childNodes as $childNode) {
+            if ($childNode->nodeType === XML_ELEMENT_NODE) {
+                $this->processTableSection($dom, $childNode);
+            }
+        }
+
+        $this->currentAfterText .= '</table>';
+        $this->addTextToPart();
+        $this->newPart();
+    }
+
+    /**
+     * Processes a direct child of a table.
+     *
+     * A row is processed directly. A thead/tbody/tfoot section is opened, its rows are
+     * processed, and it is closed again. Any other element is ignored.
+     *
+     * @param DOMDocument $dom Document the node belongs to.
+     * @param DOMNode $node Direct child element of the table.
+     * @return void
+     */
+    private function processTableSection(DOMDocument $dom, DOMNode $node): void
+    {
+        $tag = strtolower($node->nodeName);
+
+        if ($tag === 'tr') {
+            $this->processTableTr($dom, $node);
+            return;
+        }
+
+        if (!in_array($tag, ['thead', 'tbody', 'tfoot'], true)) {
+            return;
+        }
+
+        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
+        foreach ($node->childNodes as $childNode) {
+            if (strtolower($childNode->nodeName) === 'tr') {
+                $this->processTableTr($dom, $childNode);
+            }
+        }
+        $this->currentAfterText .= '</' . $tag . '>';
+    }
+
+    /**
+     * Processes one table row.
+     *
+     * The row's opening and closing tags are recorded as before/after text and each cell
+     * is passed to processTableTd(). Header cells are forwarded as well as data cells, so
+     * their content is not dropped from the result.
+     *
+     * @param DOMDocument $dom Document the row belongs to.
+     * @param DOMNode $node The tr element.
+     * @return void
+     */
+    private function processTableTr(DOMDocument $dom, DOMNode $node): void
+    {
+        $this->currentBeforeText .= '<tr' . $this->attributesToString($node) . '>';
+        foreach ($node->childNodes as $childNode) {
+            if (in_array(strtolower($childNode->nodeName), ['td', 'th'], true)) {
+                $this->processTableTd($dom, $childNode);
+            }
+        }
+        $this->currentAfterText .= '</tr>';
+        $this->addTextToPart();
+    }
+
+    /**
+     * Processes one table cell.
+     *
+     * The cell's opening and closing tags are recorded as before/after text around its
+     * processed children. The tag name is taken from the node itself, so the cell is
+     * re-emitted with the same element name it had in the source.
+     *
+     * @param DOMDocument $dom Document the cell belongs to.
+     * @param DOMNode $node The cell element.
+     * @return void
+     */
+    private function processTableTd(DOMDocument $dom, DOMNode $node): void
+    {
+        $tag = strtolower($node->nodeName);
+
+        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
+        $this->processNodes($dom, $node->childNodes);
+        $this->currentAfterText .= '</' . $tag . '>';
+        $this->addTextToPart();
+    }
+
+    /**
+     * Serializes the attributes of a node for use inside an opening tag.
+     *
+     * Each attribute is rendered as ` name="value"` with a leading space and an
+     * HTML-escaped value. A node without attributes yields an empty string.
+     *
+     * @param DOMNode $node
+     * @return string
+     */
+    private function attributesToString(DOMNode $node): string
+    {
+        $rendered = '';
+
+        if ($node->hasAttributes()) {
+            foreach ($node->attributes as $attribute) {
+                $escapedValue = htmlspecialchars(
+                    $attribute->nodeValue ?? '',
+                    ENT_QUOTES | ENT_SUBSTITUTE,
+                    'UTF-8'
+                );
+                $rendered .= sprintf(' %s="%s"', $attribute->nodeName, $escapedValue);
+            }
+        }
+
+        return $rendered;
+    }
+
+    /**
+     * Stores the buffered text and its before/after wrappers in the current part,
+     * then clears the three buffers. The length counter is left untouched.
+     *
+     * @return void
+     */
+    private function addTextToPart(): void
+    {
+        $part = $this->parts[$this->currentPart];
+        $part->add($this->currentText, $this->currentBeforeText, $this->currentAfterText);
+
+        $this->currentText = $this->currentBeforeText = $this->currentAfterText = '';
+    }
+
+    /**
+     * Clears all buffers and the length counter, then opens the next, empty part.
+     *
+     * @return void
+     */
+    private function newPart(): void
+    {
+        $this->currentText = $this->currentBeforeText = $this->currentAfterText = '';
+        $this->currentLength = 0;
+
+        $this->parts[++$this->currentPart] = new PartText();
+    }
+}
\ No newline at end of file
diff --git a/src/Translate/SplitTextIntoParts/SplitTextCommand.php b/src/Translate/SplitTextIntoParts/SplitTextCommand.php
new file mode 100644
index 0000000..290d6aa
--- /dev/null
+++ b/src/Translate/SplitTextIntoParts/SplitTextCommand.php
@@ -0,0 +1,50 @@
+
+ */
+    /**
+     * Splits plain text into parts of at most $maxLength characters on sentence boundaries.
+     *
+     * Sentences are joined with a single space. A sentence longer than $maxLength on its
+     * own becomes a part of its own, because it cannot be cut safely.
+     *
+     * @param string $text Text to split.
+     * @param int $maxLength Maximum number of characters per part.
+     * @return array<int, PartText> Parts in reading order.
+     */
+    public function execute(string $text, int $maxLength): array
+    {
+        // Common expression for searching for sentences.
+        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';
+
+        // Dividing the text into sentences.
+        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
+        if ($sentences === false) {
+            // The pattern could not be applied (e.g. malformed UTF-8): keep the text whole.
+            $sentences = [$text];
+        }
+
+        $parts = [];
+        $currentPart = '';
+
+        foreach ($sentences as $sentence) {
+            $candidate = $currentPart === '' ? $sentence : $currentPart . ' ' . $sentence;
+
+            // If adding a sentence does not exceed the limit, add it to the current part.
+            if (mb_strlen($candidate) <= $maxLength) {
+                $currentPart = $candidate;
+                continue;
+            }
+
+            // Otherwise, save the current part and start a new one.
+            // Compared with '' rather than empty(), so a part equal to "0" is not lost.
+            if ($currentPart !== '') {
+                $part = new PartText();
+                $part->add($currentPart);
+                $parts[] = $part;
+            }
+            $currentPart = $sentence;
+        }
+
+        if ($currentPart !== '') {
+            $part = new PartText();
+            $part->add($currentPart);
+            $parts[] = $part;
+        }
+
+        return $parts;
+    }
+}
\ No newline at end of file
diff --git a/src/Translate/SplitTextIntoPartsCommand.php b/src/Translate/SplitTextIntoPartsCommand.php
index ce985c4..4a58b7b 100644
--- a/src/Translate/SplitTextIntoPartsCommand.php
+++ b/src/Translate/SplitTextIntoPartsCommand.php
@@ -2,114 +2,56 @@
namespace KorElf\TranslateLaravel\Translate;
+use KorElf\TranslateLaravel\DTO\PartText;
use KorElf\TranslateLaravel\Enums\TextType;
use KorElf\TranslateLaravel\Facades\Translate;
-use DOMDocument;
+use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitHTMLCommand;
+use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitTextCommand;
final readonly class SplitTextIntoPartsCommand
{
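+ /**
+ * Wraps the text in PartText objects, splitting it when it reaches the driver's "max_symbols" limit.
+ *
+ * @param string $text
+ * @param TextType $textType
+ * @param string|null $driver
+ * @return array<int, PartText>
+ */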
public function execute(string $text, TextType $textType, ?string $driver): array
{
$maxLength = Translate::getLimit($driver);
$maxLength = $maxLength['max_symbols'] ?? null;
if ($maxLength === null || $maxLength > mb_strlen($text)) {
- return [$text];
+ $part = new PartText();
+ $part->add($text);
+ return [$part];
}
if ($textType === TextType::Html) {
- return $this->splitHtmlText($text, $maxLength);
+ return $this->splitHtml($text, $maxLength);
}
- return $this->splitTextBySentences($text, $maxLength);
+ return $this->splitText($text, $maxLength);
}
- private function splitTextBySentences(string $text, int $maxLength): array
+ /**
+ * Splits plain text by delegating to SplitTextCommand.
+ *
+ * @param string $text
+ * @param int $maxLength
+ * @return array<int, PartText>
+ */
+ private function splitText(string $text, int $maxLength): array
{
- // Common expression for searching for sentences.
- $sentenceEndings = '/(?<=[.?!])\s+(?=[A-ZА-Я])/u';
-
- //Dividing the text into sentences
- $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
-
- $parts = [];
- $currentPart = '';
-
- foreach ($sentences as $sentence) {
- // If adding a sentence does not exceed the limit, add it to the current part.
- if (mb_strlen($currentPart . ' ' . $sentence) <= $maxLength) {
- $currentPart .= (empty($currentPart) ? '' : ' ') . $sentence;
- continue;
- }
-
- // Otherwise, save the current part and start a new one.
- if (!empty($currentPart)) {
- $parts[] = $currentPart;
- $currentPart = '';
- }
- $currentPart = $sentence;
- }
-
- if (!empty($currentPart)) {
- $parts[] = $currentPart;
- }
-
- return $parts;
+ $splitTextCommand = new SplitTextCommand();
+ return $splitTextCommand->execute($text, $maxLength);
}
- private function splitHtmlText(string $html, int $maxLength): array
+ /**
+ * Splits HTML by delegating to SplitHTMLCommand.
+ *
+ * @param string $html
+ * @param int $maxLength
+ * @return array<int, PartText>
+ */
+ private function splitHtml(string $html, int $maxLength): array
{
- libxml_use_internal_errors(true);
-
- $dom = new DOMDocument();
- $dom->loadHTML(mb_convert_encoding('' . $html . '', 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
-
- $parts = [];
- $currentPart = new DOMDocument();
- $currentLength = 0;
-
- // Iterate through all child nodes of the root element
- foreach ($dom->documentElement->childNodes as $node) {
- if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
- // Dividing text nodes into sentences
- $sentences = preg_split('/(?<=[.?!])\s+(?=[A-ZА-Я])/', $node->textContent, -1, PREG_SPLIT_NO_EMPTY);
-
- foreach ($sentences as $sentence) {
- $sentenceHtml = htmlspecialchars($sentence);
- if ($currentLength + mb_strlen($sentenceHtml) > $maxLength) {
- $parts[] = $currentPart->saveHTML();
-
- // Start new part
- $currentPart = new DOMDocument();
- $currentLength = 0;
- }
-
- $textNode = $currentPart->createTextNode($sentence . ' ');
- $currentPart->appendChild($textNode);
- $currentLength += mb_strlen($sentenceHtml);
- }
- continue;
- }
-
- $nodeHtml = $dom->saveHTML($node);
-
- if ($currentLength + mb_strlen($nodeHtml) > $maxLength) {
- $parts[] = $currentPart->saveHTML();
-
- // Start new part
- $currentPart = new DOMDocument();
- $currentLength = mb_strlen($nodeHtml);
- }
-
- $currentPart->appendChild($currentPart->importNode($node, true));
- $currentLength += mb_strlen($nodeHtml);
- }
-
- if ($currentPart->hasChildNodes()) {
- $parts[] = $currentPart->saveHTML();
- }
-
- libxml_clear_errors();
-
- return $parts;
+ $splitHTMLCommand = new SplitHTMLCommand($maxLength);
+ return $splitHTMLCommand->execute($html);
}
}
\ No newline at end of file