Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
65cfbd88d8
|
|||
|
fe90500e44
|
|||
|
92fd2cab1b
|
|||
|
e2eb80f140
|
@@ -7,7 +7,7 @@
|
|||||||
### TRANSLATE_SERVICE
|
### TRANSLATE_SERVICE
|
||||||
Сервис, через который по умолчанию выполняется перевод. На данный момент доступны только сервисы **yandex** и **log**.
|
Сервис, через который по умолчанию выполняется перевод. На данный момент доступны только сервисы **yandex** и **log**.
|
||||||
|
|
||||||
**yandex** - https://yandex.cloud/ru/services/translate<br>
|
**yandex** - https://aistudio.yandex.ru/docs/ru/translate/concepts/<br>
|
||||||
**log** - для проверки и внедрения в свой продукт.
|
**log** - для проверки и внедрения в свой продукт.
|
||||||
|
|
||||||
### TRANSLATE_YANDEX_FOLDER_ID
|
### TRANSLATE_YANDEX_FOLDER_ID
|
||||||
|
|||||||
+4
-3
@@ -19,11 +19,12 @@
|
|||||||
],
|
],
|
||||||
"require": {
|
"require": {
|
||||||
"php": "^8.2",
|
"php": "^8.2",
|
||||||
"illuminate/support": "^10.0|^11.0",
|
"illuminate/support": "^10.0|^11.0|^12.0|^13.0",
|
||||||
"guzzlehttp/guzzle": "^7.0.1",
|
"guzzlehttp/guzzle": "^7.0.1",
|
||||||
"web-token/jwt-framework": "^3.0",
|
"web-token/jwt-framework": "^4.1",
|
||||||
"ext-libxml": "*",
|
"ext-libxml": "*",
|
||||||
"ext-dom": "*"
|
"ext-dom": "*",
|
||||||
|
"ext-mbstring": "*"
|
||||||
},
|
},
|
||||||
"extra": {
|
"extra": {
|
||||||
"laravel": {
|
"laravel": {
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
<?php declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace KorElf\TranslateLaravel\DTO;
|
||||||
|
|
||||||
|
/**
 * Ordered list of text fragments, each optionally wrapped by a prefix and a suffix.
 *
 * Only the fragments themselves are exposed for translation; the prefix and suffix
 * of every fragment are glued back on when the final string is assembled.
 */
final class PartText
{
    /** @var array<int, string> Fragments in insertion order. */
    private array $texts = [];

    /** @var array<int, string|null> Prefix emitted before the fragment with the same index. */
    private array $beforeTexts = [];

    /** @var array<int, string|null> Suffix emitted after the fragment with the same index. */
    private array $afterTexts = [];

    /** Index the next added fragment will receive. */
    private int $part = 0;

    /**
     * Appends a fragment together with its optional prefix and suffix.
     *
     * @param string $text Fragment returned by getTextsForTranslation().
     * @param string|null $beforeText Prefix placed in front of the fragment on assembly.
     * @param string|null $afterText Suffix placed behind the fragment on assembly.
     */
    public function add(string $text, ?string $beforeText = null, ?string $afterText = null): void
    {
        $index = $this->part;

        $this->texts[$index] = $text;
        $this->beforeTexts[$index] = $beforeText;
        $this->afterTexts[$index] = $afterText;

        $this->part = $index + 1;
    }

    /**
     * Returns the stored fragments keyed by their insertion index.
     *
     * @return array<int, string>
     */
    public function getTextsForTranslation(): array
    {
        return $this->texts;
    }

    /**
     * Builds the final string from the given fragments.
     *
     * Each entry of $texts is wrapped with the prefix and suffix stored under the same key.
     *
     * @param array<int, string> $texts Fragments keyed like the result of getTextsForTranslation().
     */
    public function getTextsAfterTranslation(array $texts): string
    {
        $result = '';

        foreach ($texts as $key => $text) {
            $prefix = $this->beforeTexts[$key];
            $suffix = $this->afterTexts[$key];

            $result .= ($prefix !== null ? $prefix : '')
                . $text
                . ($suffix !== null ? $suffix : '');
        }

        return $result;
    }
}
|
||||||
@@ -10,7 +10,7 @@ final readonly class ProcessTranslateDto
|
|||||||
private string $groupName,
|
private string $groupName,
|
||||||
private string $key,
|
private string $key,
|
||||||
private int $part,
|
private int $part,
|
||||||
private string $text,
|
private PartText $text,
|
||||||
private TextType $textType,
|
private TextType $textType,
|
||||||
private string $targetLanguageCode,
|
private string $targetLanguageCode,
|
||||||
private ?string $sourceLanguageCode = null,
|
private ?string $sourceLanguageCode = null,
|
||||||
@@ -32,7 +32,7 @@ final readonly class ProcessTranslateDto
|
|||||||
return $this->part;
|
return $this->part;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getText(): string
|
public function getText(): PartText
|
||||||
{
|
{
|
||||||
return $this->text;
|
return $this->text;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,28 +59,62 @@ final class ProcessTranslate implements ShouldQueue, ShouldBeEncrypted, ProcessT
|
|||||||
{
|
{
|
||||||
$param = $this->param;
|
$param = $this->param;
|
||||||
$groupName = $param->getGroupName();
|
$groupName = $param->getGroupName();
|
||||||
|
$key = $param->getKey();
|
||||||
|
$part = $param->getPart();
|
||||||
|
|
||||||
$translated = Cache::get($groupName, []);
|
$translated = Cache::get($groupName, []);
|
||||||
if (!isset($translated[$param->getKey()])) {
|
if (!isset($translated[$param->getKey()])) {
|
||||||
$translated[$param->getKey()] = [];
|
$translated[$param->getKey()] = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$translate = Translate::service($param->getDriver());
|
$translated[$key][$part] = $this->translate();
|
||||||
$function = $param->getTextType()->functionName();
|
|
||||||
$key = $param->getKey();
|
|
||||||
$part = $param->getPart();
|
|
||||||
|
|
||||||
$translated[$key][$part] = $param->getText();
|
|
||||||
if (\trim($param->getText()) !== '') {
|
|
||||||
|
|
||||||
$translated[$key][$part] = $translate->{$function}(
|
|
||||||
$param->getText(),
|
|
||||||
$param->getTargetLanguageCode(),
|
|
||||||
$param->getSourceLanguageCode()
|
|
||||||
);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Cache::put($groupName, $translated, 86400);
|
Cache::put($groupName, $translated, 86400);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translates the fragments of the job's PartText and assembles the resulting string.
 *
 * Fragments that are blank after trimming are not sent to the driver; they are passed
 * through unchanged. Translations are mapped back to the fragments by their position
 * in the list that was sent.
 */
private function translate(): string
{
    $param = $this->param;

    $service = Translate::service($param->getDriver());
    $method = $param->getTextType()->functionName();

    $partText = $param->getText();
    $fragments = $partText->getTextsForTranslation();

    // array_filter keeps the original keys, so they can be recovered after the call.
    $translatable = \array_filter(
        $fragments,
        static fn ($fragment): bool => \trim($fragment) !== ''
    );

    if ($translatable !== []) {
        $fragmentKeys = \array_keys($translatable);

        $translations = $service->{$method}(
            \array_values($translatable),
            $param->getTargetLanguageCode(),
            $param->getSourceLanguageCode()
        );

        foreach ($translations as $position => $translation) {
            // Ignore anything the driver returns beyond what was requested.
            if (isset($fragmentKeys[$position])) {
                $fragments[$fragmentKeys[$position]] = $translation;
            }
        }
    }

    return $partText->getTextsAfterTranslation($fragments);
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,270 @@
|
|||||||
|
<?php declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace KorElf\TranslateLaravel\Translate\SplitTextIntoParts;
|
||||||
|
|
||||||
|
use DOMNodeList;
|
||||||
|
use KorElf\TranslateLaravel\DTO\PartText;
|
||||||
|
use DOMNode;
|
||||||
|
use DOMDocument;
|
||||||
|
|
||||||
|
/**
 * Splits an HTML string into PartText chunks whose accumulated length stays within a limit.
 *
 * Content that fits is kept as serialized HTML. Elements that are too long are opened up:
 * their opening and closing tags are stored as before/after text of a fragment, and their
 * children are processed recursively. Tables are walked section by section, row by row
 * and cell by cell.
 */
final class SplitHTMLCommand
{
    /** Pattern matching the whitespace between two sentences. */
    private const SENTENCE_BOUNDARY = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';

    /** Table children that group rows. */
    private const TABLE_SECTION_TAGS = ['thead', 'tbody', 'tfoot'];

    /** Row children that hold cell content. */
    private const TABLE_CELL_TAGS = ['td', 'th'];

    /** Elements whose text content must not be HTML-escaped. */
    private const RAW_TEXT_TAGS = ['script', 'style'];

    /** @var array<int, PartText> */
    private array $parts = [];
    private int $currentPart = 0;
    private int $currentLength = 0;
    private string $currentText = '';
    private string $currentBeforeText = '';
    private string $currentAfterText = '';

    /**
     * @param int $maxLength Maximum accumulated length (in characters) before a new part is started.
     */
    public function __construct(private readonly int $maxLength)
    {
    }

    /**
     * @param string $html
     * @return array<int, PartText>
     */
    public function execute(string $html): array
    {
        $this->currentPart = 0;
        $this->currentLength = 0;
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
        $this->parts = [
            0 => new PartText(),
        ];

        // Remember the caller's libxml setting so this global flag is not left switched on.
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new DOMDocument();
            $dom->loadHTML(
                // Non-ASCII characters are passed as numeric entities so the parser does not
                // depend on an encoding declaration. The 'HTML-ENTITIES' pseudo-encoding of
                // mb_convert_encoding() is deprecated since PHP 8.2.
                mb_encode_numericentity('<body>' . $html . '</body>', [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );

            $root = $dom->documentElement;
            if ($root !== null) {
                $this->processNodes($dom, $root->childNodes);
            }

            if ($this->currentLength > 0 || $this->hasPendingOutput()) {
                $this->addTextToPart();
                $this->currentLength = 0;
                $this->currentPart++;
            }
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        return $this->parts;
    }

    /**
     * Dispatches every text and element node of the list; other node types are skipped.
     */
    private function processNodes(DOMDocument $dom, DOMNodeList $nodes): void
    {
        /** @var DOMNode $node */
        foreach ($nodes as $node) {
            if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
                $this->processText($this->serializeText($node));
                continue;
            }

            if ($node->nodeType === XML_ELEMENT_NODE) {
                $this->processHtml($dom, $node);
            }
        }
    }

    /**
     * Returns the node's text in a form that is safe to embed in the HTML output.
     *
     * textContent is entity-decoded, so it is escaped again; otherwise "&lt;" in the input
     * would come out as a literal "<". CDATA sections and script/style content stay raw.
     */
    private function serializeText(DOMNode $node): string
    {
        $text = $node->textContent;
        $parentTag = strtolower($node->parentNode?->nodeName ?? '');

        if ($node->nodeType === XML_CDATA_SECTION_NODE || in_array($parentTag, self::RAW_TEXT_TAGS, true)) {
            return $text;
        }

        return htmlspecialchars($text, ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8');
    }

    /**
     * Appends text to the current fragment, starting a new part when the limit would be exceeded.
     */
    private function processText(string $text): void
    {
        $textLength = mb_strlen($text);

        if ($this->currentLength + $textLength <= $this->maxLength) {
            $this->currentText .= $text;
            $this->currentLength += $textLength;
            return;
        }

        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        if ($textLength <= $this->maxLength) {
            $this->currentText .= $text;
            $this->currentLength += $textLength;
            return;
        }

        $this->splitLongText($text);
    }

    /**
     * Distributes a text that is longer than the limit over several parts, sentence by sentence.
     *
     * A single sentence longer than the limit is kept whole in a part of its own.
     */
    private function splitLongText(string $text): void
    {
        $sentences = preg_split(self::SENTENCE_BOUNDARY, $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($sentences === false) {
            // The pattern could not be applied (e.g. malformed UTF-8): keep the text in one piece.
            $sentences = [$text];
        }

        foreach ($sentences as $sentence) {
            $sentenceLength = mb_strlen($sentence);
            // A joining space is only needed, and only counted, when text is already buffered.
            $separator = $this->currentText === '' ? '' : ' ';
            $addedLength = $sentenceLength + mb_strlen($separator);

            if ($this->currentLength + $addedLength <= $this->maxLength) {
                $this->currentText .= $separator . $sentence;
                $this->currentLength += $addedLength;
                continue;
            }

            // Nothing to close when the buffer is still empty; this avoids creating empty parts.
            if ($this->currentLength > 0) {
                $this->addTextToPart();
                $this->newPart();
            }

            $this->currentText = $sentence;
            $this->currentLength = $sentenceLength;
        }
    }

    /**
     * Adds an element as serialized HTML, or opens it up when it is longer than the limit.
     */
    private function processHtml(DOMDocument $dom, DOMNode $node): void
    {
        $html = $dom->saveHTML($node);
        $htmlLength = mb_strlen($html);

        if ($this->currentLength + $htmlLength <= $this->maxLength) {
            $this->currentText .= $html;
            $this->currentLength += $htmlLength;
            return;
        }

        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        if ($htmlLength <= $this->maxLength) {
            $this->currentText .= $html;
            $this->currentLength += $htmlLength;
            return;
        }

        if (!$node->hasChildNodes()) {
            // An oversized element without children cannot be divided further. It is stored
            // as before-text so it stays in the output instead of being dropped.
            $this->currentBeforeText .= $html;
            $this->addTextToPart();
            return;
        }

        $tag = strtolower($node->nodeName);
        if ($tag === 'table') {
            $this->splitTable($dom, $node);
            return;
        }

        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';

        $this->processNodes($dom, $node->childNodes);

        $this->currentAfterText .= '</' . $tag . '>';
        $this->addTextToPart();
        $this->newPart();
    }

    /**
     * Walks an oversized table child by child, keeping the table tags as before/after text.
     */
    private function splitTable(DOMDocument $dom, DOMNode $node): void
    {
        if ($this->currentLength > 0) {
            $this->addTextToPart();
            $this->newPart();
        }

        $this->currentBeforeText .= '<table' . $this->attributesToString($node) . '>';

        foreach ($node->childNodes as $childNode) {
            if ($childNode->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $this->processTableSection($dom, $childNode);
        }

        $this->currentAfterText .= '</table>';
        $this->addTextToPart();
        $this->newPart();
    }

    /**
     * Handles a direct child of a table: a row, a row group, or any other element.
     */
    private function processTableSection(DOMDocument $dom, DOMNode $node): void
    {
        $tag = strtolower($node->nodeName);

        if ($tag === 'tr') {
            $this->processTableTr($dom, $node);
            return;
        }

        if (in_array($tag, self::TABLE_SECTION_TAGS, true)) {
            $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';

            foreach ($node->childNodes as $childNode) {
                if (strtolower($childNode->nodeName) === 'tr') {
                    $this->processTableTr($dom, $childNode);
                }
            }

            $this->currentAfterText .= '</' . $tag . '>';
            // Flush now: a pending closing tag would otherwise be emitted after the
            // first cell of the following section.
            $this->addTextToPart();
            return;
        }

        // Other table children (caption, colgroup, ...) are kept instead of being dropped.
        $this->processHtml($dom, $node);
        if ($this->hasPendingOutput()) {
            $this->addTextToPart();
        }
    }

    /**
     * Emits a table row and its td/th cells.
     */
    private function processTableTr(DOMDocument $dom, DOMNode $node): void
    {
        $this->currentBeforeText .= '<tr' . $this->attributesToString($node) . '>';

        foreach ($node->childNodes as $childNode) {
            if (in_array(strtolower($childNode->nodeName), self::TABLE_CELL_TAGS, true)) {
                $this->processTableTd($dom, $childNode);
            }
        }

        $this->currentAfterText .= '</tr>';
        $this->addTextToPart();
    }

    /**
     * Emits a table cell using the cell's own tag name (td or th).
     */
    private function processTableTd(DOMDocument $dom, DOMNode $node): void
    {
        $tag = strtolower($node->nodeName);

        $this->currentBeforeText .= '<' . $tag . $this->attributesToString($node) . '>';
        $this->processNodes($dom, $node->childNodes);
        $this->currentAfterText .= '</' . $tag . '>';
        $this->addTextToPart();
    }

    /**
     * Serializes the node's attributes as ` name="value"` pairs with escaped values.
     */
    private function attributesToString(DOMNode $node): string
    {
        if (!$node->hasAttributes()) {
            return '';
        }

        $parts = [];
        foreach ($node->attributes as $attr) {
            $parts[] = sprintf(
                ' %s="%s"',
                $attr->nodeName,
                htmlspecialchars($attr->nodeValue ?? '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            );
        }

        return implode('', $parts);
    }

    /**
     * Tells whether any buffered text, before-text or after-text is waiting to be stored.
     */
    private function hasPendingOutput(): bool
    {
        return $this->currentText !== ''
            || $this->currentBeforeText !== ''
            || $this->currentAfterText !== '';
    }

    /**
     * Stores the buffered fragment in the current part and clears the buffers.
     *
     * The accumulated length is left as is; only newPart() resets it.
     */
    private function addTextToPart(): void
    {
        $this->parts[$this->currentPart]->add($this->currentText, $this->currentBeforeText, $this->currentAfterText);
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
    }

    /**
     * Clears all buffers and opens the next, empty part.
     */
    private function newPart(): void
    {
        $this->currentText = '';
        $this->currentBeforeText = '';
        $this->currentAfterText = '';
        $this->currentLength = 0;
        $this->currentPart++;
        $this->parts[$this->currentPart] = new PartText();
    }
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
<?php declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace KorElf\TranslateLaravel\Translate\SplitTextIntoParts;
|
||||||
|
|
||||||
|
use KorElf\TranslateLaravel\DTO\PartText;
|
||||||
|
|
||||||
|
/**
 * Splits plain text into parts along sentence boundaries.
 */
final readonly class SplitTextCommand
{
    /**
     * Groups consecutive sentences into parts no longer than $maxLength characters.
     *
     * Sentences are joined with a single space. A sentence that alone exceeds the limit
     * is kept whole in a part of its own.
     *
     * @param string $text
     * @param int $maxLength
     * @return array<int, PartText>
     */
    public function execute(string $text, int $maxLength): array
    {
        // Common expression for searching for sentences.
        $sentenceEndings = '/(?<=[.?!])[ \t]+(?=[A-ZА-Я])/u';

        $sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($sentences === false) {
            // The pattern could not be applied (e.g. malformed UTF-8): keep the text
            // in one piece rather than returning nothing.
            $sentences = $text === '' ? [] : [$text];
        }

        $parts = [];
        $currentPart = '';

        foreach ($sentences as $sentence) {
            // Strict comparison with '' on purpose: empty() would also discard the string "0".
            $candidate = $currentPart === '' ? $sentence : $currentPart . ' ' . $sentence;

            // If adding a sentence does not exceed the limit, add it to the current part.
            if (mb_strlen($candidate) <= $maxLength) {
                $currentPart = $candidate;
                continue;
            }

            // Otherwise, save the current part and start a new one.
            if ($currentPart !== '') {
                $parts[] = $this->makePart($currentPart);
            }
            $currentPart = $sentence;
        }

        if ($currentPart !== '') {
            $parts[] = $this->makePart($currentPart);
        }

        return $parts;
    }

    /**
     * Wraps a text chunk in a PartText holding that single fragment.
     */
    private function makePart(string $text): PartText
    {
        $part = new PartText();
        $part->add($text);

        return $part;
    }
}
|
||||||
@@ -2,114 +2,56 @@
|
|||||||
|
|
||||||
namespace KorElf\TranslateLaravel\Translate;
|
namespace KorElf\TranslateLaravel\Translate;
|
||||||
|
|
||||||
|
use KorElf\TranslateLaravel\DTO\PartText;
|
||||||
use KorElf\TranslateLaravel\Enums\TextType;
|
use KorElf\TranslateLaravel\Enums\TextType;
|
||||||
use KorElf\TranslateLaravel\Facades\Translate;
|
use KorElf\TranslateLaravel\Facades\Translate;
|
||||||
use DOMDocument;
|
use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitHTMLCommand;
|
||||||
|
use KorElf\TranslateLaravel\Translate\SplitTextIntoParts\SplitTextCommand;
|
||||||
|
|
||||||
final readonly class SplitTextIntoPartsCommand
|
final readonly class SplitTextIntoPartsCommand
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* @param string $text
|
||||||
|
* @param TextType $textType
|
||||||
|
* @param string|null $driver
|
||||||
|
* @return array<int, PartText>
|
||||||
|
*/
|
||||||
public function execute(string $text, TextType $textType, ?string $driver): array
|
public function execute(string $text, TextType $textType, ?string $driver): array
|
||||||
{
|
{
|
||||||
$maxLength = Translate::getLimit($driver);
|
$maxLength = Translate::getLimit($driver);
|
||||||
$maxLength = $maxLength['max_symbols'] ?? null;
|
$maxLength = $maxLength['max_symbols'] ?? null;
|
||||||
if ($maxLength === null || $maxLength > mb_strlen($text)) {
|
if ($maxLength === null || $maxLength > mb_strlen($text)) {
|
||||||
return [$text];
|
$part = new PartText();
|
||||||
|
$part->add($text);
|
||||||
|
return [$part];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($textType === TextType::Html) {
|
if ($textType === TextType::Html) {
|
||||||
return $this->splitHtmlText($text, $maxLength);
|
return $this->splitHtml($text, $maxLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->splitTextBySentences($text, $maxLength);
|
return $this->splitText($text, $maxLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function splitTextBySentences(string $text, int $maxLength): array
|
/**
|
||||||
|
* @param string $text
|
||||||
|
* @param int $maxLength
|
||||||
|
* @return array<int, PartText>
|
||||||
|
*/
|
||||||
|
private function splitText(string $text, int $maxLength): array
|
||||||
{
|
{
|
||||||
// Common expression for searching for sentences.
|
$splitTextCommand = new SplitTextCommand();
|
||||||
$sentenceEndings = '/(?<=[.?!])\s+(?=[A-ZА-Я])/u';
|
return $splitTextCommand->execute($text, $maxLength);
|
||||||
|
|
||||||
//Dividing the text into sentences
|
|
||||||
$sentences = preg_split($sentenceEndings, $text, -1, PREG_SPLIT_NO_EMPTY);
|
|
||||||
|
|
||||||
$parts = [];
|
|
||||||
$currentPart = '';
|
|
||||||
|
|
||||||
foreach ($sentences as $sentence) {
|
|
||||||
// If adding a sentence does not exceed the limit, add it to the current part.
|
|
||||||
if (mb_strlen($currentPart . ' ' . $sentence) <= $maxLength) {
|
|
||||||
$currentPart .= (empty($currentPart) ? '' : ' ') . $sentence;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise, save the current part and start a new one.
|
|
||||||
if (!empty($currentPart)) {
|
|
||||||
$parts[] = $currentPart;
|
|
||||||
$currentPart = '';
|
|
||||||
}
|
|
||||||
$currentPart = $sentence;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($currentPart)) {
|
|
||||||
$parts[] = $currentPart;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $parts;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function splitHtmlText(string $html, int $maxLength): array
|
/**
|
||||||
|
* @param string $html
|
||||||
|
* @param int $maxLength
|
||||||
|
* @return array<int, PartText>
|
||||||
|
*/
|
||||||
|
private function splitHtml(string $html, int $maxLength): array
|
||||||
{
|
{
|
||||||
libxml_use_internal_errors(true);
|
$splitHTMLCommand = new SplitHTMLCommand($maxLength);
|
||||||
|
return $splitHTMLCommand->execute($html);
|
||||||
$dom = new DOMDocument();
|
|
||||||
$dom->loadHTML(mb_convert_encoding('<body>' . $html . '</body>', 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
|
||||||
|
|
||||||
$parts = [];
|
|
||||||
$currentPart = new DOMDocument();
|
|
||||||
$currentLength = 0;
|
|
||||||
|
|
||||||
// Iterate through all child nodes of the root element
|
|
||||||
foreach ($dom->documentElement->childNodes as $node) {
|
|
||||||
if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
|
|
||||||
// Dividing text nodes into sentences
|
|
||||||
$sentences = preg_split('/(?<=[.?!])\s+(?=[A-ZА-Я])/', $node->textContent, -1, PREG_SPLIT_NO_EMPTY);
|
|
||||||
|
|
||||||
foreach ($sentences as $sentence) {
|
|
||||||
$sentenceHtml = htmlspecialchars($sentence);
|
|
||||||
if ($currentLength + mb_strlen($sentenceHtml) > $maxLength) {
|
|
||||||
$parts[] = $currentPart->saveHTML();
|
|
||||||
|
|
||||||
// Start new part
|
|
||||||
$currentPart = new DOMDocument();
|
|
||||||
$currentLength = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
$textNode = $currentPart->createTextNode($sentence . ' ');
|
|
||||||
$currentPart->appendChild($textNode);
|
|
||||||
$currentLength += mb_strlen($sentenceHtml);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$nodeHtml = $dom->saveHTML($node);
|
|
||||||
|
|
||||||
if ($currentLength + mb_strlen($nodeHtml) > $maxLength) {
|
|
||||||
$parts[] = $currentPart->saveHTML();
|
|
||||||
|
|
||||||
// Start new part
|
|
||||||
$currentPart = new DOMDocument();
|
|
||||||
$currentLength = mb_strlen($nodeHtml);
|
|
||||||
}
|
|
||||||
|
|
||||||
$currentPart->appendChild($currentPart->importNode($node, true));
|
|
||||||
$currentLength += mb_strlen($nodeHtml);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($currentPart->hasChildNodes()) {
|
|
||||||
$parts[] = $currentPart->saveHTML();
|
|
||||||
}
|
|
||||||
|
|
||||||
libxml_clear_errors();
|
|
||||||
|
|
||||||
return $parts;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user