Add DTOs, exceptions, and jobs for translation service.
Introduced new Data Transfer Objects (DTOs), exceptions, and jobs to enhance the translation service functionality. Updated namespaces for consistency and added rate limiting to the translation provider. Expanded the README with detailed usage instructions.
This commit is contained in:
115
src/Translate/SplitTextIntoPartsCommand.php
Normal file
115
src/Translate/SplitTextIntoPartsCommand.php
Normal file
@@ -0,0 +1,115 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace KorElf\TranslateLaravel\Translate;
|
||||
|
||||
use KorElf\TranslateLaravel\Enums\TextType;
|
||||
use KorElf\TranslateLaravel\Facades\Translate;
|
||||
use DOMDocument;
|
||||
|
||||
/**
 * Splits a text into parts that respect the symbol limit reported for a translation driver.
 *
 * Plain text is cut on sentence boundaries; HTML is cut between the top-level nodes of the
 * fragment (and between sentences of top-level text nodes) so that tags are never torn apart.
 * A single sentence or node that is longer than the limit is not broken up any further and
 * is returned as a part of its own.
 */
final readonly class SplitTextIntoPartsCommand
{
    /**
     * Sentence boundary: whitespace preceded by ".", "?" or "!" and followed by an upper-case
     * Latin or Cyrillic letter. The "u" modifier is mandatory, otherwise the Cyrillic range is
     * interpreted byte by byte and matches in the wrong places.
     */
    private const SENTENCE_BOUNDARY = '/(?<=[.?!])\s+(?=[A-ZА-Я])/u';

    /**
     * Splits $text into parts that fit the "max_symbols" limit of the given driver.
     *
     * @param string      $text     Text to split.
     * @param TextType    $textType TextType::Html selects the HTML-aware splitter; any other
     *                              value uses the sentence splitter.
     * @param string|null $driver   Driver name forwarded as-is to Translate::getLimit().
     *
     * @return list<string> The original text as the single element when no limit is configured
     *                      or the text already fits; otherwise the produced parts.
     */
    public function execute(string $text, TextType $textType, ?string $driver): array
    {
        $limits = Translate::getLimit($driver);
        $maxLength = $limits['max_symbols'] ?? null;

        // A text that is exactly as long as the limit fits too: the splitters below accept
        // parts with a length <= $maxLength, so the same rule is applied here.
        if ($maxLength === null || mb_strlen($text) <= $maxLength) {
            return [$text];
        }

        if ($textType === TextType::Html) {
            return $this->splitHtmlText($text, $maxLength);
        }

        return $this->splitTextBySentences($text, $maxLength);
    }

    /**
     * Greedily packs whole sentences, joined by a single space, into parts of at most
     * $maxLength characters.
     *
     * @return list<string>
     */
    private function splitTextBySentences(string $text, int $maxLength): array
    {
        $parts = [];
        $currentPart = '';

        foreach ($this->splitIntoSentences($text) as $sentence) {
            $candidate = $currentPart === '' ? $sentence : $currentPart . ' ' . $sentence;

            // The first sentence of a part is always accepted, even if it is over the limit:
            // there is no smaller unit to fall back to.
            if ($currentPart === '' || mb_strlen($candidate) <= $maxLength) {
                $currentPart = $candidate;

                continue;
            }

            $parts[] = $currentPart;
            $currentPart = $sentence;
        }

        // Strict comparison instead of empty(): a part consisting of "0" must not be dropped.
        if ($currentPart !== '') {
            $parts[] = $currentPart;
        }

        return $parts;
    }

    /**
     * Splits an HTML fragment into parts without cutting through elements.
     *
     * Top-level elements are kept whole; top-level text nodes are split into sentences.
     * Each part is serialized with DOMDocument::saveHTML().
     *
     * @return list<string>
     */
    private function splitHtmlText(string $html, int $maxLength): array
    {
        // Collect parser warnings internally, and restore the caller's setting afterwards so
        // this method does not change libxml error handling for the rest of the process.
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new DOMDocument();
            // Non-ASCII characters are turned into numeric entities so the parser reads them
            // correctly; mb_convert_encoding(..., 'HTML-ENTITIES') is deprecated since PHP 8.2.
            $dom->loadHTML(
                '<body>' . mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8') . '</body>',
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD,
            );

            $root = $dom->documentElement;
            if ($root === null) {
                // Nothing could be parsed; hand the input back untouched rather than lose it.
                return [$html];
            }

            $parts = [];
            $currentPart = new DOMDocument();
            $currentLength = 0;

            foreach ($root->childNodes as $node) {
                if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
                    foreach ($this->splitIntoSentences($node->textContent) as $sentence) {
                        // +1 accounts for the separating space appended after every sentence.
                        $sentenceLength = mb_strlen(htmlspecialchars($sentence)) + 1;

                        // Only flush when something has been collected, so no empty part is emitted.
                        if ($currentLength + $sentenceLength > $maxLength && $currentPart->hasChildNodes()) {
                            $parts[] = $currentPart->saveHTML();
                            $currentPart = new DOMDocument();
                            $currentLength = 0;
                        }

                        $currentPart->appendChild($currentPart->createTextNode($sentence . ' '));
                        $currentLength += $sentenceLength;
                    }

                    continue;
                }

                $nodeLength = mb_strlen((string) $dom->saveHTML($node));

                if ($currentLength + $nodeLength > $maxLength && $currentPart->hasChildNodes()) {
                    $parts[] = $currentPart->saveHTML();
                    $currentPart = new DOMDocument();
                    // Reset to zero: the node's own length is added exactly once below.
                    $currentLength = 0;
                }

                $currentPart->appendChild($currentPart->importNode($node, true));
                $currentLength += $nodeLength;
            }

            if ($currentPart->hasChildNodes()) {
                $parts[] = $currentPart->saveHTML();
            }

            return $parts;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Splits text on SENTENCE_BOUNDARY, dropping empty pieces.
     *
     * When the regex engine fails (preg_split() returns false, e.g. on malformed UTF-8)
     * the whole text is returned as one sentence so that no content is lost.
     *
     * @return list<string>
     */
    private function splitIntoSentences(string $text): array
    {
        $sentences = preg_split(self::SENTENCE_BOUNDARY, $text, -1, PREG_SPLIT_NO_EMPTY);

        return $sentences === false ? [$text] : $sentences;
    }
}
|
Reference in New Issue
Block a user