Add DTOs, exceptions, and jobs for translation service.

Introduced new Data Transfer Objects (DTOs), exceptions, and jobs to enhance the translation service functionality. Updated namespaces for consistency and added rate limiting to the translation provider. Expanded the README with detailed usage instructions.
This commit is contained in:
2024-10-11 00:22:46 +05:00
parent 0d13d602a7
commit 55b04f0eab
26 changed files with 751 additions and 36 deletions

View File

@@ -0,0 +1,115 @@
<?php declare(strict_types=1);
namespace KorElf\TranslateLaravel\Translate;
use KorElf\TranslateLaravel\Enums\TextType;
use KorElf\TranslateLaravel\Facades\Translate;
use DOMDocument;
/**
 * Splits a text into chunks that fit the per-request symbol limit of a translation driver.
 *
 * Plain text is cut on sentence boundaries; HTML is cut between the top-level nodes of the
 * fragment (bare text nodes are additionally cut on sentence boundaries). A single sentence
 * or a single HTML element that is longer than the limit is NOT broken up any further and is
 * returned as one oversized part.
 */
final readonly class SplitTextIntoPartsCommand
{
    /**
     * Sentence boundary: whitespace preceded by ".", "?" or "!" and followed by an uppercase
     * Latin or Cyrillic letter. The "u" modifier is required so that the Cyrillic range is
     * matched per code point instead of per byte.
     */
    private const SENTENCE_BOUNDARY = '/(?<=[.?!])\s+(?=[A-ZА-Я])/u';

    /**
     * Split $text into parts according to the limit reported for $driver.
     *
     * The limit is read from the 'max_symbols' key of Translate::getLimit($driver).
     * When that key is missing/null, or the text already fits into the limit
     * (length less than or equal to it), the text is returned untouched as a single part.
     *
     * @param string      $text     Text (plain or HTML) to split.
     * @param TextType    $textType TextType::Html selects the DOM-based splitter; anything else is split by sentences.
     * @param string|null $driver   Driver name forwarded to Translate::getLimit().
     *
     * @return list<string>
     */
    public function execute(string $text, TextType $textType, ?string $driver): array
    {
        $limits = Translate::getLimit($driver);
        $maxLength = $limits['max_symbols'] ?? null;

        // ">=": a text that is exactly as long as the limit fits and must not be re-assembled,
        // because splitting and re-joining may alter its whitespace or markup.
        if ($maxLength === null || $maxLength >= mb_strlen($text)) {
            return [$text];
        }

        if ($textType === TextType::Html) {
            return $this->splitHtmlText($text, $maxLength);
        }

        return $this->splitTextBySentences($text, $maxLength);
    }

    /**
     * Greedily pack whole sentences into parts of at most $maxLength characters.
     *
     * Sentences inside one part are joined with a single space.
     *
     * @return list<string>
     */
    private function splitTextBySentences(string $text, int $maxLength): array
    {
        $parts = [];
        $currentPart = '';

        foreach ($this->splitIntoSentences($text) as $sentence) {
            $candidate = $currentPart === '' ? $sentence : $currentPart . ' ' . $sentence;

            // The sentence still fits: extend the current part.
            if (mb_strlen($candidate) <= $maxLength) {
                $currentPart = $candidate;
                continue;
            }

            // Otherwise close the current part (if any) and start a new one with this sentence.
            // Strict comparison on purpose: empty() would treat the string "0" as empty.
            if ($currentPart !== '') {
                $parts[] = $currentPart;
            }
            $currentPart = $sentence;
        }

        if ($currentPart !== '') {
            $parts[] = $currentPart;
        }

        return $parts;
    }

    /**
     * Split an HTML fragment into serialized HTML parts of roughly $maxLength characters.
     *
     * The fragment is wrapped in <body> and parsed; its top-level nodes are then distributed
     * over fresh DOMDocument instances. Lengths are measured on the serialized node
     * (or on the htmlspecialchars()-escaped sentence for bare text nodes).
     *
     * @return list<string>
     */
    private function splitHtmlText(string $html, int $maxLength): array
    {
        // Remember the caller's libxml error mode so it can be restored afterwards.
        $previousErrorMode = libxml_use_internal_errors(true);

        try {
            $dom = new DOMDocument();
            // Non-ASCII characters are turned into numeric entities so the HTML parser does not
            // misread UTF-8 input (replacement for the deprecated 'HTML-ENTITIES' mbstring encoding).
            $loaded = $dom->loadHTML(
                mb_encode_numericentity('<body>' . $html . '</body>', [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD,
            );

            if ($loaded === false || $dom->documentElement === null) {
                // Nothing could be parsed: hand the input back unchanged rather than losing it.
                return [$html];
            }

            $parts = [];
            $currentPart = new DOMDocument();
            $currentLength = 0;

            // Iterate through all child nodes of the root (<body>) element.
            foreach ($dom->documentElement->childNodes as $node) {
                if ($node->nodeType === XML_TEXT_NODE || $node->nodeType === XML_CDATA_SECTION_NODE) {
                    // Bare text between elements is divided into sentences.
                    foreach ($this->splitIntoSentences($node->textContent) as $sentence) {
                        $sentenceLength = mb_strlen(htmlspecialchars($sentence));

                        // Flush only a non-empty part, otherwise an oversized first sentence
                        // would produce an empty leading part.
                        if ($currentLength + $sentenceLength > $maxLength && $currentPart->hasChildNodes()) {
                            $parts[] = $currentPart->saveHTML();
                            $currentPart = new DOMDocument();
                            $currentLength = 0;
                        }

                        $currentPart->appendChild($currentPart->createTextNode($sentence . ' '));
                        $currentLength += $sentenceLength;
                    }

                    continue;
                }

                $nodeLength = mb_strlen((string) $dom->saveHTML($node));

                if ($currentLength + $nodeLength > $maxLength && $currentPart->hasChildNodes()) {
                    $parts[] = $currentPart->saveHTML();
                    $currentPart = new DOMDocument();
                    // Reset to zero: the node's own length is added exactly once below.
                    $currentLength = 0;
                }

                $currentPart->appendChild($currentPart->importNode($node, true));
                $currentLength += $nodeLength;
            }

            if ($currentPart->hasChildNodes()) {
                $parts[] = $currentPart->saveHTML();
            }

            return $parts;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    /**
     * Cut $text on sentence boundaries, dropping empty pieces.
     *
     * If the regular expression engine fails (preg_split() returns false, e.g. on malformed
     * UTF-8), the whole text is returned as a single sentence so no content is lost.
     *
     * @return list<string>
     */
    private function splitIntoSentences(string $text): array
    {
        $sentences = preg_split(self::SENTENCE_BOUNDARY, $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($sentences === false) {
            return $text === '' ? [] : [$text];
        }

        return $sentences;
    }
}