diff --git a/connectors/l10n_drupal/src/Plugin/l10n_server/Connector/Drupal.php b/connectors/l10n_drupal/src/Plugin/l10n_server/Connector/Drupal.php index eb5b355d09b49aa2bf0e1309b9b9e8509d2fdac8..1de0ba1a08b68c28f097892356c1b749f8eb9c15 100644 --- a/connectors/l10n_drupal/src/Plugin/l10n_server/Connector/Drupal.php +++ b/connectors/l10n_drupal/src/Plugin/l10n_server/Connector/Drupal.php @@ -73,12 +73,12 @@ class Drupal extends ConfigurableConnectorPluginBase implements ConnectorScanHan /** * {@inheritdoc} */ - public function scanHandler(): ConnectorScanHandlerResultInterface { + public function scanHandler(?int $row_limit = NULL, bool $resume = FALSE): ConnectorScanHandlerResultInterface { // @todo Fix scanner handling. - return new ConnectorScanHandlerResult([ - 'projects' => rand(0, 9), - 'releases' => rand(0, 99), - ]); + return new ConnectorScanHandlerResult( + rand(0, 9), + rand(0, 99), + ); } /** diff --git a/connectors/l10n_drupal_rest/l10n_drupal_rest.services.yml b/connectors/l10n_drupal_rest/l10n_drupal_rest.services.yml index afdcb4a6a9f082f3c5dd412a2f04ba9a8e14bd3e..de3b59e4071ef1017aaf72084a9cbe2da85273f0 100644 --- a/connectors/l10n_drupal_rest/l10n_drupal_rest.services.yml +++ b/connectors/l10n_drupal_rest/l10n_drupal_rest.services.yml @@ -1,8 +1,11 @@ services: + _defaults: + autowire: true l10n_drupal_rest.scanner: class: Drupal\l10n_drupal_rest\ScannerService - arguments: ['@config.factory', '@state', '@http_client', '@file.repository', '@file_system', '@logger.factory', '@entity_type.manager'] l10n_drupal_rest.parser: class: Drupal\l10n_drupal_rest\ParserService - arguments: ['@config.factory', '@state', '@http_client', '@file.repository', '@file_system', '@logger.factory', '@entity_type.manager'] Drupal\l10n_drupal_rest\ParserService: '@l10n_drupal_rest.parser' + logger.channel.l10n_drupal_rest: + parent: logger.channel_base + arguments: ['l10n_drupal_rest'] diff --git a/connectors/l10n_drupal_rest/src/ParserService.php b/connectors/l10n_drupal_rest/src/ParserService.php index b74e9500f55cc9e097e6bca2f2bd4e72b707b632..957c4ae90873de4188f9021a68842b3c57383968 100644 --- a/connectors/l10n_drupal_rest/src/ParserService.php +++ b/connectors/l10n_drupal_rest/src/ParserService.php @@ -8,7 +8,6 @@ use Drupal\Core\Config\ConfigFactoryInterface; use Drupal\Core\Config\ImmutableConfig; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\File\FileSystemInterface; -use Drupal\Core\Logger\LoggerChannelFactoryInterface; use Drupal\Core\Logger\LoggerChannelInterface; use Drupal\Core\State\StateInterface; use Drupal\Core\StringTranslation\StringTranslationTrait; @@ -18,6 +17,7 @@ use Drupal\l10n_server\Entity\L10nServerError; use Drupal\l10n_server\Entity\L10nServerReleaseInterface; use Drupal\l10n_server\L10nHelper; use GuzzleHttp\ClientInterface; +use Symfony\Component\DependencyInjection\Attribute\Autowire; /** * Service description. @@ -149,31 +149,14 @@ class ParserService { */ private array $files; - /** - * Constructs a DrupalRestService object. - * - * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory - * The config factory. - * @param \Drupal\Core\State\StateInterface $state - * The state service. - * @param \GuzzleHttp\ClientInterface $http_client - * The HTTP client. - * @param \Drupal\file\FileRepositoryInterface $file_repository - * The file repository. - * @param \Drupal\Core\File\FileSystemInterface $file_system - * The file system. - * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory - * The logger factory. - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager - * The entity type manager. - */ public function __construct( ConfigFactoryInterface $config_factory, StateInterface $state, ClientInterface $http_client, FileRepositoryInterface $file_repository, FileSystemInterface $file_system, - LoggerChannelFactoryInterface $logger_factory, + #[Autowire(service: 'logger.channel.l10n_drupal_rest')] + LoggerChannelInterface $logger, EntityTypeManagerInterface $entity_type_manager, ) { $this->config = $config_factory->get('l10n_server.settings'); @@ -181,7 +164,7 @@ class ParserService { $this->httpClient = $http_client; $this->fileRepository = $file_repository; $this->fileSystem = $file_system; - $this->logger = $logger_factory->get('l10n_drupal_rest'); + $this->logger = $logger; $this->entityTypeManager = $entity_type_manager; $this->filesCount = 0; $this->linesCount = 0; diff --git a/connectors/l10n_drupal_rest/src/Plugin/l10n_server/Connector/DrupalRest.php b/connectors/l10n_drupal_rest/src/Plugin/l10n_server/Connector/DrupalRest.php index b8c62cba877c87125bb4ab5eab6ff0b6c68715ea..12f46663cf3a10d79db2e584d1f23c2f12a17238 100644 --- a/connectors/l10n_drupal_rest/src/Plugin/l10n_server/Connector/DrupalRest.php +++ b/connectors/l10n_drupal_rest/src/Plugin/l10n_server/Connector/DrupalRest.php @@ -111,14 +111,19 @@ class DrupalRest extends ConnectorPluginBase implements ConnectorScanHandlerInte /** * {@inheritdoc} */ - public function scanHandler(): ConnectorScanHandlerResultInterface { - $this->scanner->setConnector($this); - if ($this->scanner->scan()) { - return new ConnectorScanHandlerResult([ - 'projects' => $this->scanner->getProjectCount(), - 'releases' => $this->scanner->getReleaseCount(), - ]); + public function scanHandler(?int $row_limit = NULL, bool $resume = FALSE): ConnectorScanHandlerResultInterface { + $releases_tsv_url = $this->configFactory + ->get('l10n_server.settings') + ->get('connectors.drupal_rest:restapi.source.restapi.refresh_url'); + if (!$releases_tsv_url) { + $this->logger->error($this->t('The download url for releases.tsv is not configured.')); + return new ConnectorScanHandlerResult(); + } + $result = $this->scanner->scan($releases_tsv_url, $row_limit, $resume); + if ($result !== NULL) { + return $result; } + return new ConnectorScanHandlerResult(); } diff --git a/connectors/l10n_drupal_rest/src/ScannerService.php b/connectors/l10n_drupal_rest/src/ScannerService.php index 9536cfdcba7af5a24a46dab63e2a7dcbdb3d5b84..b9ed05e54b534f70bda3481b3606a965b1bc626d 100644 --- a/connectors/l10n_drupal_rest/src/ScannerService.php +++ b/connectors/l10n_drupal_rest/src/ScannerService.php @@ -4,28 +4,30 @@ declare(strict_types=1); namespace Drupal\l10n_drupal_rest; -use Drupal\Core\Config\ConfigFactoryInterface; -use Drupal\Core\Config\ImmutableConfig; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\File\FileExists; use Drupal\Core\File\FileSystemInterface; -use Drupal\Core\Logger\LoggerChannelFactoryInterface; use Drupal\Core\Logger\LoggerChannelInterface; use Drupal\Core\State\StateInterface; use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\Core\Url; use Drupal\file\FileRepositoryInterface; -use Drupal\l10n_server\ConnectorInterface; +use Drupal\l10n_server\ConnectorScanHandlerResult; +use Drupal\l10n_server\ConnectorScanHandlerResultInterface; +use Drupal\l10n_server\Entity\L10nServerProjectInterface; use GuzzleHttp\ClientInterface; +use Symfony\Component\DependencyInjection\Attribute\Autowire; /** - * Service description. + * A service to create project and release entities based on a tsv file. */ class ScannerService { use StringTranslationTrait; - const LAST_SYNC_TIME = 'l10n_drupal_rest.last_sync_time'; + const LAST_SYNC_DATETIME = 'l10n_drupal_rest.last_sync_datetime'; + + const STATE_KEY_REMAINING_COUNT = 'l10n_drupal_rest.remaining_count'; const PROJECT_CONNECTOR_MODULE = 'drupal_rest:restapi'; @@ -33,261 +35,155 @@ class ScannerService { const PROJECT_STATUS = 1; - /** - * The config object. - * - * @var \Drupal\Core\Config\ImmutableConfig - */ - protected ImmutableConfig $config; - - /** - * A state service. - * - * @var \Drupal\Core\State\StateInterface - */ - protected StateInterface $state; - - /** - * An HTTP client service. - * - * @var \GuzzleHttp\ClientInterface - */ - protected ClientInterface $httpClient; - - /** - * A file repository service. - * - * @var \Drupal\file\FileRepositoryInterface - */ - protected FileRepositoryInterface $fileRepository; - - /** - * A file system service. - * - * @var \Drupal\Core\File\FileSystemInterface - */ - private FileSystemInterface $fileSystem; - - /** - * The logger channel. - * - * @var \Drupal\Core\Logger\LoggerChannelInterface - */ - protected LoggerChannelInterface $logger; - - /** - * Entity type manager. - * - * @var \Drupal\Core\Entity\EntityTypeManagerInterface - */ - private EntityTypeManagerInterface $entityTypeManager; - - /** - * The connector instance. - * - * @var \Drupal\l10n_server\ConnectorInterface - */ - private ConnectorInterface $connector; - - /** - * A projects array. - * - * @var array - */ - private array $projects; - - /** - * The project count. - * - * @var int - */ - private int $projectCount; - - /** - * A releases array. - * - * @var array - */ - private array $releases; - - /** - * The release count. - * - * @var int - */ - private int $releaseCount; - - /** - * The filepath. - * - * @var string - */ - private string $filepath; - - /** - * Last sync time. - * - * @var int - */ - private int $lastSyncTime; - - /** - * Last sync before time. - * - * @var int - */ - private int $lastSyncBeforeTime; - - /** - * Constructs a DrupalRestService object. - * - * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory - * The config factory. - * @param \Drupal\Core\State\StateInterface $state - * The state service. - * @param \GuzzleHttp\ClientInterface $http_client - * The HTTP client. - * @param \Drupal\file\FileRepositoryInterface $file_repository - * The file repository. - * @param \Drupal\Core\File\FileSystemInterface $file_system - * The file system. - * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory - * The logger factory. - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager - * The entity type manager. - */ public function __construct( - ConfigFactoryInterface $config_factory, - StateInterface $state, - ClientInterface $http_client, - FileRepositoryInterface $file_repository, - FileSystemInterface $file_system, - LoggerChannelFactoryInterface $logger_factory, - EntityTypeManagerInterface $entity_type_manager, - ) { - $this->config = $config_factory->get('l10n_server.settings'); - $this->state = $state; - $this->httpClient = $http_client; - $this->fileRepository = $file_repository; - $this->fileSystem = $file_system; - $this->logger = $logger_factory->get('l10n_drupal_rest'); - $this->entityTypeManager = $entity_type_manager; - $this->projects = []; - $this->projectCount = 0; - $this->releases = []; - $this->releaseCount = 0; - $this->filepath = ''; - $this->lastSyncTime = 0; - $this->lastSyncBeforeTime = 0; - } - - /** - * Sets connector. - * - * @param \Drupal\l10n_server\ConnectorInterface $connector - * The connector instance. - * - * @return $this - */ - public function setConnector(ConnectorInterface $connector): self { - $this->connector = $connector; - return $this; - } - - /** - * The project count. - * - * @return int - * The project count integer. - */ - public function getProjectCount(): int { - return $this->projectCount; - } - - /** - * The release count. - * - * @return int - * The release count integer. - */ - public function getReleaseCount(): int { - return $this->releaseCount; - } + protected readonly StateInterface $state, + protected readonly ClientInterface $httpClient, + protected readonly FileRepositoryInterface $fileRepository, + protected readonly FileSystemInterface $fileSystem, + #[Autowire(service: 'logger.channel.l10n_drupal_rest')] + protected readonly LoggerChannelInterface $logger, + protected readonly EntityTypeManagerInterface $entityTypeManager, + ) {} /** * Scans for new projects and/or releases. * - * @return bool - * Boolean true on success, false on failure. + * @param string $releases_tsv_url + * The url from where to download the releases.tsv file. + * @param int|null $row_limit + * Maximum number of source records to process, or NULL for no limit. + * @param bool $resume + * TRUE to resume parsing an already downloaded file, if exists. + * FALSE to force downloading a new file. + * + * @return \Drupal\l10n_server\ConnectorScanHandlerResultInterface|null + * The scan handler result object, or NULL on failure. */ - public function scan(): bool { + public function scan(string $releases_tsv_url, ?int $row_limit = NULL, bool $resume = FALSE): ?ConnectorScanHandlerResultInterface { // Only sync releases which are at most one day older than our last sync - // date. This ensures time zone issues and releases published while the - // previous cron run will not be a problem, but we only look at a relatively - // small list of releases at any given time. We only sync tagged releases, - // which will not get rebuilt later anytime. - $this->lastSyncTime = $this->state->get(static::LAST_SYNC_TIME, 0); - $this->lastSyncBeforeTime = $this->lastSyncTime - 86400; - - try { - $this->logger->notice('Fetching project list...'); - $this->fetchProjectList(); + // date. This supports the assumption that the latest chunk of the + // releases.tsv file might not always be fully complete. + $last_sync_date_string = $this->state->get(static::LAST_SYNC_DATETIME, 0); + + $filepath = NULL; + $previous_remaining_count = NULL; + if ($resume) { + $previous_remaining_count = $this->state->get(static::STATE_KEY_REMAINING_COUNT, NULL); + if ($previous_remaining_count === 0) { + // No further records to process. + return new ConnectorScanHandlerResult(); + } + if ($previous_remaining_count !== NULL) { + $filepath = $this->findExistingReleasesTsv(); + } } - catch (\Exception $e) { - $this->logger->error($this->t('Error fetching project list: @code @message', [ - '@code' => $e->getCode(), - '@message' => $e->getMessage(), - ])); - return FALSE; + + $cutoff_date_string = NULL; + if ($filepath === NULL) { + $this->state->delete(static::STATE_KEY_REMAINING_COUNT); + try { + $this->logger->notice('Fetching project list...'); + $filepath = $this->downloadReleasesTsv($releases_tsv_url); + } + catch (\Exception $e) { + $this->logger->error($this->t('Error fetching project list: @code @message', [ + '@code' => $e->getCode(), + '@message' => $e->getMessage(), + ])); + return NULL; + } + $cutoff_date_string = gmdate('Y-m-d H:i:s', strtotime($last_sync_date_string . ' UTC') - 86400); } try { $this->logger->notice('Parsing project list...'); - $this->parseProjectList(); + $records = $this->readReleasesTsv($filepath, $cutoff_date_string, $previous_remaining_count); } catch (\Exception $e) { $this->logger->error($this->t('Error parsing project list: @code @message', [ '@code' => $e->getCode(), '@message' => $e->getMessage(), ])); - return FALSE; + return NULL; } - try { - $this->logger->notice('Storing project list...'); - $this->storeProjectList(); + // Pre-calculate processed and remaining count. + // These will be discarded if the operation aborts prematurely. + if ($row_limit === NULL || $row_limit >= count($records)) { + $remaining_count = 0; + $records_to_process = $records; } - catch (\Exception $e) { - $this->logger->error($this->t('Error storing project list: @code @message', [ - '@code' => $e->getCode(), - '@message' => $e->getMessage(), - ])); - return FALSE; + elseif ($row_limit === 0) { + $remaining_count = count($records); + $records_to_process = []; + } + else { + $remaining_count = count($records) - $row_limit; + // Start with the oldest records. + $records_to_process = array_slice($records, $remaining_count); } - try { - $this->logger->notice('Storing release list...'); - $this->storeReleaseList(); + if ($records_to_process === []) { + $project_entities_created_count = 0; + $release_entities_created_count = 0; } - catch (\Exception $e) { - $this->logger->error($this->t('Error storing release list: @code @message', [ - '@code' => $e->getCode(), - '@message' => $e->getMessage(), - ])); - return FALSE; + else { + try { + $this->logger->notice('Writing project and release entities...'); + [$project_entities_created_count, $release_entities_created_count] = $this->writeEntities($records_to_process); + } + catch (\Exception $e) { + $this->logger->error($this->t('Error storing release list: @code @message', [ + '@code' => $e->getCode(), + '@message' => $e->getMessage(), + ])); + return NULL; + } + $latest_record_date_string = reset($records_to_process)['created']; + $this->state->set(static::LAST_SYNC_DATETIME, max($latest_record_date_string, $last_sync_date_string)); } - $this->state->set(static::LAST_SYNC_TIME, $this->lastSyncTime); - return TRUE; + $this->state->set(static::STATE_KEY_REMAINING_COUNT, $remaining_count); + + if ($remaining_count === 0) { + $this->fileSystem->delete($filepath); + } + + return new ConnectorScanHandlerResult( + $project_entities_created_count, + $release_entities_created_count, + count($records_to_process), + $remaining_count, + ); + } + + /** + * Finds a previously downloaded releases.tsv file. + * + * @return string|null + * Path to a previously downloaded releases.tsv file, or NULL if it does not + * exist. + */ + private function findExistingReleasesTsv(): ?string { + $filepath = 'temporary://releases.tsv'; + if (!file_exists($filepath)) { + return NULL; + } + return $filepath; } /** - * Fetch the project list. + * Downloads the releases.tsv file, typically from drupal.org. + * + * @param string $url + * The url from where to download the releases.tsv file. + * + * @return string + * Path to the downloaded releases.tsv file. */ - private function fetchProjectList(): void { + private function downloadReleasesTsv(string $url): string { // Fetch projects and releases since last sync. - $this->filepath = 'temporary://releases.tsv'; - $url = $this->config->get('connectors.drupal_rest:restapi.source.restapi.refresh_url'); + $filepath = 'temporary://releases.tsv'; // Add a timestamp GET parameter to prevent CDN caching. $url = Url::fromUri($url, ['query' => ['time' => time()]])->toString(); @@ -296,173 +192,257 @@ class ScannerService { $response = $this->httpClient->get($url, ['connect_timeout' => 30]); // Save as temporary file. - /** @var \Drupal\file\FileRepositoryInterface $fileRepository */ + /** @var \Drupal\file\FileRepositoryInterface $file_repository */ $file_repository = \Drupal::service('file.repository'); $file_repository->writeData( $response->getBody()->getContents(), - $this->filepath, + $filepath, FileExists::Rename, ); + return $filepath; } /** - * Parse the project list. + * Reads the releases.tsv file up to a cutoff date. + * + * @param string $filepath + * Path to the downloaded releases.tsv file. + * @param string|null $cutoff_date_string + * A cutoff date string, as 'Y-m-d H:i:s', or NULL for no limit. + * Any tsv records with 'created' older than this will be ignored. + * In fact, when any record is encountered in the .tsv with 'created' as old + * or older than this cutoff, any subsequent records are also ignored, based + * on the assumption that the .tsv is ordered by "newest first". + * @param int|null $max_count + * The maximum size of the returned array, or NULL for no limit. + * + * @return list + * List of records from the .tsv file, up to the cutoff date. + * The 'row_index' contains the row number within the csv, starting at 1. + * (Number 0 would be the header row.) */ - private function parseProjectList(): void { - $headers = []; - + private function readReleasesTsv(string $filepath, ?string $cutoff_date_string, ?int $max_count): array { // Read from temporary file. - if (($handle = fopen($this->filepath, "r")) !== FALSE) { - while (($data = fgetcsv($handle, 1000, "\t")) !== FALSE) { - // Get headers. - if (empty($headers)) { - $headers = array_flip($data); - continue; - } - - // Filter out sandboxes and malformed releases. - if (count($data) < 4 || is_numeric($data[$headers['project_machine_name']])) { - continue; - } - - $time = strtotime($data[$headers['created']]); - if ($this->lastSyncBeforeTime < $time) { - $machine_name = trim($data[$headers['project_machine_name']]); - $title = trim($data[$headers['project_name']]); - - // A first array for projects. - $this->projects[$machine_name] = $title; - - // A second array for releases. - $this->releases[] = [ - 'created' => $time, - 'machine_name' => $machine_name, - 'title' => $title, - 'version' => $data[$headers['version']], - ]; - } - else { - break; - } - } + $handle = fopen($filepath, "r"); + if ($handle === FALSE) { + throw new \Exception(sprintf("Cannot open '%s' for reading.", $filepath)); + } + $headers = fgetcsv($handle, 1000, "\t"); + if ($headers === FALSE) { + fclose($handle); + throw new \Exception(sprintf("The file '%s' seems to be empty.", $filepath)); + } + if ($headers !== ['created', 'project_machine_name', 'version', 'project_name']) { + // A bad releases.tsv was downloaded, or the structure of the file may + // have changed on drupal.org. + fclose($handle); + throw new \Exception(sprintf("Unexpected head row '%s' in %s.", implode("', '", $headers), $filepath)); + } - if (is_resource($handle)) { + $records = []; + $previous_created_date_string = NULL; + for ($row_index = 1; ($csv_row = fgetcsv($handle, 1000, "\t")) !== FALSE; ++$row_index) { + // Fail on malformed releases. + if (count($csv_row) !== 4) { fclose($handle); + throw new \Exception(sprintf("Row %d of %s has %d instead of 4 columns.", $row_index, $filepath, count($csv_row))); + } + + $csv_record = array_combine($headers, $csv_row); + + // Filter out sandboxes. + if (is_numeric($csv_record['project_machine_name'])) { + continue; + } + + if (!preg_match('#^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d$#', $csv_record['created'])) { + throw new \Exception(sprintf("Unexpected created date string '%s' found in row %d of %s.", $csv_record['created'], $row_index, $filepath)); + } + if ($previous_created_date_string !== NULL && $previous_created_date_string < $csv_record['created']) { + throw new \Exception(sprintf("The 'created' date %s in row %d of %s is younger than that in the previous row.", $csv_record['created'], $row_index, $filepath)); + } + $previous_created_date_string = $csv_record['created']; + if ($cutoff_date_string >= $csv_record['created']) { + break; + } + $records[] = [ + ...$csv_record, + 'row_index' => $row_index, + ]; + + if ($max_count !== NULL && count($records) >= $max_count) { + break; } } - $this->fileSystem->delete($this->filepath); + fclose($handle); + + return $records; } /** - * Store the project list. + * Gets or creates a project entity and returns the id. + * + * @param string $machine_name + * Project machine name. + * @param string $title + * Project title. + * @param array $known_project_ids + * Cache of known project ids. + * @param int $project_entities_created_count + * Counter to increment for newly created project entities. + * + * @return int + * The project id. */ - private function storeProjectList(): void { + private function getProjectId(string $machine_name, string $title, array &$known_project_ids, int &$project_entities_created_count): int { + // Get the project id. + if (!isset($known_project_ids[$machine_name])) { + $project = $this->loadAndUpdateProjectEntity($machine_name, $title); + if ($project === NULL) { + $project = $this->createProjectEntity($machine_name, $title); + ++$project_entities_created_count; + } + $known_project_ids[$machine_name] = (int) $project->id(); + } + return $known_project_ids[$machine_name]; + } + + /** + * Loads and updates a project entity. + * + * @param string $machine_name + * The project machine name. + * @param string $title + * The project title, to be updated if it does not match. + * + * @return \Drupal\l10n_server\Entity\L10nServerProjectInterface|null + * The project entity, or NULL if none found. + */ + private function loadAndUpdateProjectEntity(string $machine_name, string $title): ?L10nServerProjectInterface { $project_storage = $this->entityTypeManager ->getStorage('l10n_server_project'); - - $this->projectCount = 0; - foreach ($this->projects as $project_uri => $project_title) { - $existing_projects = $project_storage->getQuery() - ->accessCheck(TRUE) - ->condition('uri', $project_uri) - ->accessCheck(FALSE) - ->execute(); - - if ($existing_projects) { - /** @var \Drupal\l10n_server\Entity\L10nServerProject $existing_project */ - $existing_project = $project_storage->load(reset($existing_projects)); - - // Check that the title is correct, if not update it. - if ($existing_project->label() !== $project_title) { - $existing_project - ->set('title', $project_title) - ->save(); - - $this->logger->info('Project %name renamed to %title.', [ - '%title' => $project_title, - '%name' => $project_uri, - ]); - } - } - else { - $this->logger->notice('Creating new project...'); - - $this->projectCount++; - $project_storage->create([ - 'uri' => $project_uri, - 'title' => $project_title, - 'last_parsed' => \Drupal::time()->getRequestTime(), - 'homepage' => implode('/', [ - static::PROJECT_PACKAGE_URL, - 'project', - $project_uri, - ]), - 'connector_module' => self::PROJECT_CONNECTOR_MODULE, - 'status' => self::PROJECT_STATUS, - ])->save(); - - $this->logger->notice('Project %title (%uri) added.', [ - '%title' => $project_title, - '%uri' => $project_uri, - ]); - } + $project_ids = $project_storage->getQuery() + ->accessCheck(TRUE) + ->condition('uri', $machine_name) + ->accessCheck(FALSE) + ->execute(); + if (!$project_ids) { + return NULL; + } + $existing_project = $project_storage->load(reset($project_ids)); + assert($existing_project instanceof L10nServerProjectInterface); + // Check that the title is correct, if not update it. + if ($existing_project->label() !== $title) { + $existing_project + ->set('title', $title) + ->save(); + + $this->logger->info('Project %name renamed to %title.', [ + '%title' => $title, + '%name' => $machine_name, + ]); } + return $existing_project; } /** - * Store the release list. + * Creates a new project entity. + * + * @param string $machine_name + * The project machine name. + * @param string $title + * The project title. + * + * @return \Drupal\l10n_server\Entity\L10nServerProjectInterface + * The newly created project entity. */ - private function storeReleaseList(): void { + private function createProjectEntity(string $machine_name, string $title): L10nServerProjectInterface { + $this->logger->notice('Creating new project...'); $project_storage = $this->entityTypeManager ->getStorage('l10n_server_project'); + $project = $project_storage->create([ + 'uri' => $machine_name, + 'title' => $title, + 'last_parsed' => \Drupal::time()->getRequestTime(), + 'homepage' => implode('/', [ + static::PROJECT_PACKAGE_URL, + 'project', + $machine_name, + ]), + 'connector_module' => self::PROJECT_CONNECTOR_MODULE, + 'status' => self::PROJECT_STATUS, + ]); + assert($project instanceof L10nServerProjectInterface); + $project->save(); + + $this->logger->notice('Project %title (%uri) added.', [ + '%title' => $title, + '%uri' => $machine_name, + ]); + + return $project; + } + + /** + * Writes project and release entities based on data from releases.tsv. + * + * @param non-empty-list $records + * List of records from the releases.tsv file, up to a cutoff date. + * + * @return array{int, int} + * An array with the number of newly created project entities, and the + * number of newly created release entities. + */ + private function writeEntities(array $records): array { $release_storage = $this->entityTypeManager ->getStorage('l10n_server_release'); - - $this->releaseCount = 0; - foreach ($this->releases as $release) { - $download_link = "https://ftp.drupal.org/files/projects/{$release['machine_name']}-{$release['version']}.tar.gz"; + $release_entities_created_count = 0; + $project_entities_created_count = 0; + $project_ids = []; + foreach ($records as $csv_record) { + $created = strtotime($csv_record['created'] . ' UTC'); + $machine_name = trim($csv_record['project_machine_name']); + $title = trim($csv_record['project_name']); + $version = $csv_record['version']; + + // Always load and update the project, even if the release already exists. + // This is necessary when the project title has changed. + $project_id = $this->getProjectId($machine_name, $title, $project_ids, $project_entities_created_count); + + $download_link = "https://ftp.drupal.org/files/projects/$machine_name-$version.tar.gz"; if ($release_storage->getQuery()->accessCheck(TRUE)->condition('download_link', $download_link)->execute()) { + // The release already exists. // @todo (D7) What happens to unpublished releases? drop data outright? + continue; } - else { - // Get the project id. - $projects = $project_storage->getQuery() - ->accessCheck(TRUE) - ->condition('uri', $release['machine_name']) - ->range(0, 1) - ->accessCheck(FALSE) - ->execute(); - $pid = reset($projects); - - // @todo (d7) What about filehash? - $filehash = ''; - - // New published release, not recorded before. - $release_storage->create([ - 'pid' => $pid, - 'title' => $this->t('@title @version', [ - '@title' => $release['title'], - '@version' => $release['version'], - ]), - 'version' => $release['version'], - 'download_link' => $download_link, - 'file_date' => $release['created'], - 'file_hash' => $filehash, - 'last_parsed' => 0, - 'weight' => 0, - ])->save(); - - $this->releaseCount++; - $this->logger->notice('Release %title from project %name added.', [ - '%title' => $release['version'], - '%name' => $release['machine_name'], - ]); - - // Update last sync date with the date of this release if later. - $this->lastSyncTime = max($this->lastSyncTime, $release['created']); - } + + // @todo (d7) What about filehash? + $filehash = ''; + + // New published release, not recorded before. + $release_storage->create([ + 'pid' => $project_id, + 'title' => $this->t('@title @version', [ + '@title' => $title, + '@version' => $version, + ]), + 'version' => $version, + 'download_link' => $download_link, + 'file_date' => $created, + 'file_hash' => $filehash, + 'last_parsed' => 0, + 'weight' => 0, + ])->save(); + + ++$release_entities_created_count; + $this->logger->notice('Release %title from project %name added.', [ + '%title' => $version, + '%name' => $machine_name, + ]); } + + return [$project_entities_created_count, $release_entities_created_count]; } } diff --git a/connectors/l10n_drupal_rest/tests/src/Kernel/DrupalRestConnectorTest.php b/connectors/l10n_drupal_rest/tests/src/Kernel/DrupalRestConnectorTest.php index 4e28cb62cf89dd384707c33c8d4d07b756581725..01d746437b39a3054098592d4407c40e3fb23796 100644 --- a/connectors/l10n_drupal_rest/tests/src/Kernel/DrupalRestConnectorTest.php +++ b/connectors/l10n_drupal_rest/tests/src/Kernel/DrupalRestConnectorTest.php @@ -13,6 +13,8 @@ use Drupal\l10n_drupal_rest\Plugin\l10n_server\Connector\DrupalRest; use Drupal\l10n_server\ConnectorManagerInterface; use Drupal\l10n_server\ConnectorParseHandlerResult; use Drupal\l10n_server\ConnectorScanHandlerResult; +use Drupal\l10n_server\Entity\L10nServerProject; +use Drupal\l10n_server\Entity\L10nServerRelease; use Drupal\Tests\l10n_server\Helper\Any; use Drupal\Tests\l10n_server\Helper\ConsumingTestLoggerWrapper; use Drupal\Tests\l10n_server\Helper\EntitiesCensus; @@ -154,10 +156,8 @@ class DrupalRestConnectorTest extends KernelTestBase { // The result is empty. $this->assertEquals(new ConnectorScanHandlerResult(), $result); - // A regular message is logged when the attempt starts. - $this->assertLogRecord('Fetching project list...'); - // An error is logged when it fails. - $this->loggerWrapper->assertNextMessage(pattern: '#^Error fetching project list: . The URI .* is invalid.#', level: RfcLogLevel::ERROR); + // An error is logged. + $this->assertLogRecord('The download url for releases.tsv is not configured.', level: RfcLogLevel::ERROR, channel: 'l10n_server'); } /** @@ -176,13 +176,12 @@ class DrupalRestConnectorTest extends KernelTestBase { // A number of log messages are caused by the scan handler operation. $this->assertLogRecord('Fetching project list...'); $this->assertLogRecord('Parsing project list...'); - $this->assertLogRecord('Storing project list...'); + $this->assertLogRecord('Writing project and release entities...'); $this->assertLogRecord('Creating new project...'); $this->assertLogRecord('Project %title (%uri) added.', ['%title' => 'Pathauto']); + $this->assertLogRecord('Release %title from project %name added.', ['%title' => '8.x-1.14', '%name' => 'pathauto']); $this->assertLogRecord('Creating new project...'); $this->assertLogRecord('Project %title (%uri) added.', ['%title' => 'Admin Toolbar']); - $this->assertLogRecord('Storing release list...'); - $this->assertLogRecord('Release %title from project %name added.', ['%title' => '8.x-1.14', '%name' => 'pathauto']); $this->assertLogRecord('Release %title from project %name added.', ['%title' => '3.6.2', '%name' => 'admin_toolbar']); $this->assertLogRecord('Release %title from project %name added.', ['%title' => '8.x-1.9', '%name' => 'pathauto']); @@ -233,7 +232,7 @@ class DrupalRestConnectorTest extends KernelTestBase { version: '8.x-1.14', download_link: 'https://ftp.drupal.org/files/projects/pathauto-8.x-1.14.tar.gz', file_hash: NULL, - file_date: 1759798492, + file_date: 1759838092, last_parsed: 0, queued_time: \Drupal::time()->getRequestTime(), source_string_count: 0, @@ -251,7 +250,7 @@ class DrupalRestConnectorTest extends KernelTestBase { version: '3.6.2', download_link: 'https://ftp.drupal.org/files/projects/admin_toolbar-3.6.2.tar.gz', file_hash: NULL, - file_date: 1753790102, + file_date: 1753826102, last_parsed: 0, queued_time: \Drupal::time()->getRequestTime(), source_string_count: 0, @@ -291,8 +290,7 @@ class DrupalRestConnectorTest extends KernelTestBase { // A number of log messages are caused by the scan handler operation. $this->assertLogRecord('Fetching project list...'); $this->assertLogRecord('Parsing project list...'); - $this->assertLogRecord('Storing project list...'); - $this->assertLogRecord('Storing release list...'); + $this->assertLogRecord('Writing project and release entities...'); // The result object provides a report. // No new entities are created. @@ -300,6 +298,89 @@ class DrupalRestConnectorTest extends KernelTestBase { $this->assertSame(0, $result->getReleaseCount()); } + /** + * Tests the batch functionality. + */ + public function testScanHandlerIncremental(): void { + \Drupal::configFactory()->getEditable('l10n_server.settings') + ->set('connectors.drupal_rest:restapi.source.restapi.refresh_url', 'https://www.drupal.org/files/releases.tsv') + ->save(); + $this->prepareReleasesTsvRequest(); + $connector = $this->createConnectorPlugin(); + + // Initial call just to get the remaining count. + $result = $connector->scanHandler(0, FALSE); + $this->assertEquals(new ConnectorScanHandlerResult( + remaining: 3, + ), $result); + $this->entitiesCensus->update([ + 'file' => 1, + ]); + + $this->assertLogRecord('Fetching project list...'); + $this->assertLogRecord('Parsing project list...'); + $this->assertBalance(); + + // First batch of 2 items. + $result = $connector->scanHandler(2, TRUE); + $this->assertEquals(new ConnectorScanHandlerResult( + projects: 2, + releases: 2, + processed: 2, + remaining: 1, + ), $result); + $new_ids_by_type = $this->entitiesCensus->update([ + 'l10n_server_project' => 2, + 'l10n_server_release' => 2, + ]); + $this->assertProjectEntity( + $new_ids_by_type['l10n_server_project'][0], + title: 'Admin Toolbar', + ); + $this->assertProjectEntity( + $new_ids_by_type['l10n_server_project'][1], + title: 'Pathauto', + ); + $this->assertReleaseEntity( + $new_ids_by_type['l10n_server_release'][0], + title: 'Admin Toolbar 3.6.2', + ); + $this->assertReleaseEntity( + $new_ids_by_type['l10n_server_release'][1], + title: 'Pathauto 8.x-1.9', + ); + + $this->assertLogRecord('Parsing project list...'); + $this->assertLogRecord('Writing project and release entities...'); + $this->assertLogRecord('Creating new project...'); + $this->assertLogRecord('Project %title (%uri) added.', ['%title' => 'Admin Toolbar']); + $this->assertLogRecord('Release %title from project %name added.', ['%title' => '3.6.2', '%name' => 'admin_toolbar']); + $this->assertLogRecord('Creating new project...'); + $this->assertLogRecord('Project %title (%uri) added.', ['%title' => 'Pathauto']); + $this->assertLogRecord('Release %title from project %name added.', ['%title' => '8.x-1.9', '%name' => 'pathauto']); + + $this->assertBalance(); + + // Second batch with 1 of 5 items. + $result = $connector->scanHandler(5, TRUE); + $this->assertEquals(new ConnectorScanHandlerResult( + releases: 1, + processed: 1, + remaining: 0, + ), $result); + $new_ids_by_type = $this->entitiesCensus->update([ + 'l10n_server_release' => 1, + ]); + $this->assertReleaseEntity( + $new_ids_by_type['l10n_server_release'][0], + title: 'Pathauto 8.x-1.14', + ); + + $this->assertLogRecord('Parsing project list...'); + $this->assertLogRecord('Writing project and release entities...'); + $this->assertLogRecord('Release %title from project %name added.', ['%title' => '8.x-1.14', '%name' => 'pathauto']); + } + /** * Tests parsing different example files. * @@ -446,8 +527,8 @@ class DrupalRestConnectorTest extends KernelTestBase { * @param int|null $level * The expected log level, or NULL to not assert. */ - protected function assertLogRecord(string $message, array $context = [], ?int $level = RfcLogLevel::NOTICE): void { - $this->loggerWrapper->assertNextMessage($message, context: $context, level: $level); + protected function assertLogRecord(string $message, array $context = [], ?int $level = RfcLogLevel::NOTICE, ?string $channel = NULL): void { + $this->loggerWrapper->assertNextMessage($message, context: $context, level: $level, channel: $channel); } } diff --git a/l10n_server/src/ConnectorScanHandlerInterface.php b/l10n_server/src/ConnectorScanHandlerInterface.php index babe6a4bba686adcb63dba4d83d863e25f144172..3e84cb8383035e1c9f5d479817b7dd33a7019a3a 100644 --- a/l10n_server/src/ConnectorScanHandlerInterface.php +++ b/l10n_server/src/ConnectorScanHandlerInterface.php @@ -10,11 +10,17 @@ namespace Drupal\l10n_server; interface ConnectorScanHandlerInterface { /** - * Scan handler. + * Fetches release information and creates project and release entities. + * + * @param int|null $row_limit + * Maximum number of source records to process, or NULL for no limit. + * @param bool $resume + * TRUE to resume processing existing source data. + * FALSE to force getting fresh source data. * * @return \Drupal\l10n_server\ConnectorScanHandlerResultInterface - * The scan handler result object. + * A result object with created entity counts. */ - public function scanHandler(): ConnectorScanHandlerResultInterface; + public function scanHandler(?int $row_limit = NULL, bool $resume = FALSE): ConnectorScanHandlerResultInterface; } diff --git a/l10n_server/src/ConnectorScanHandlerResult.php b/l10n_server/src/ConnectorScanHandlerResult.php index 71f5c06306678bd3db156bb831e42d382239cbaa..6478b9b8620d9c6e4cc9d0a1b8e9d652e20b4915 100644 --- a/l10n_server/src/ConnectorScanHandlerResult.php +++ b/l10n_server/src/ConnectorScanHandlerResult.php @@ -10,93 +10,50 @@ namespace Drupal\l10n_server; class ConnectorScanHandlerResult implements ConnectorScanHandlerResultInterface { /** - * Project counter. + * Constructs a new instance. * - * @var int - */ - protected int $projects; - - /** - * Release counter. - * - * @var int - */ - protected int $releases; - - /** - * Class constructor. - * - * @param array $options - * The constructor options: - * - projects: a project count integer. - * - releases: a release count integer. - */ - public function __construct(array $options = []) { - $this->projects = $options['projects'] ?? 0; - $this->releases = $options['releases'] ?? 0; - } + * @param int $projects + * Number of newly created project entities. + * @param int $releases + * Number of newly created release entities. + * @param int $processed + * Number of source items that were processed. + * @param int $remaining + * Number of remaining source items that need processing. + */ + public function __construct( + protected readonly int $projects = 0, + protected readonly int $releases = 0, + protected readonly int $processed = 0, + protected readonly int $remaining = 0, + ) {} /** * {@inheritdoc} */ public function getProjectCount(): int { - return (int) $this->projects; - } - - /** - * {@inheritdoc} - */ - public function setProjectCount(int $count): self { - $this->projects = $count; - return $this; - } - - /** - * {@inheritdoc} - */ - public function increaseProjectCount(?int $count = NULL): self { - if ($count) { - $this->projects = $this->projects + $count; - } - else { - $this->projects++; - } - return $this; + return $this->projects; } /** * {@inheritdoc} */ public function getReleaseCount(): int { - return (int) $this->releases; - } - - /** - * {@inheritdoc} - */ - public function setReleaseCount(int $count): self { - $this->releases = $count; - return $this; + return $this->releases; } /** * {@inheritdoc} */ - public function increaseReleaseCount(?int $count = NULL): self { - if ($count) { - $this->releases = $this->releases + $count; - } - else { - $this->releases++; - } - return $this; + public function getProcessedCount(): int { + return $this->processed; } /** * {@inheritdoc} */ - public function getSum(): int { - return (int) ($this->projects + $this->releases); + public function getRemainingCount(): int { + return $this->remaining; } } diff --git a/l10n_server/src/ConnectorScanHandlerResultInterface.php b/l10n_server/src/ConnectorScanHandlerResultInterface.php index 6067e9d850f0620b43e16f123f8e3cebea3bbb35..60159bab47549773586dc696ec4ba342e5f13971 100644 --- a/l10n_server/src/ConnectorScanHandlerResultInterface.php +++ b/l10n_server/src/ConnectorScanHandlerResultInterface.php @@ -10,67 +10,23 @@ namespace Drupal\l10n_server; interface ConnectorScanHandlerResultInterface { /** - * Gets the project count. - * - * @return int - * The project count. + * Gets the number of newly created project entities. */ public function getProjectCount(): int; /** - * Sets the project count. - * - * @param int $count - * The project count. - * - * @return $this - */ - public function setProjectCount(int $count): self; - - /** - * Increase project count. - * - * @param int|null $count - * An integer to increase the count with. - * - * @return $this - */ - public function increaseProjectCount(?int $count = NULL): self; - - /** - * Gets the release count. - * - * @return int - * The release count. + * Gets the number of newly created release entities. */ public function getReleaseCount(): int; /** - * Sets the release count. - * - * @param int $count - * The release count. - * - * @return $this - */ - public function setReleaseCount(int $count): self; - - /** - * Increase release count. - * - * @param int|null $count - * An integer to increase the count with. - * - * @return $this + * Gets the number of source items that were processed. */ - public function increaseReleaseCount(?int $count = NULL): self; + public function getProcessedCount(): int; /** - * Gets the sum of all counters. - * - * @return int - * The count integer. + * Gets the number of remaining source items that need processing. */ - public function getSum(): int; + public function getRemainingCount(): int; } diff --git a/l10n_server/src/Form/ConnectorBatchConfirmScanForm.php b/l10n_server/src/Form/ConnectorBatchConfirmScanForm.php index b4255950669a8e8e62589da1215095eaa1484c40..281546b7ef3f3eec284522dc1fbc5f00193886e7 100644 --- a/l10n_server/src/Form/ConnectorBatchConfirmScanForm.php +++ b/l10n_server/src/Form/ConnectorBatchConfirmScanForm.php @@ -8,6 +8,7 @@ use Drupal\Core\Form\FormStateInterface; use Drupal\Core\StringTranslation\TranslatableMarkup; use Drupal\Core\Url; use Drupal\l10n_server\ConnectorInterface; +use Drupal\l10n_server\ConnectorScanHandlerInterface; /** * Provides a confirmation form before clearing out the examples. @@ -37,12 +38,12 @@ class ConnectorBatchConfirmScanForm extends ConnectorBatchConfirmFormBase { $batch = [ 'title' => t('Scanning'), 'operations' => [], - 'finished' => static::class . '::batchFinished', + 'finished' => [static::class, 'batchFinished'], ]; $source_config = $connector->getSourceInstance()->getConfiguration(); for ($i = 0; $i < $source_config['scan_limit']; $i++) { $batch['operations'][] = [ - static::class . '::batchOperation', + [static::class, 'batchOperation'], [ $connector, ], @@ -61,18 +62,85 @@ class ConnectorBatchConfirmScanForm extends ConnectorBatchConfirmFormBase { * The batch context. */ public static function batchOperation(ConnectorInterface $connector, array &$context): void { - if (!$connector->isScannable()) { + if (!$connector instanceof ConnectorScanHandlerInterface) { return; } - /** @var \Drupal\l10n_server\ConnectorScanHandlerResult $result */ - $result = $connector->scanHandler(); + if (empty($context['sandbox'])) { + // Announce download. + $context['finished'] = 0; + $context['sandbox']['projects'] = 0; + $context['sandbox']['releases'] = 0; + $context['message'] = t('Downloading releases.tsv'); + return; + } + + $total = $context['sandbox']['total'] ?? NULL; + if ($total === NULL) { + // Announce and start import. + $result = $connector->scanHandler(0, FALSE); + $context['sandbox']['total'] = $result->getRemainingCount(); + $context['message'] = t('Creating project and release entities from releases.tsv.'); + $context['finished'] = 0; + return; + } + $result = $connector->scanHandler(1000, TRUE); + $remaining = $result->getRemainingCount(); + + $context['sandbox']['projects'] += $result->getProjectCount(); + $context['sandbox']['releases'] += $result->getReleaseCount(); - for ($i = 0; $i < $result->getProjectCount(); $i++) { - $context['results'][] = t('A project has been created.'); + if ($remaining === 0) { + // Finish. + $context['finished'] = 1; + $context['results'] = [ + 'rows' => $total, + 'projects' => $context['sandbox']['projects'], + 'releases' => $context['sandbox']['releases'], + ]; + return; + } + + $context['finished'] = 1 - $remaining / $total; + $context['message'] = t('Processed @processed out of @total rows. Created @projects projects. Created @releases releases.', [ + '@processed' => $total - $remaining, + '@total' => $total, + '@projects' => $context['sandbox']['projects'], + '@releases' => $context['sandbox']['releases'], + ]); + } + + /** + * Batch finished callback. + * + * @param bool $success + * The success status boolean. + * @param array $results + * The results array. + * @param array $operations + * The remaining operations. + * @param string $elapsed + * The time elapsed. + */ + public static function batchFinished(bool $success, array $results, array $operations, string $elapsed): void { + if ($success) { + // Here we do something meaningful with the results. + $message = t('Processed @rows source records. Created @projects projects. Created @releases releases.', [ + '@rows' => $results['rows'], + '@projects' => $results['projects'], + '@releases' => $results['releases'], + ]); + \Drupal::messenger()->addStatus($message); } - for ($i = 0; $i < $result->getReleaseCount(); $i++) { - $context['results'][] = t('A release has been created.'); + else { + // An error occurred. + // $operations contains the operations that remained unprocessed. + $error_operation = reset($operations); + $message = t('An error occurred while processing %error_operation with arguments: @arguments', [ + '%error_operation' => $error_operation[0], + '@arguments' => print_r($error_operation[1], TRUE), + ]); + \Drupal::messenger()->addError($message); } }