Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further separation of legacy dedupe finder from code to be cleaned up #32032

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 38 additions & 59 deletions CRM/Dedupe/BAO/DedupeRuleGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* @package CRM
* @copyright CiviCRM LLC https://civicrm.org/licensing
*/
use Civi\Api4\DedupeRuleGroup;
use Civi\Core\Event\GenericHookEvent;

/**
Expand Down Expand Up @@ -150,14 +151,26 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve
if ($event->tableName) {
$contactIDs = explode(',', CRM_Core_DAO::singleValueQuery('SELECT GROUP_CONCAT(id) FROM ' . $event->tableName));
}
$tempTable = $ruleGroup->fillTable($ruleGroup->id, $contactIDs, [], FALSE);

$optimizer = new CRM_Dedupe_FinderQueryOptimizer($ruleGroup->id, $contactIDs, []);
$tableQueries = $optimizer->getRuleQueries();
if (empty($tableQueries)) {
return;
}

$ruleGroup = DedupeRuleGroup::get(FALSE)
->addWhere('id', '=', $ruleGroup->id)
->execute()
->first();
$self = new self();
$tempTable = $self->runTablesQuery([], $tableQueries, $ruleGroup['threshold']);
if (!$tempTable) {
return;
}
$aclFrom = $aclWhere = '';
$dedupeTable = $tempTable;
$contactType = $ruleGroup->contact_type;
$threshold = $ruleGroup->threshold;
$contactType = $ruleGroup['contact_type'];
$threshold = $ruleGroup['threshold'];

if ($event->checkPermissions) {
[$aclFrom, $aclWhere] = CRM_Contact_BAO_Contact_Permission::cacheClause(['c1', 'c2']);
Expand All @@ -183,24 +196,38 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve
$duplicates[] = ['entity_id_1' => $dao->id1, 'entity_id_2' => $dao->id2, 'weight' => $dao->weight];
}
$event->duplicates = $duplicates;
\CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery());
\CRM_Core_DAO::executeQuery('DROP TEMPORARY TABLE IF EXISTS ' . $dedupeTable);
}

/**
* @throws \CRM_Core_Exception
* @throws \Civi\Core\Exception\DBQueryException
* @throws \Civi\API\Exception\UnauthorizedException
*/
public static function hook_civicrm_findDuplicates(GenericHookEvent $event): void {
if (!empty($event->dedupeResults['handled'])) {
// @todo - in time we can deprecate this & expect them to use stopPropagation().
return;
}
$rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
$dedupeTable = $rgBao->fillTable($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], FALSE);

$optimizer = new CRM_Dedupe_FinderQueryOptimizer($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params']);
$tableQueries = $optimizer->getRuleQueries();
if (empty($tableQueries)) {
return;
}

$ruleGroup = DedupeRuleGroup::get(FALSE)
->addWhere('id', '=', $event->dedupeParams['rule_group_id'])
->execute()
->first();
$self = new self();
$dedupeTable = $self->runTablesQuery($event->dedupeParams['match_params'], $tableQueries, $ruleGroup['threshold']);
if (!$dedupeTable) {
$event->dedupeResults['ids'] = [];
return;
}
$aclFrom = '';
$aclWhere = '';
$contactType = $rgBao->contact_type;
$threshold = $rgBao->threshold;
if ($event->dedupeParams['check_permission']) {
[$aclFrom, $aclWhere] = CRM_Contact_BAO_Contact_Permission::cacheClause('civicrm_contact');
$aclWhere = $aclWhere ? "AND {$aclWhere}" : '';
Expand All @@ -212,67 +239,19 @@ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): voi
AND weight >= %2";

$dao = CRM_Core_DAO::executeQuery($query, [
1 => [$contactType, 'String'],
2 => [$threshold, 'Integer'],
1 => [$ruleGroup['contact_type'], 'String'],
2 => [$ruleGroup['threshold'], 'Integer'],
]);
$dupes = [];
while ($dao->fetch()) {
if (isset($dao->id) && $dao->id) {
$dupes[] = $dao->id;
}
}
CRM_Core_DAO::executeQuery($rgBao->tableDropQuery());
CRM_Core_DAO::executeQuery('DROP TEMPORARY TABLE IF EXISTS ' . $dedupeTable);
$event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']);
}

/**
 * Build and run the queries that populate the dedupe finder table.
 *
 * The rule group identified by $id is loaded onto this object, the table
 * queries are obtained from the query optimizer and, when any exist, they
 * are handed to runTablesQuery() together with the rule group threshold.
 *
 * @internal do not access from outside core.
 *
 * @param int $id
 *   ID of the dedupe rule group to load onto this object.
 * @param array $contactIDs
 *   Passed through to the query optimizer.
 * @param array $params
 *   Passed to the query optimizer and on to runTablesQuery().
 * @param bool $legacyMode
 *   Legacy mode is called to give backward hook compatibility, in the
 *   legacydedupefinder extension. It is intended to be transitional, with
 *   the non-legacy mode being separated out and optimized once it no longer
 *   has to comply with the legacy hook and reserved query methodology.
 *
 * @return false|string
 *   FALSE when there are no table queries to run, otherwise the value
 *   returned by runTablesQuery().
 * @throws \Civi\Core\Exception\DBQueryException
 */
public function fillTable(int $id, array $contactIDs, array $params, $legacyMode = TRUE) {
  // Make sure we are working with a fetched DB record for the requested ID.
  $this->id = $id;
  $this->find(TRUE);

  $queryOptimizer = new CRM_Dedupe_FinderQueryOptimizer($id, $contactIDs, $params);

  // Reserved rule groups can optionally get special treatment by implementing
  // an optimization class and returning a query array - legacy mode only.
  $queries = ($legacyMode && $queryOptimizer->isUseReservedQuery())
    ? $queryOptimizer->getReservedQuery()
    : $queryOptimizer->getRuleQueries();

  if ($legacyMode) {
    if (!$queries) {
      // Flag the absence of rules - just for the hook (which is deprecated).
      $this->noRules = TRUE;
    }
    // The hook is handed the query list and may leave it altered.
    CRM_Utils_Hook::dupeQuery($this, 'table', $queries);
  }

  if (empty($queries)) {
    return FALSE;
  }

  return $this->runTablesQuery($params, $queries, $this->threshold);
}

/**
* Function to determine if a given query set contains inclusive or exclusive set of weights.
* The function assumes that the query set is already ordered by weight in desc order.
Expand Down
48 changes: 0 additions & 48 deletions CRM/Dedupe/FinderQueryOptimizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,30 +69,6 @@ public function __construct(int $dedupeRuleGroupID, array $contactIDs, array $pa
}
}

/**
 * Check whether a file based reserved query applies to this rule group.
 *
 * This is the case when the rule group is flagged as reserved and a query
 * builder file named after the rule group can be included from
 * CRM/Dedupe/BAO/QueryBuilder/.
 *
 * File based reserved queries were an early idea about how to optimise the
 * dedupe queries. In theory extensions could implement them although there
 * is no evidence any of them have. However, if these are implemented by core
 * or by extensions we should not attempt to optimise the query by (e.g.)
 * combining queries.
 *
 * In practice the queries implemented only return one query anyway.
 *
 * @internal for core use only.
 *
 * @return bool
 * @throws \CRM_Core_Exception
 *
 * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualGeneral
 * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualSupervised
 */
public function isUseReservedQuery(): bool {
  // Non-reserved rule groups never use a file based query.
  if (!$this->lookup('RuleGroup', 'is_reserved')) {
    return FALSE;
  }
  $builderFile = 'CRM/Dedupe/BAO/QueryBuilder/' . $this->lookup('RuleGroup', 'name') . '.php';
  return (bool) CRM_Utils_File::isIncludable($builderFile);
}

/**
* Return the SQL query for the given rule - either for finding matching
* pairs of contacts, or for matching against the $params variable (if set).
Expand Down Expand Up @@ -228,30 +204,6 @@ private function getContactIDFieldName(string $tableName): string {
throw new CRM_Core_Exception('invalid field');
}

/**
 * Fetch the reserved query from the rule group's static query builder class.
 *
 * A rule group BAO is loaded for the looked-up rule group ID, given the
 * lookup parameters and contact IDs, and passed to the static method of
 * CRM_Dedupe_BAO_QueryBuilder_<rule group name>: 'internal' when there are
 * no lookup parameters, 'record' otherwise.
 *
 * This was an early idea about optimisation & extendability. It is likely
 * there are no implementations of rules this way outside the 3 core files.
 *
 * It is also likely the core files can go once we are optimising the queries
 * based on the rule.
 *
 * @internal Do not call from outside of core.
 *
 * @return array
 * @throws \CRM_Core_Exception
 */
public function getReservedQuery(): array {
  $ruleGroup = new CRM_Dedupe_BAO_DedupeRuleGroup();
  $ruleGroup->id = $this->lookup('RuleGroup', 'id');
  $ruleGroup->find(TRUE);
  $ruleGroup->params = $this->lookupParameters;
  $ruleGroup->contactIds = $this->contactIDs;

  // Without lookup parameters the 'internal' builder method is used,
  // with them the 'record' builder method.
  $builderMethod = 'record';
  if (empty($this->lookupParameters)) {
    $builderMethod = 'internal';
  }
  $builderClass = "CRM_Dedupe_BAO_QueryBuilder_" . $this->lookup('RuleGroup', 'name');

  return call_user_func([$builderClass, $builderMethod], $ruleGroup);
}

/**
* Get the queries to fill the table for the various rules.
*
Expand Down
47 changes: 45 additions & 2 deletions ext/legacydedupefinder/Civi/LegacyFinder/Finder.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ public static function findExistingDuplicates(GenericHookEvent $event): void {
$contactIDs = explode(',', \CRM_Core_DAO::singleValueQuery('SELECT GROUP_CONCAT(id) FROM ' . $event->tableName));
}
$ruleGroup->contactIds = $contactIDs;
$tempTable = $ruleGroup->fillTable($ruleGroup->id, $contactIDs, []);
// make sure we've got a fetched db record, not sure if this is enforced
$ruleGroup->find(TRUE);
$tempTable = self::fillTable($ruleGroup, $ruleGroup->id, $contactIDs, []);
if (!$tempTable) {
return;
}
Expand Down Expand Up @@ -79,7 +81,9 @@ public static function findDuplicates(GenericHookEvent $event): void {
}
$rgBao = new \CRM_Dedupe_BAO_DedupeRuleGroup();
$rgBao->params = $event->dedupeParams['match_params'];
$dedupeTable = $rgBao->fillTable($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], TRUE);
// make sure we've got a fetched dbrecord, not sure if this is enforced
$rgBao->find(TRUE);
$dedupeTable = self::fillTable($rgBao, $event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], TRUE);
if (!$dedupeTable) {
$event->dedupeResults['ids'] = [];
return;
Expand Down Expand Up @@ -112,4 +116,43 @@ public static function findDuplicates(GenericHookEvent $event): void {
$event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']);
}

/**
 * Build and run the queries that populate the dedupe finder table.
 *
 * Legacy variant: a reserved query is used when the optimizer reports one,
 * and the (deprecated) dupeQuery hook is always invoked with the query list
 * before the queries are run.
 *
 * @internal do not access from outside core
 *
 * @param \CRM_Dedupe_BAO_DedupeRuleGroup $ruleGroup
 *   Rule group object; its threshold is read, it is passed to the hook and
 *   it runs the resulting queries.
 * @param int $id
 *   Rule group ID handed to the query optimizer.
 * @param array $contactIDs
 *   Passed through to the query optimizer.
 * @param array $params
 *   Passed to the query optimizer and on to runTablesQuery().
 *
 * @return false|string
 *   FALSE when there are no table queries to run, otherwise the value
 *   returned by runTablesQuery().
 * @throws \CRM_Core_Exception
 */
private static function fillTable($ruleGroup, int $id, array $contactIDs, array $params) {
  $queryOptimizer = new FinderQueryOptimizer($id, $contactIDs, $params);

  // Reserved Rule Groups can optionally get special treatment by
  // implementing an optimization class and returning a query array.
  $queries = $queryOptimizer->isUseReservedQuery()
    ? $queryOptimizer->getReservedQuery()
    : $queryOptimizer->getRuleQueries();

  if (!$queries) {
    // Flag the absence of rules - just for the hook (which is deprecated).
    $ruleGroup->noRules = TRUE;
  }
  // The hook is handed the query list and may leave it altered.
  \CRM_Utils_Hook::dupeQuery($ruleGroup, 'table', $queries);

  if (empty($queries)) {
    return FALSE;
  }

  return $ruleGroup->runTablesQuery($params, $queries, $ruleGroup->threshold);
}

}
Loading