From fd8ebd91385d7530140e93a85e908482a1d3ded5 Mon Sep 17 00:00:00 2001 From: eileen Date: Mon, 10 Feb 2025 18:43:09 +1300 Subject: [PATCH 1/3] Update to-keep functions to by-pass fillTable() --- CRM/Dedupe/BAO/DedupeRuleGroup.php | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/CRM/Dedupe/BAO/DedupeRuleGroup.php b/CRM/Dedupe/BAO/DedupeRuleGroup.php index d386f98bfc1d..0d3dfbaad81d 100644 --- a/CRM/Dedupe/BAO/DedupeRuleGroup.php +++ b/CRM/Dedupe/BAO/DedupeRuleGroup.php @@ -150,7 +150,15 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve if ($event->tableName) { $contactIDs = explode(',', CRM_Core_DAO::singleValueQuery('SELECT GROUP_CONCAT(id) FROM ' . $event->tableName)); } - $tempTable = $ruleGroup->fillTable($ruleGroup->id, $contactIDs, [], FALSE); + + $optimizer = new CRM_Dedupe_FinderQueryOptimizer($ruleGroup->id, $contactIDs, []); + $tableQueries = $optimizer->getRuleQueries(); + if (empty($tableQueries)) { + return; + } + $threshold = $ruleGroup->threshold; + + $tempTable = $ruleGroup->runTablesQuery([], $tableQueries, $threshold); if (!$tempTable) { return; } @@ -191,16 +199,24 @@ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): voi // @todo - in time we can deprecate this & expect them to use stopPropagation(). return; } - $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup(); - $dedupeTable = $rgBao->fillTable($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], FALSE); + $ruleGroup = new CRM_Dedupe_BAO_DedupeRuleGroup(); + + $optimizer = new CRM_Dedupe_FinderQueryOptimizer($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params']); + $tableQueries = $optimizer->getRuleQueries(); + if (empty($tableQueries)) { + return; + } + $threshold = $ruleGroup->threshold; + + $dedupeTable = $ruleGroup->runTablesQuery($event->dedupeParams['match_params'], $tableQueries, $threshold); if (!$dedupeTable) { $event->dedupeResults['ids'] = []; return; } $aclFrom = ''; $aclWhere = ''; - $contactType = $rgBao->contact_type; - $threshold = $rgBao->threshold; + $contactType = $ruleGroup->contact_type; + $threshold = $ruleGroup->threshold; if ($event->dedupeParams['check_permission']) { [$aclFrom, $aclWhere] = CRM_Contact_BAO_Contact_Permission::cacheClause('civicrm_contact'); $aclWhere = $aclWhere ? "AND {$aclWhere}" : ''; @@ -221,7 +237,7 @@ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): voi $dupes[] = $dao->id; } } - CRM_Core_DAO::executeQuery($rgBao->tableDropQuery()); + CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery()); $event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']); } From b66338554dcde08b87cd6aa89ab076db738e080a Mon Sep 17 00:00:00 2001 From: eileen Date: Mon, 10 Feb 2025 18:48:00 +1300 Subject: [PATCH 2/3] Move internal function fillTable to the legacy code --- CRM/Dedupe/BAO/DedupeRuleGroup.php | 71 ++++--------------- .../Civi/LegacyFinder/Finder.php | 47 +++++++++++- 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/CRM/Dedupe/BAO/DedupeRuleGroup.php b/CRM/Dedupe/BAO/DedupeRuleGroup.php index 0d3dfbaad81d..e494d3bf839c 100644 --- a/CRM/Dedupe/BAO/DedupeRuleGroup.php +++ b/CRM/Dedupe/BAO/DedupeRuleGroup.php @@ -14,6 +14,7 @@ * @package CRM * @copyright CiviCRM LLC https://civicrm.org/licensing */ +use Civi\Api4\DedupeRuleGroup; use Civi\Core\Event\GenericHookEvent; /** @@ -194,29 +195,35 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve \CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery()); } + /** + * @throws \CRM_Core_Exception + * @throws \Civi\Core\Exception\DBQueryException + * @throws \Civi\API\Exception\UnauthorizedException + */ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): void { if (!empty($event->dedupeResults['handled'])) { // @todo - in time we can deprecate this & expect them to use stopPropagation(). return; } - $ruleGroup = new CRM_Dedupe_BAO_DedupeRuleGroup(); $optimizer = new CRM_Dedupe_FinderQueryOptimizer($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params']); $tableQueries = $optimizer->getRuleQueries(); if (empty($tableQueries)) { return; } - $threshold = $ruleGroup->threshold; - $dedupeTable = $ruleGroup->runTablesQuery($event->dedupeParams['match_params'], $tableQueries, $threshold); + $ruleGroup = DedupeRuleGroup::get(FALSE) + ->addWhere('id', '=', $event->dedupeParams['rule_group_id']) + ->execute() + ->first(); + $self = new self(); + $dedupeTable = $self->runTablesQuery($event->dedupeParams['match_params'], $tableQueries, $ruleGroup['threshold']); if (!$dedupeTable) { $event->dedupeResults['ids'] = []; return; } $aclFrom = ''; $aclWhere = ''; - $contactType = $ruleGroup->contact_type; - $threshold = $ruleGroup->threshold; if ($event->dedupeParams['check_permission']) { [$aclFrom, $aclWhere] = CRM_Contact_BAO_Contact_Permission::cacheClause('civicrm_contact'); $aclWhere = $aclWhere ? "AND {$aclWhere}" : ''; @@ -228,8 +235,8 @@ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): voi AND weight >= %2"; $dao = CRM_Core_DAO::executeQuery($query, [ - 1 => [$contactType, 'String'], - 2 => [$threshold, 'Integer'], + 1 => [$ruleGroup['contact_type'], 'String'], + 2 => [$ruleGroup['threshold'], 'Integer'], ]); $dupes = []; while ($dao->fetch()) { @@ -237,58 +244,10 @@ public static function hook_civicrm_findDuplicates(GenericHookEvent $event): voi $dupes[] = $dao->id; } } - CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery()); + CRM_Core_DAO::executeQuery('DROP TEMPORARY TABLE IF EXISTS ' . $dedupeTable); $event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']); } - /** - * Fill the dedupe finder table. - * - * @internal do not access from outside core. - * - * @param int $id - * @param array $contactIDs - * @param array $params - * @param bool $legacyMode - * Legacy mode is called to give backward hook compatibility, in the legacydedupefinder - * extension. It is intended to be transitional, with the non-legacy mode being - * separated out and optimized once it no longer has to comply with the legacy - * hook and reserved query methodology. - * - * @return false|string - * @throws \Civi\Core\Exception\DBQueryException - */ - public function fillTable(int $id, array $contactIDs, array $params, $legacyMode = TRUE) { - $ruleGroup = $this; - $ruleGroup->id = $id; - // make sure we've got a fetched dbrecord, not sure if this is enforced - $ruleGroup->find(TRUE); - $optimizer = new CRM_Dedupe_FinderQueryOptimizer($id, $contactIDs, $params); - // Reserved Rule Groups can optionally get special treatment by - // implementing an optimization class and returning a query array. - if ($legacyMode && $optimizer->isUseReservedQuery()) { - $tableQueries = $optimizer->getReservedQuery(); - } - else { - $tableQueries = $optimizer->getRuleQueries(); - } - // if there are no rules in this rule group - // add an empty query fulfilling the pattern - if ($legacyMode) { - if (!$tableQueries) { - // Just for the hook.... (which is deprecated). - $ruleGroup->noRules = TRUE; - } - CRM_Utils_Hook::dupeQuery($ruleGroup, 'table', $tableQueries); - } - if (empty($tableQueries)) { - return FALSE; - } - $threshold = $ruleGroup->threshold; - - return $this->runTablesQuery($params, $tableQueries, $threshold); - } - /** * Function to determine if a given query set contains inclusive or exclusive set of weights. * The function assumes that the query set is already ordered by weight in desc order. diff --git a/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php b/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php index 3135dac774fe..b5936d7c421d 100644 --- a/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php +++ b/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php @@ -29,7 +29,9 @@ public static function findExistingDuplicates(GenericHookEvent $event): void { $contactIDs = explode(',', \CRM_Core_DAO::singleValueQuery('SELECT GROUP_CONCAT(id) FROM ' . $event->tableName)); } $ruleGroup->contactIds = $contactIDs; - $tempTable = $ruleGroup->fillTable($ruleGroup->id, $contactIDs, []); + // make sure we've got a fetched dbrecord, not sure if this is enforced + $ruleGroup->find(TRUE); + $tempTable = self::fillTable($ruleGroup, $ruleGroup->id, $contactIDs, []); if (!$tempTable) { return; } @@ -79,7 +81,9 @@ public static function findDuplicates(GenericHookEvent $event): void { } $rgBao = new \CRM_Dedupe_BAO_DedupeRuleGroup(); $rgBao->params = $event->dedupeParams['match_params']; - $dedupeTable = $rgBao->fillTable($event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], TRUE); + // make sure we've got a fetched dbrecord, not sure if this is enforced + $rgBao->find(TRUE); + $dedupeTable = self::fillTable($rgBao, $event->dedupeParams['rule_group_id'], [], $event->dedupeParams['match_params'], TRUE); if (!$dedupeTable) { $event->dedupeResults['ids'] = []; return; @@ -112,4 +116,43 @@ public static function findDuplicates(GenericHookEvent $event): void { $event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']); } + + /** + * Fill the dedupe finder table. + * + * @internal do not access from outside core. + * + * @param int $id + * @param array $contactIDs + * @param array $params + * + * @return false|string + * @throws \Civi\Core\Exception\DBQueryException + */ + private static function fillTable($ruleGroup, int $id, array $contactIDs, array $params) { + $optimizer = new \CRM_Dedupe_FinderQueryOptimizer($id, $contactIDs, $params); + // Reserved Rule Groups can optionally get special treatment by + // implementing an optimization class and returning a query array. + if ($optimizer->isUseReservedQuery()) { + $tableQueries = $optimizer->getReservedQuery(); + } + else { + $tableQueries = $optimizer->getRuleQueries(); + } + // if there are no rules in this rule group + // add an empty query fulfilling the pattern + if (!$tableQueries) { + // Just for the hook.... (which is deprecated). + $ruleGroup->noRules = TRUE; + } + \CRM_Utils_Hook::dupeQuery($ruleGroup, 'table', $tableQueries); + + if (empty($tableQueries)) { + return FALSE; + } + $threshold = $ruleGroup->threshold; + + return $ruleGroup->runTablesQuery($params, $tableQueries, $threshold); + } + } From 940f010ef9c1c20215aec700e0cb90030144a1ed Mon Sep 17 00:00:00 2001 From: eileen Date: Mon, 10 Feb 2025 18:52:03 +1300 Subject: [PATCH 3/3] Take static copy of the query finder for the legacy code function --- CRM/Dedupe/BAO/DedupeRuleGroup.php | 14 +- CRM/Dedupe/FinderQueryOptimizer.php | 48 --- .../Civi/LegacyFinder/Finder.php | 10 +- .../LegacyFinder/FinderQueryOptimizer.php | 277 ++++++++++++++++++ 4 files changed, 291 insertions(+), 58 deletions(-) create mode 100644 ext/legacydedupefinder/Civi/LegacyFinder/FinderQueryOptimizer.php diff --git a/CRM/Dedupe/BAO/DedupeRuleGroup.php b/CRM/Dedupe/BAO/DedupeRuleGroup.php index e494d3bf839c..ad9c64cf1363 100644 --- a/CRM/Dedupe/BAO/DedupeRuleGroup.php +++ b/CRM/Dedupe/BAO/DedupeRuleGroup.php @@ -157,16 +157,20 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve if (empty($tableQueries)) { return; } - $threshold = $ruleGroup->threshold; - $tempTable = $ruleGroup->runTablesQuery([], $tableQueries, $threshold); + $ruleGroup = DedupeRuleGroup::get(FALSE) + ->addWhere('id', '=', $ruleGroup->id) + ->execute() + ->first(); + $self = new self(); + $tempTable = $self->runTablesQuery([], $tableQueries, $ruleGroup['threshold']); if (!$tempTable) { return; } $aclFrom = $aclWhere = ''; $dedupeTable = $tempTable; - $contactType = $ruleGroup->contact_type; - $threshold = $ruleGroup->threshold; + $contactType = $ruleGroup['contact_type']; + $threshold = $ruleGroup['threshold']; if ($event->checkPermissions) { [$aclFrom, $aclWhere] = CRM_Contact_BAO_Contact_Permission::cacheClause(['c1', 'c2']); @@ -192,7 +196,7 @@ public static function hook_civicrm_findExistingDuplicates(GenericHookEvent $eve $duplicates[] = ['entity_id_1' => $dao->id1, 'entity_id_2' => $dao->id2, 'weight' => $dao->weight]; } $event->duplicates = $duplicates; - \CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery()); + \CRM_Core_DAO::executeQuery('DROP TEMPORARY TABLE IF EXISTS ' . $dedupeTable); } /** diff --git a/CRM/Dedupe/FinderQueryOptimizer.php b/CRM/Dedupe/FinderQueryOptimizer.php index 884984a19d3c..a4ee16a46f92 100644 --- a/CRM/Dedupe/FinderQueryOptimizer.php +++ b/CRM/Dedupe/FinderQueryOptimizer.php @@ -69,30 +69,6 @@ public function __construct(int $dedupeRuleGroupID, array $contactIDs, array $pa } } - /** - * Is a file based reserved query configured. - * - * File based reserved queries were an early idea about how to optimise the dedupe queries. - * - * In theory extensions could implement them although there is no evidence any of them have. - * However, if these are implemented by core or by extensions we should not attempt to optimise - * the query by (e.g.) combining queries. - * - * In practice the queries implemented only return one query anyway - * - * @internal for core use only. - * - * @return bool - * @throws \CRM_Core_Exception - * - * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualGeneral - * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualSupervised - */ - public function isUseReservedQuery(): bool { - return $this->lookup('RuleGroup', 'is_reserved') && - CRM_Utils_File::isIncludable('CRM/Dedupe/BAO/QueryBuilder/' . $this->lookup('RuleGroup', 'name') . '.php'); - } - /** * Return the SQL query for the given rule - either for finding matching * pairs of contacts, or for matching against the $params variable (if set). @@ -228,30 +204,6 @@ private function getContactIDFieldName(string $tableName): string { throw new CRM_Core_Exception('invalid field'); } - /** - * Get the reserved query based on a static class. - * - * This was an early idea about optimisation & extendability. It is likely - * there are no implementations of rules this way outside the 3 core files. - * - * It is also likely the core files can go once we are optimising the queries based on the - * rule. - * - * @internal Do not call from outside of core. - * - * @return array - * @throws \CRM_Core_Exception - */ - public function getReservedQuery(): array { - $bao = new CRM_Dedupe_BAO_DedupeRuleGroup(); - $bao->id = $this->lookup('RuleGroup', 'id'); - $bao->find(TRUE); - $bao->params = $this->lookupParameters; - $bao->contactIds = $this->contactIDs; - $command = empty($this->lookupParameters) ? 'internal' : 'record'; - return call_user_func(["CRM_Dedupe_BAO_QueryBuilder_" . $this->lookup('RuleGroup', 'name'), $command], $bao); - } - /** * Get the queries to fill the table for the various rules. * diff --git a/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php b/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php index b5936d7c421d..ad3a6b75b0ed 100644 --- a/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php +++ b/ext/legacydedupefinder/Civi/LegacyFinder/Finder.php @@ -29,7 +29,7 @@ public static function findExistingDuplicates(GenericHookEvent $event): void { $contactIDs = explode(',', \CRM_Core_DAO::singleValueQuery('SELECT GROUP_CONCAT(id) FROM ' . $event->tableName)); } $ruleGroup->contactIds = $contactIDs; - // make sure we've got a fetched dbrecord, not sure if this is enforced + // make sure we've got a fetched db record, not sure if this is enforced $ruleGroup->find(TRUE); $tempTable = self::fillTable($ruleGroup, $ruleGroup->id, $contactIDs, []); if (!$tempTable) { @@ -116,21 +116,21 @@ public static function findDuplicates(GenericHookEvent $event): void { $event->dedupeResults['ids'] = array_diff($dupes, $event->dedupeParams['excluded_contact_ids']); } - /** * Fill the dedupe finder table. * - * @internal do not access from outside core. + * @internal do not access from outside core * + * @param \CRM_Dedupe_BAO_DedupeRuleGroup $ruleGroup * @param int $id * @param array $contactIDs * @param array $params * * @return false|string - * @throws \Civi\Core\Exception\DBQueryException + * @throws \CRM_Core_Exception */ private static function fillTable($ruleGroup, int $id, array $contactIDs, array $params) { - $optimizer = new \CRM_Dedupe_FinderQueryOptimizer($id, $contactIDs, $params); + $optimizer = new FinderQueryOptimizer($id, $contactIDs, $params); // Reserved Rule Groups can optionally get special treatment by // implementing an optimization class and returning a query array. if ($optimizer->isUseReservedQuery()) { diff --git a/ext/legacydedupefinder/Civi/LegacyFinder/FinderQueryOptimizer.php b/ext/legacydedupefinder/Civi/LegacyFinder/FinderQueryOptimizer.php new file mode 100644 index 000000000000..31378556dfa3 --- /dev/null +++ b/ext/legacydedupefinder/Civi/LegacyFinder/FinderQueryOptimizer.php @@ -0,0 +1,277 @@ +define('DedupeRuleGroup', 'RuleGroup', ['id' => $dedupeRuleGroupID]); + foreach ($contactIDs as $cid) { + $this->contactIDs[] = \CRM_Utils_Type::escape($cid, 'Integer'); + } + $this->lookupParameters = $params; + $rules = DedupeRule::get(FALSE) + ->addSelect('*', 'dedupe_rule_group_id.threshold') + ->addWhere('dedupe_rule_group_id', '=', $dedupeRuleGroupID) + ->addOrderBy('rule_weight', 'DESC') + ->execute(); + foreach ($rules as $index => $rule) { + // Filter out the rule if this is a parameters lookup & it is not in the rules. + if (!$this->lookupParameters || (array_key_exists($rule['rule_table'], $this->lookupParameters) && array_key_exists($rule['rule_field'], $this->lookupParameters[$rule['rule_table']]))) { + $key = $rule['rule_table'] . '.' . $rule['rule_field'] . '.' . $rule['rule_weight']; + $this->queries[$key] = [ + 'table' => $rule['rule_table'], + 'field' => $rule['rule_field'], + 'weight' => $rule['rule_weight'], + 'length' => $rule['rule_length'], + 'key' => $key, + 'order' => $index + 1, + ]; + $this->queries[$key]['query'] = $this->getQuery($this->queries[$key]); + } + $this->threshold = $rule['dedupe_rule_group_id.threshold']; + } + } + + /** + * Is a file based reserved query configured. + * + * File based reserved queries were an early idea about how to optimise the dedupe queries. + * + * In theory extensions could implement them although there is no evidence any of them have. + * However, if these are implemented by core or by extensions we should not attempt to optimise + * the query by (e.g.) combining queries. + * + * In practice the queries implemented only return one query anyway + * + * @internal for core use only. + * + * @return bool + * @throws \CRM_Core_Exception + * + * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualGeneral + * @see \CRM_Dedupe_BAO_QueryBuilder_IndividualSupervised + */ + public function isUseReservedQuery(): bool { + return $this->lookup('RuleGroup', 'is_reserved') && + \CRM_Utils_File::isIncludable('CRM/Dedupe/BAO/QueryBuilder/' . $this->lookup('RuleGroup', 'name') . '.php'); + } + + /** + * Return the SQL query for the given rule - either for finding matching + * pairs of contacts, or for matching against the $params variable (if set). + * + * @param array $rule + * + * @return string|null + * SQL query performing the search + * or NULL if params is present and doesn't have and for a field. + * + * @throws \CRM_Core_Exception + * @internal do not call from outside tested core code. No universe uses Feb 2024. + * + */ + public function getQuery(array $rule): ?string { + + $filter = $this->getRuleTableFilter($rule['table']); + $contactIDFieldName = $this->getContactIDFieldName($rule['table']); + + // build FROM (and WHERE, if it's a parametrised search) + // based on whether the rule is about substrings or not + if ($this->lookupParameters) { + $select = "t1.$contactIDFieldName id1, {$rule['weight']} weight"; + $subSelect = 'id1, weight'; + $where = $filter ? ['t1.' . $filter] : []; + $from = "{$rule['table']} t1"; + $str = 'NULL'; + if (isset($this->lookupParameters[$rule['table']][$rule['field']])) { + $str = trim(\CRM_Utils_Type::escape($this->lookupParameters[$rule['table']][$rule['field']], 'String')); + } + if ($rule['length']) { + $where[] = "SUBSTR(t1.{$rule['field']}, 1, {$rule['length']}) = SUBSTR('$str', 1, {$rule['length']})"; + $where[] = "t1.{$rule['field']} IS NOT NULL"; + } + else { + $where[] = "t1.{$rule['field']} = '$str'"; + } + } + else { + $select = "t1.$contactIDFieldName id1, t2.$contactIDFieldName id2, {$rule['weight']} weight"; + $subSelect = 'id1, id2, weight'; + $where = $filter ? [ + 't1.' . $filter, + 't2.' . $filter, + ] : []; + $where[] = "t1.$contactIDFieldName < t2.$contactIDFieldName"; + $from = "{$rule['table']} t1 INNER JOIN {$rule['table']} t2 ON (" . self::getRuleFieldFilter($rule) . ")"; + } + + $sql = "SELECT $select FROM $from WHERE " . implode(' AND ', $where); + if ($this->contactIDs) { + $cids = $this->contactIDs; + $sql .= " AND t1.$contactIDFieldName IN (" . implode(',', $cids) . ") + UNION $sql AND t2.$contactIDFieldName IN (" . implode(',', $cids) . ")"; + + // The `weight` is ambiguous in the context of the union; put the whole + // thing in a subquery. + $sql = "SELECT $subSelect FROM ($sql) subunion"; + } + return $sql; + } + + /** + * Get any where filter that restricts the specific table. + * + * Generally this is along the lines of entity_table = civicrm_contact + * although for the contact table it could be the id restriction. + * + * @param string $tableName + * + * @return string + * @throws \CRM_Core_Exception + */ + private function getRuleTableFilter(string $tableName): string { + if ($tableName === 'civicrm_contact') { + return "contact_type = '" . $this->lookup('RuleGroup', 'contact_type') . "'"; + } + $dynamicReferences = \CRM_Core_DAO::getDynamicReferencesToTable('civicrm_contact')[$tableName] ?? NULL; + if (!$dynamicReferences) { + return ''; + } + if (!empty(\CRM_Core_DAO::getDynamicReferencesToTable('civicrm_contact')[$tableName])) { + return $dynamicReferences[1] . "= 'civicrm_contact'"; + } + return ''; + } + + /** + * @param array $rule + * + * @return string + * @throws \CRM_Core_Exception + */ + private function getRuleFieldFilter(array $rule): string { + if ($rule['length']) { + $on = ["SUBSTR(t1.{$rule['field']}, 1, {$rule['length']}) = SUBSTR(t2.{$rule['field']}, 1, {$rule['length']})"]; + return "(" . implode(' AND ', $on) . ")"; + } + $innerJoinClauses = [ + "t1.{$rule['field']} IS NOT NULL", + "t2.{$rule['field']} IS NOT NULL", + "t1.{$rule['field']} = t2.{$rule['field']}", + ]; + + if (in_array(\CRM_Dedupe_BAO_DedupeRule::getFieldType($rule['field'], $rule['table']), \CRM_Utils_Type::getTextTypes(), TRUE)) { + $innerJoinClauses[] = "t1.{$rule['field']} <> ''"; + $innerJoinClauses[] = "t2.{$rule['field']} <> ''"; + } + return "(" . implode(' AND ', $innerJoinClauses) . ")"; + } + + /** + * Get the name of the field in the table that refers to the Contact ID. + * + * e.g in civicrm_contact this is 'id' whereas in civicrm_address this is + * contact_id and in a custom field table it might be entity_id. + * + * @param string $tableName + * + * @return string + * Usually id, contact_id or entity_id. + * @throws \CRM_Core_Exception + */ + private function getContactIDFieldName(string $tableName): string { + if ($tableName === 'civicrm_contact') { + return 'id'; + } + if (isset(\CRM_Core_DAO::getDynamicReferencesToTable('civicrm_contact')[$tableName][0])) { + return \CRM_Core_DAO::getDynamicReferencesToTable('civicrm_contact')[$tableName][0]; + } + if (isset(\CRM_Core_DAO::getReferencesToContactTable()[$tableName][0])) { + return \CRM_Core_DAO::getReferencesToContactTable()[$tableName][0]; + } + throw new \CRM_Core_Exception('invalid field'); + } + + /** + * Get the reserved query based on a static class. + * + * This was an early idea about optimisation & extendability. It is likely + * there are no implementations of rules this way outside the 3 core files. + * + * It is also likely the core files can go once we are optimising the queries based on the + * rule. + * + * @internal Do not call from outside of core. + * + * @return array + * @throws \CRM_Core_Exception + */ + public function getReservedQuery(): array { + $bao = new \CRM_Dedupe_BAO_DedupeRuleGroup(); + $bao->id = $this->lookup('RuleGroup', 'id'); + $bao->find(TRUE); + $bao->params = $this->lookupParameters; + $bao->contactIds = $this->contactIDs; + $command = empty($this->lookupParameters) ? 'internal' : 'record'; + return call_user_func(["CRM_Dedupe_BAO_QueryBuilder_" . $this->lookup('RuleGroup', 'name'), $command], $bao); + } + + /** + * Get the queries to fill the table for the various rules. + * + * Return a set of SQL queries whose cumulative weights will mark matched + * records for the RuleGroup::thresholdQuery() to retrieve. + * + * @internal do not call from outside tested core code. + * + * @return array + */ + public function getRuleQueries(): array { + $queries = []; + foreach ($this->queries as $rule) { + $queries[$rule['key']] = $rule['query']; + } + return $queries; + } + +}