Solr_Base_Query.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. <?php
  /**
   * This class allows you to make operations on a query that will be sent to
   * Apache Solr. It offers methods for adding and removing sorts, removing and
   * replacing parameters, adding and removing filters, plus getters and setters
   * for various parameters, and more.
   * @file
   * Class that defines the base query for the Apache Solr Drupal module.
   */
  /**
   * A group of Solr filters (and nested filter groups) joined by one operator.
   *
   * Holds a flat list of name/value filters plus any number of nested
   * SolrFilterSubQuery objects, and knows how to turn them into the strings
   * that are sent to Solr as fq parameters.
   */
  class SolrFilterSubQuery {

    /**
     * Static shared by all instances, used to increment ID numbers.
     */
    protected static $idCount = 0;

    /**
     * Each query/subquery will have a unique ID.
     */
    public $id;

    /**
     * The operator placed between this subquery's filters when a parent query
     * renders it, e.g. 'OR' or 'AND'.
     */
    public $operator;

    /**
     * A keyed array where the key is a position integer and the value
     * is an array with #name, #value, #exclude and #local properties. Each
     * value is used for filter queries, e.g.
     * array('#name' => 'is_uid', '#value' => 0) for anonymous content.
     */
    protected $fields = array();

    /**
     * An array of subqueries, keyed by subquery ID.
     */
    protected $subqueries = array();

    /**
     * Creates a filter group and assigns it the next unique ID.
     *
     * @param string $operator
     *   The operator used to join this group's filters.
     */
    public function __construct($operator = 'OR') {
      $this->operator = $operator;
      $this->id = ++self::$idCount;
    }

    /**
     * Gives a cloned query its own unique ID.
     *
     * Only the ID is renewed here; nested subquery objects are not cloned by
     * this method, so a clone keeps referring to the same subquery instances.
     */
    public function __clone() {
      $this->id = ++self::$idCount;
    }

    /**
     * Returns the stored filters, optionally limited to one field name.
     *
     * @param string $name
     *   A field name. When empty, every filter is returned.
     *
     * @return array
     *   The full position-keyed filter list when $name is empty, otherwise a
     *   re-indexed list of the filters whose #name equals $name.
     */
    public function getFilters($name = NULL) {
      if (empty($name)) {
        return $this->fields;
      }
      $matches = array();
      foreach ($this->fields as $filter) {
        if ($filter['#name'] == $name) {
          $matches[] = $filter;
        }
      }
      return $matches;
    }

    /**
     * Tells whether a filter with this name, value and exclude flag exists.
     *
     * Comparison is deliberately loose (==) because addFilter() stores values
     * as trimmed strings while callers may pass integers or booleans.
     *
     * @return bool
     */
    public function hasFilter($name, $value, $exclude = FALSE) {
      foreach ($this->fields as $values) {
        if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) {
          return TRUE;
        }
      }
      return FALSE;
    }

    /**
     * Appends a filter to this query.
     *
     * @param string $name
     *   The field name; may be empty or NULL for a value-only filter.
     * @param string $value
     *   The filter value.
     * @param bool $exclude
     *   TRUE for a negative ("-name:value") filter.
     * @param string $local
     *   Local params, without the surrounding "{!" and "}".
     *
     * @return SolrFilterSubQuery
     *   The called object.
     */
    public function addFilter($name, $value, $exclude = FALSE, $local = '') {
      // @todo - escape the value if it has spaces in it and is not a range query or parenthesized.
      // The string casts keep trim() from receiving NULL (deprecated since
      // PHP 8.1); the stored result is the same trimmed string as before.
      $this->fields[] = array(
        '#exclude' => (bool) $exclude,
        '#name' => trim((string) $name),
        '#value' => trim((string) $value),
        '#local' => trim((string) $local),
      );
      return $this;
    }

    /**
     * Removes filters by name, or by name, value and exclude flag.
     *
     * @param string $name
     *   The field name.
     * @param string $value
     *   When NULL, every filter on $name is removed regardless of $exclude.
     * @param bool $exclude
     *   Only used when $value is given.
     *
     * @return SolrFilterSubQuery
     *   The called object.
     */
    public function removeFilter($name, $value = NULL, $exclude = FALSE) {
      // Remove from the public list of filters.
      $this->unsetFilter($this->fields, $name, $value, $exclude);
      return $this;
    }

    /**
     * Unsets matching entries from a filter list, in place.
     *
     * Keys of the remaining entries are left untouched.
     */
    protected function unsetFilter(&$fields, $name, $value, $exclude) {
      $by_name_only = !isset($value);
      foreach ($fields as $pos => $values) {
        if ($values['#name'] != $name) {
          continue;
        }
        if ($by_name_only || ($values['#value'] == $value && $values['#exclude'] == $exclude)) {
          unset($fields[$pos]);
        }
      }
    }

    /**
     * Returns the nested subqueries, keyed by their IDs.
     */
    public function getFilterSubQueries() {
      return $this->subqueries;
    }

    /**
     * Adds (or replaces, if the ID is already present) a nested subquery.
     *
     * @return SolrFilterSubQuery
     *   The called object.
     */
    public function addFilterSubQuery(SolrFilterSubQuery $query) {
      $this->subqueries[$query->id] = $query;
      return $this;
    }

    /**
     * Removes one nested subquery, looked up by its ID.
     *
     * @return SolrFilterSubQuery
     *   The called object.
     */
    public function removeFilterSubQuery(SolrFilterSubQuery $query) {
      unset($this->subqueries[$query->id]);
      return $this;
    }

    /**
     * Removes every nested subquery.
     *
     * @return SolrFilterSubQuery
     *   The called object.
     */
    public function removeFilterSubQueries() {
      $this->subqueries = array();
      return $this;
    }

    /**
     * Renders one filter array as a Solr filter string.
     *
     * @param array $filter
     *   A filter with #name and #value, and optionally #exclude and #local.
     *
     * @return string
     *   "[{!local}][-]name:value", or "[{!local}][-]value" when the filter has
     *   no name.
     */
    public function makeFilterQuery(array $filter) {
      $name = isset($filter['#name']) ? (string) $filter['#name'] : '';
      $value = isset($filter['#value']) ? (string) $filter['#value'] : '';

      $prefix = empty($filter['#exclude']) ? '' : '-';
      // #local is optional for filter arrays built by callers, so guard the
      // access instead of raising an undefined-index notice.
      if (!empty($filter['#local'])) {
        $prefix = '{!' . $filter['#local'] . '}' . $prefix;
      }
      // If the field value contains a colon or a space, wrap it in double quotes,
      // unless it is a range query or is already wrapped in double quotes or
      // parentheses.
      if (preg_match('/[ :]/', $value) && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $value) && !preg_match('/^["\(].*["\)]$/', $value)) {
        $value = '"' . $value . '"';
      }
      // Value-only filters must not get a dangling ":" in front of the value.
      if ($name === '') {
        return $prefix . $value;
      }
      return $prefix . $name . ':' . $value;
    }

    /**
     * Make sure our query matches the pattern name:value or name:"value"
     * Make sure that if we are ranges we use name:[ AND ]
     * allowed inputs :
     * a. bundle:article
     * b. date:[1970-12-31T23:59:59Z TO NOW]
     * Split the text in 4 different parts
     * 1. name, eg.: bundle or date
     * 2. The first opening bracket (or nothing), eg.: [
     * 3. The value of the field, eg. article or 1970-12-31T23:59:59Z TO NOW
     * 4. The last closing bracket, eg.: ]
     *
     * Note that a string without any "name:" part is not inspected at all and
     * is reported as valid.
     *
     * @param string $filter
     *   The filter to validate
     * @return boolean
     */
    public static function validFilterValue($filter) {
      $matches = array();
      if (!preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) {
        return TRUE;
      }
      $name = $matches['name'];
      // The value group is optional ("name:"); treat a missing value as empty.
      $value = isset($matches['value']) ? $matches['value'] : '';

      // For the name we allow any character that fits between the A-Z0-9 range and
      // any alternative for this in other languages. No special characters allowed.
      // Negative filters may have a leading "-".
      if (!preg_match('/^-?[a-zA-Z0-9_\x7f-\xff]+$/', $name)) {
        return FALSE;
      }
      // For the value we allow anything that is UTF8
      if (!drupal_validate_utf8($value)) {
        return FALSE;
      }
      // Check our bracket count. If it does not match it is also not valid
      foreach (array('{' => '}', '[' => ']', '(' => ')') as $opening => $closing) {
        if (substr_count($value, $opening) != substr_count($value, $closing)) {
          return FALSE;
        }
      }
      // Check the date field inputs
      $datefields = array();
      if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) {
        // Only Allow a value in the form of
        // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html
        // http://wiki.apache.org/solr/SolrQuerySyntax
        // 1976-03-06T23:59:59.999Z (valid)
        // * (valid)
        // 1995-12-31T23:59:59.999Z (valid)
        // 2007-03-06T00:00:00Z (valid)
        // NOW-1YEAR/DAY (valid)
        // NOW/DAY+1DAY (valid)
        // 1976-03-06T23:59:59.999Z+1YEAR (valid)
        // 1976-03-06T23:59:59.999Z/YEAR (valid)
        // 1976-03-06T23::59::59.999Z (invalid)
        if (!empty($datefields[1]) && !empty($datefields[2])) {
          // Do not check the full value, only the two split parts.
          unset($datefields[0]);
          // Check if both matches are valid datefields. The pattern is not
          // anchored at its end, so only the start of a date-math expression
          // is actually verified.
          foreach ($datefields as $datefield) {
            if (!preg_match('/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)[A-Z0-9\+\-\/]*)/', $datefield)) {
              return FALSE;
            }
          }
        }
      }
      return TRUE;
    }

    /**
     * Builds a set of filter queries from $this->fields and all subqueries.
     *
     * Each subquery that yields at least one filter contributes a single
     * parenthesized string joined by that subquery's operator.
     *
     * Returns an array of strings that can be combined into
     * a URL query parameter or passed to Solr as fq parameters.
     */
    protected function rebuildFq() {
      $fq = array();
      foreach ($this->fields as $field) {
        $fq[] = $this->makeFilterQuery($field);
      }
      foreach ($this->subqueries as $subquery) {
        $subfq = $subquery->rebuildFq();
        if ($subfq) {
          $operator = $subquery->operator;
          $fq[] = "(" . implode(" $operator ", $subfq) . ")";
        }
      }
      return $fq;
    }

  }
  /**
   * The base Solr query: request parameters, filters, sorting and search path.
   *
   * Filters are never kept in the 'fq' parameter itself; they live in the
   * inherited filter list and are rendered on demand by rebuildFq().
   */
  class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface {

    /**
     * The parameters that get sent to Solr.
     */
    protected $params = array('start' => 0, 'rows' => 10, 'fq' => array());

    /**
     * The search base path.
     */
    protected $base_path;

    /**
     * Field alias map, see addFieldAliases().
     */
    protected $field_map = array();

    /**
     * DrupalApacheSolrService object
     */
    protected $solr;

    /**
     * The sorts a sort string may use.
     *
     * The array keys must always be real Solr index fields.
     */
    protected $available_sorts;

    /**
     * The query name is used to construct a searcher string. Mostly the
     * environment id
     */
    protected $name;

    /**
     * Context values, see addContext().
     */
    protected $context = array();

    /**
     * The raw "field direction" sort string last handed to this query.
     *
     * Declared explicitly because it used to be created as a dynamic property
     * (deprecated since PHP 8.2). It stays public so code that read the dynamic
     * property keeps working.
     */
    public $sortstring = '';

    /**
     * The parsed sort. Makes sure we always have a valid sort.
     */
    protected $solrsort = array('#name' => 'score', '#direction' => 'desc');

    /**
     * A flag to allow the search to be aborted.
     */
    public $abort_search = FALSE;

    /**
     * A flag to check if need to retrieve another page of the result set
     */
    public $page = 0;

    /**
     * Parameters that hold one scalar value instead of a list of values.
     *
     * NOTE(review): 'hl.simple.pre/hl.simple.post' looks like two parameter
     * names fused into a single key; as written neither 'hl.simple.pre' nor
     * 'hl.simple.post' is treated as single-valued. Left unchanged because
     * callers may rely on the array form returned for them - confirm.
     */
    protected $single_value_params = array(
      'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
      'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op
      'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
      'df' => TRUE,
      'qt' => TRUE,
      'defType' => TRUE,
      'timeAllowed' => TRUE,
      'omitHeader' => TRUE,
      'debugQuery' => TRUE,
      'start' => TRUE,
      'rows' => TRUE,
      'stats' => TRUE,
      'facet' => TRUE,
      'facet.prefix' => TRUE,
      'facet.limit' => TRUE,
      'facet.offset' => TRUE,
      'facet.mincount' => TRUE,
      'facet.missing' => TRUE,
      'facet.method' => TRUE,
      'facet.enum.cache.minDf' => TRUE,
      'facet.date.start' => TRUE,
      'facet.date.end' => TRUE,
      'facet.date.gap' => TRUE,
      'facet.date.hardend' => TRUE,
      'facet.date.other' => TRUE,
      'facet.date.include' => TRUE,
      'hl' => TRUE,
      'hl.snippets' => TRUE,
      'hl.fragsize' => TRUE,
      'hl.mergeContiguous' => TRUE,
      'hl.requireFieldMatch' => TRUE,
      'hl.maxAnalyzedChars' => TRUE,
      'hl.alternateField' => TRUE,
      'hl.maxAlternateFieldLength' => TRUE,
      'hl.formatter' => TRUE,
      'hl.simple.pre/hl.simple.post' => TRUE,
      'hl.fragmenter' => TRUE,
      'hl.fragListBuilder' => TRUE,
      'hl.fragmentsBuilder' => TRUE,
      'hl.useFastVectorHighlighter' => TRUE,
      'hl.usePhraseHighlighter' => TRUE,
      'hl.highlightMultiTerm' => TRUE,
      'hl.regex.slop' => TRUE,
      'hl.regex.pattern' => TRUE,
      'hl.regex.maxAnalyzedChars' => TRUE,
      'mm' => TRUE,
      'spellcheck' => TRUE,
    );

    /**
     * @param $name
     *   The search name, used for finding the correct blocks and other config.
     *   Typically "apachesolr".
     *
     * @param $solr
     *   An instantiated DrupalApacheSolrService Object.
     *   Can be instantiated from apachesolr_get_solr().
     *
     * @param $params
     *   Array of params to initialize the object (typically 'q' and 'fq').
     *
     * @param $sortstring
     *   Visible string telling solr how to sort - added to GET query params.
     *
     * @param $base_path
     *   The search base path (without the keywords) for this query, without trailing slash.
     *
     * @param $context
     *   Initial context values; 'env_id' is always overwritten with the ID of
     *   $solr.
     */
    public function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) {
      parent::__construct();
      $this->name = $name;
      $this->solr = $solr;
      $this->addContext((array) $context);
      $this->addParams((array) $params);
      $this->available_sorts = $this->defaultSorts();
      $this->sortstring = trim((string) $sortstring);
      $this->parseSortString();
      $this->base_path = $base_path;
    }

    /**
     * Returns the sorts that are available by default.
     *
     * @return array
     *   Keyed by Solr field name; each entry has a translated 'title' and a
     *   'default' direction.
     */
    protected function defaultSorts() {
      return array(
        'score' => array('title' => t('Relevancy'), 'default' => 'desc'),
        'sort_label' => array('title' => t('Title'), 'default' => 'asc'),
        'bundle' => array('title' => t('Type'), 'default' => 'asc'),
        'sort_name' => array('title' => t('Author'), 'default' => 'asc'),
        'ds_created' => array('title' => t('Date'), 'default' => 'desc'),
      );
    }

    /**
     * Get query name.
     */
    public function getName() {
      return $this->name;
    }

    /**
     * Get query searcher name (for facetapi, views, pages, etc).
     *
     * @return string
     *   The query name and the Solr service ID joined by "@".
     */
    public function getSearcher() {
      return $this->name . '@' . $this->solr->getId();
    }

    /**
     * Get context values.
     */
    public function getContext() {
      return $this->context;
    }

    /**
     * Set context value.
     *
     * @param array $context
     *   Key/value pairs merged over the existing context.
     *
     * @return array
     *   The complete context after merging.
     */
    public function addContext(array $context) {
      foreach ($context as $k => $v) {
        $this->context[$k] = $v;
      }
      // The env_id must match that of the actual $solr object
      $this->context['env_id'] = $this->solr->getId();
      return $this->context;
    }

    /**
     * Returns one parameter.
     *
     * @return mixed
     *   For 'fq' the freshly rendered filter strings. Otherwise the stored
     *   value, or - when unset - NULL for single-value parameters and an empty
     *   array for all others.
     */
    public function getParam($name) {
      if ($name == 'fq') {
        return $this->rebuildFq();
      }
      if (isset($this->params[$name])) {
        return $this->params[$name];
      }
      return isset($this->single_value_params[$name]) ? NULL : array();
    }

    /**
     * Returns all parameters, with 'fq' rendered from the current filters.
     */
    public function getParams() {
      $params = $this->params;
      $params['fq'] = $this->rebuildFq();
      return $params;
    }

    /**
     * Returns the parameters in the form that is handed to the Solr service.
     */
    public function getSolrParams() {
      $params = $this->getParams();
      // For certain fields Solr prefers a comma separated list.
      foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) {
        if (isset($params[$name])) {
          $params[$name] = implode(',', $params[$name]);
        }
      }
      return $params;
    }

    /**
     * Parses one raw fq string and stores it as a filter.
     *
     * Recognized shape: [-][{!local}][-][name:]value. Both "-{!local}name:v"
     * and "{!local}-name:v" (the form makeFilterQuery() emits) are understood
     * as exclusions.
     *
     * @param string $string
     *   The raw filter string.
     * @param mixed $index
     *   Unused; present so the method can serve as an array_walk_recursive()
     *   callback.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    protected function addFq($string, $index = NULL) {
      $string = trim((string) $string);
      $local = '';
      $exclude = FALSE;
      $name = NULL;
      $value = NULL;
      $matches = array();

      // Check if we are dealing with an exclude
      if (preg_match('/^-(.*)/', $string, $matches)) {
        $exclude = TRUE;
        $string = $matches[1];
      }
      // Local params only count at the very start of the string. Matching them
      // anywhere would throw away the field name of a filter whose value
      // merely contains "{!...}".
      if (preg_match('/^\{!([^}]+)\}(.*)/', $string, $matches)) {
        $local = $matches[1];
        $string = $matches[2];
        // The exclusion marker may also follow the local params.
        if (!$exclude && preg_match('/^-(.*)/', $string, $matches)) {
          $exclude = TRUE;
          $string = $matches[1];
        }
      }
      // Anything that has a name and value: split on the first colon.
      if (strpos($string, ':') !== FALSE) {
        list($name, $value) = explode(':', $string, 2);
      }
      else {
        $value = $string;
      }
      $this->addFilter($name, $value, $exclude, $local);
      return $this;
    }

    /**
     * Adds a parameter value.
     *
     * Single-value parameters are overwritten (for an array the last element
     * wins), 'fq' values are routed to the filter list, and every other
     * parameter accumulates values in a list.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function addParam($name, $value) {
      if (isset($this->single_value_params[$name])) {
        if (is_array($value)) {
          $value = end($value);
        }
        $this->params[$name] = $this->normalizeParamValue($value);
        return $this;
      }
      // We never actually populate $this->params['fq']. Instead
      // we manage everything via the filter methods.
      if ($name == 'fq') {
        if (is_array($value)) {
          array_walk_recursive($value, array($this, 'addFq'));
          return $this;
        }
        return $this->addFq($value);
      }
      if (!isset($this->params[$name])) {
        $this->params[$name] = array();
      }
      // Always work on a numerically keyed list so array_merge() appends.
      $param_values = is_array($value) ? array_values($value) : array($value);
      $this->params[$name] = array_merge($this->params[$name], array_map(array($this, 'normalizeParamValue'), $param_values));
      return $this;
    }

    /**
     * Converts a parameter value to the string that is stored.
     *
     * @return string
     *   'true' or 'false' for booleans, otherwise the trimmed string value.
     */
    protected function normalizeParamValue($value) {
      // Convert boolean to string.
      if (is_bool($value)) {
        return $value ? 'true' : 'false';
      }
      // Convert to trimmed string; the cast avoids passing NULL to trim().
      return trim((string) $value);
    }

    /**
     * Adds several parameters at once, see addParam().
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function addParams(Array $params) {
      foreach ($params as $name => $value) {
        $this->addParam($name, $value);
      }
      return $this;
    }

    /**
     * Removes a parameter; for 'fq' all filters and subqueries are dropped.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function removeParam($name) {
      unset($this->params[$name]);
      if ($name == 'fq') {
        $this->fields = array();
        $this->subqueries = array();
      }
      return $this;
    }

    /**
     * Removes a parameter and then adds the new value.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function replaceParam($name, $value) {
      $this->removeParam($name);
      return $this->addParam($name, $value);
    }

    /**
     * Handles aliases for field to make nicer URLs.
     *
     * @param $field_map
     *   An array keyed with real Solr index field names with the alias as value.
     *
     * @return DrupalSolrQueryInterface
     *   The called object.
     */
    public function addFieldAliases($field_map) {
      $this->field_map = array_merge($this->field_map, $field_map);
      // The sort string may use aliases, so it has to be parsed again.
      $this->parseSortString();
      return $this;
    }

    /**
     * Returns the field alias map.
     */
    public function getFieldAliases() {
      return $this->field_map;
    }

    /**
     * Drops all field aliases.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function clearFieldAliases() {
      $this->field_map = array();
      // The sort string may use aliases, so it has to be parsed again.
      $this->parseSortString();
      return $this;
    }

    /**
     * Validates the sort string and updates $solrsort and the 'sort' param.
     *
     * An empty string or "score desc" selects the default relevancy sort and
     * removes the 'sort' parameter. Any other string must consist only of
     * "field direction" pairs using available sorts.
     *
     * @return
     *   FALSE when the sort string is not valid (the previous sort is kept),
     *   otherwise nothing.
     */
    protected function parseSortString() {
      // Substitute any field aliases with real field names.
      // NOTE(review): addFieldAliases() documents the map as real name =>
      // alias, while strtr() replaces keys with values. Confirm which
      // direction callers actually pass.
      $sortstring = strtr($this->sortstring, $this->field_map);
      // Score is a special case - it's the default sort for Solr.
      if ('' == $sortstring || 'score desc' == $sortstring) {
        $this->solrsort['#name'] = 'score';
        $this->solrsort['#direction'] = 'desc';
        unset($this->params['sort']);
        return;
      }
      // Validate and set sort parameter. Escape the available sorts to allow
      // for function sorts like geodist(); the delimiter is passed so that a
      // "/" inside a sort name cannot terminate the pattern.
      $fields = array();
      foreach (array_keys($this->available_sorts) as $field) {
        $fields[] = preg_quote($field, '/');
      }
      $matches = array();
      if (!preg_match('/^(?:(' . implode('|', $fields) . ') (asc|desc),?)+$/', $sortstring, $matches)) {
        return FALSE;
      }
      // We only use the last match.
      $this->solrsort['#name'] = $matches[1];
      $this->solrsort['#direction'] = $matches[2];
      $this->params['sort'] = array($sortstring);
    }

    /**
     * Returns the available sorts, keyed by Solr field name.
     */
    public function getAvailableSorts() {
      return $this->available_sorts;
    }

    /**
     * Adds or replaces one available sort.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function setAvailableSort($name, $sort) {
      // We expect non-aliased sorts to be added.
      $this->available_sorts[$name] = $sort;
      // Re-parse the sortstring.
      $this->parseSortString();
      return $this;
    }

    /**
     * Replaces the whole set of available sorts.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function setAvailableSorts($sorts) {
      // We expect a complete array of valid sorts.
      $this->available_sorts = $sorts;
      $this->parseSortString();
      return $this;
    }

    /**
     * Removes one available sort.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function removeAvailableSort($name) {
      unset($this->available_sorts[$name]);
      // Re-parse the sortstring.
      $this->parseSortString();
      return $this;
    }

    /**
     * Returns the parsed sort as an array with #name and #direction.
     */
    public function getSolrsort() {
      return $this->solrsort;
    }

    /**
     * Sets the sort from a field name and a direction.
     *
     * @return SolrBaseQuery
     *   The called object.
     */
    public function setSolrsort($name, $direction) {
      $this->sortstring = trim($name) . ' ' . trim($direction);
      $this->parseSortString();
      return $this;
    }

    /**
     * Returns the search path for the given or the current keywords.
     *
     * @param string $new_keywords
     *   Keywords to use instead of the current 'q' parameter.
     *
     * @return string
     *   The base path, a slash, and the keywords (if any).
     */
    public function getPath($new_keywords = NULL) {
      if (isset($new_keywords)) {
        return $this->base_path . '/' . $new_keywords;
      }
      $keys = $this->getParam('q');
      if ($keys) {
        return $this->base_path . '/' . $keys;
      }
      // Return with empty query (the slash). The path for a facet
      // becomes $this->base_path . '//facetinfo';
      // We do this so we can have a consistent way of retrieving the query +
      // additional parameters
      return $this->base_path . '/';
    }

    /**
     * Returns the URL query values that describe the current sort.
     *
     * @return array
     *   array('solrsort' => 'name direction'), using the alias found under the
     *   sort's field name in the alias map when there is one. Empty for the
     *   default relevancy sort.
     */
    public function getSolrsortUrlQuery() {
      $queryvalues = array();
      $solrsort = $this->solrsort;
      if ($solrsort && ($solrsort['#name'] != 'score')) {
        if (isset($this->field_map[$solrsort['#name']])) {
          $solrsort['#name'] = $this->field_map[$solrsort['#name']];
        }
        $queryvalues['solrsort'] = $solrsort['#name'] . ' ' . $solrsort['#direction'];
      }
      return $queryvalues;
    }

    /**
     * Runs the search through the Solr service.
     *
     * @return
     *   NULL when the search was aborted via $abort_search, otherwise whatever
     *   the service's search() returns.
     */
    public function search($keys = NULL) {
      if ($this->abort_search) {
        return NULL;
      }
      return $this->solr->search($keys, $this->getSolrParams());
    }

    /**
     * Calls an argument-less method on the Solr service object.
     *
     * @param string $method
     *   The name of the method to call.
     */
    public function solr($method) {
      return $this->solr->$method();
    }

  }