Drupal_Apache_Solr_Service.php 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, Nick Veenhof and additional
  41. * contributors.
  42. *
  43. * This program is free software; you can redistribute it and/or modify
  44. * it under the terms of the GNU General Public License as published by
  45. * the Free Software Foundation; either version 2 of the License, or (at
  46. * your option) any later version.
  47. *
  48. * This program is distributed in the hope that it will be useful, but
  49. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  50. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  51. * for more details.
  52. *
  53. * You should have received a copy of the GNU General Public License
  54. * along with this program as the file LICENSE.txt; if not, please see
  55. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  56. */
  57. /**
  58. * Starting point for the Solr API. Represents a Solr server resource and has
  59. * methods for pinging, adding, deleting, committing, optimizing and searching.
  60. */
  61. class DrupalApacheSolrService implements DrupalApacheSolrServiceInterface {
  62. /**
  63. * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values
  64. * are 'map' (default) or 'flat'.
  65. *
  66. */
  67. const NAMED_LIST_FORMAT = 'map';
  68. /**
  69. * Servlet mappings
  70. */
  71. const PING_SERVLET = 'admin/ping';
  72. const UPDATE_SERVLET = 'update';
  73. const SEARCH_SERVLET = 'select';
  74. const LUKE_SERVLET = 'admin/luke';
  75. const SYSTEM_SERVLET = 'admin/system';
  76. const STATS_SERVLET = 'admin/stats.jsp';
  77. const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';
  78. /**
  79. * Server url
  80. *
  81. * @var array
  82. */
  83. protected $parsed_url;
  84. /**
  85. * Constructed servlet full path URLs
  86. *
  87. * @var string
  88. */
  89. protected $update_url;
  90. /**
  91. * Default HTTP timeout when one is not specified (initialized to default_socket_timeout ini setting)
  92. *
  93. * @var float
  94. */
  95. protected $_defaultTimeout;
  96. protected $env_id;
  97. protected $luke;
  98. protected $stats;
  99. protected $system_info;
  100. /**
  101. * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE.
  102. *
  103. * @var bool
  104. */
  105. protected $soft_commit = FALSE;
  106. /**
  107. * Call the /admin/ping servlet, to test the connection to the server.
  108. *
  109. * @param $timeout
  110. * maximum time to wait for ping in seconds, -1 for unlimited (default 2).
  111. * @return
  112. * (float) seconds taken to ping the server, FALSE if timeout occurs.
  113. */
  public function ping($timeout = 2) {
    // Start timing before any other work so the whole round trip is measured.
    $started_at = microtime(TRUE);
    // Any non-positive timeout is normalised to -1.
    $effective_timeout = ($timeout <= 0.0) ? -1 : $timeout;
    // A HEAD request is sufficient: only the status code is inspected.
    $response = $this->_makeHttpRequest(
      $this->_constructUrl(self::PING_SERVLET),
      array(
        'method' => 'HEAD',
        'timeout' => $effective_timeout,
      )
    );
    if ($response->code != 200) {
      return FALSE;
    }
    // Pad by 0.1 ms so a successful ping is never reported as 0.0.
    return microtime(TRUE) - $started_at + 0.0001;
  }
  134. /**
  135. * Flags whether to use soft commits for Solr 4.x.
  136. *
  137. * @param bool $soft_commit
  138. * Whether or not to use soft commits for Solr 4.x.
  139. */
  public function setSoftCommit($soft_commit) {
    // Coerce whatever was passed to a strict boolean before storing it.
    $enabled = (bool) $soft_commit;
    $this->soft_commit = $enabled;
  }
  143. /**
  144. * Returns the flag that denotes whether to use soft commits for Solr 4.x.
  145. *
  146. * @return bool
  147. * Whether to use soft commits for Solr 4.x.
  148. */
  public function getSoftCommit() {
    // Plain accessor for the flag stored by setSoftCommit().
    $use_soft_commit = $this->soft_commit;
    return $use_soft_commit;
  }
  152. /**
  153. * Call the /admin/system servlet
  154. *
  155. * Stores the decoded JSON response in $this->system_info (and in the
  156. * cache_apachesolr bin when an environment id is set). Returns nothing.
  157. */
  protected function setSystemInfo() {
    $request_url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
    // Caching is only possible when this service is tied to an environment.
    $use_cache = (bool) $this->env_id;

    if ($use_cache) {
      $this->system_info_cid = $this->env_id . ":system:" . drupal_hash_base64($request_url);
      $cached = cache_get($this->system_info_cid, 'cache_apachesolr');
      if (isset($cached->data)) {
        // The cache holds the raw JSON string, so decode it on the way out.
        $this->system_info = json_decode($cached->data);
      }
    }

    // Nothing more to do when the cache supplied usable data.
    if (!empty($this->system_info)) {
      return;
    }

    // Fetch from the server and store the raw response body for next time.
    $response = $this->_sendRawGet($request_url);
    $this->system_info = json_decode($response->data);
    if ($use_cache) {
      cache_set($this->system_info_cid, $response->data, 'cache_apachesolr');
    }
  }
  176. /**
  177. * Get information about the Solr Core.
  178. *
  179. * @return
  180. * The system info decoded from the JSON response of admin/system.
  181. */
  public function getSystemInfo() {
    // Already loaded during this request: hand it straight back.
    if (isset($this->system_info)) {
      return $this->system_info;
    }
    // First access: load it from the cache or the server.
    $this->setSystemInfo();
    return $this->system_info;
  }
  188. /**
  189. * Sets $this->luke with the meta-data about the index from admin/luke.
  190. */
  protected function setLuke($num_terms = 0) {
    // Data for this term count is already in memory.
    if (!empty($this->luke[$num_terms])) {
      return;
    }

    $request_url = $this->_constructUrl(self::LUKE_SERVLET, array(
      'numTerms' => (string) $num_terms,
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    ));

    // First pass: try the cache, which stores the whole $this->luke array.
    $cache_id = NULL;
    if ($this->env_id) {
      $cache_id = $this->env_id . ":luke:" . drupal_hash_base64($request_url);
      $cached = cache_get($cache_id, 'cache_apachesolr');
      if (isset($cached->data)) {
        $this->luke = $cached->data;
      }
    }
    if (!empty($this->luke[$num_terms])) {
      return;
    }

    // Second pass: ask the server and populate the cache.
    $this->luke[$num_terms] = $this->_sendRawGet($request_url);
    if ($this->env_id) {
      cache_set($cache_id, $this->luke, 'cache_apachesolr');
    }
  }
  215. /**
  216. * Get just the field meta-data about the index.
  217. */
  public function getFields($num_terms = 0) {
    // The field meta-data is the "fields" member of the Luke response.
    $luke = $this->getLuke($num_terms);
    return $luke->fields;
  }
  221. /**
  222. * Get meta-data about the index.
  223. */
  public function getLuke($num_terms = 0) {
    // Luke data is kept per requested term count and loaded on demand.
    $is_loaded = isset($this->luke[$num_terms]);
    if (!$is_loaded) {
      $this->setLuke($num_terms);
    }
    return $this->luke[$num_terms];
  }
  230. /**
  231. * Get the current solr version. This could be 1, 3 or 4
  232. *
  233. * @return int
  234. * 1, 3 or 4. Does not give a more details version, for that you need
  235. * to get the system info.
  236. */
  public function getSolrVersion() {
    $system_info = $this->getSystemInfo();
    // Without a spec version in the system info the version is unknown (0).
    if (!isset($system_info->lucene->{'solr-spec-version'})) {
      return 0;
    }
    // Presumably a dotted version string such as "4.10.2" (verify against the
    // admin/system response). An integer cast keeps only the leading digits,
    // so the documented int is returned instead of a one-character string,
    // a multi-digit major version is not truncated to its first digit, and
    // an empty string yields 0 rather than a string-offset notice.
    return (int) $system_info->lucene->{'solr-spec-version'};
  }
  245. /**
  246. * Sets $this->stats with the information about the Solr Core from the stats servlet.
  247. */
  protected function setStats() {
    $luke = $this->getLuke();
    $major_version = $this->getSolrVersion();

    // Stats are fetched at most once, and only when the Luke response shows
    // that the index could actually be reached.
    if (!empty($this->stats) || !isset($luke->index->numDocs)) {
      return;
    }

    // Solr 4 and later expose statistics through a different servlet.
    $servlet = ($major_version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
    $request_url = $this->_constructUrl($servlet);

    if ($this->env_id) {
      $this->stats_cid = $this->env_id . ":stats:" . drupal_hash_base64($request_url);
      $cached = cache_get($this->stats_cid, 'cache_apachesolr');
      if (isset($cached->data)) {
        // The cache holds the raw XML string.
        $this->stats = simplexml_load_string($cached->data);
      }
    }

    // Second pass to populate the cache if necessary.
    if (empty($this->stats)) {
      $response = $this->_sendRawGet($request_url);
      $this->stats = simplexml_load_string($response->data);
      if ($this->env_id) {
        cache_set($this->stats_cid, $response->data, 'cache_apachesolr');
      }
    }
  }
  276. /**
  277. * Get information about the Solr Core.
  278. *
  279. * Returns a SimpleXML document.
  280. */
  public function getStats() {
    // Already loaded during this request: hand it straight back.
    if (isset($this->stats)) {
      return $this->stats;
    }
    // First access: load it from the cache or the server.
    $this->setStats();
    return $this->stats;
  }
  287. /**
  288. * Get summary information about the Solr Core.
  289. */
  public function getStatsSummary() {
    $stats = $this->getStats();
    $major_version = $this->getSolrVersion();

    // Every placeholder is present even when no statistics are available.
    $summary = array(
      '@pending_docs' => '',
      '@autocommit_time_seconds' => '',
      '@autocommit_time' => '',
      '@deletes_by_id' => '',
      '@deletes_by_query' => '',
      '@deletes_total' => '',
      '@schema_version' => '',
      '@core_name' => '',
      '@index_size' => '',
    );
    if (empty($stats)) {
      return $summary;
    }

    // The stats XML is laid out differently up to Solr 3 and from Solr 4 on,
    // so choose the XPath expressions per version first.
    $is_legacy = ($major_version <= 3);
    if ($is_legacy) {
      $integer_paths = array(
        '@pending_docs' => '//stat[@name="docsPending"]',
        '@deletes_by_id' => '//stat[@name="deletesById"]',
        '@deletes_by_query' => '//stat[@name="deletesByQuery"]',
      );
      $max_time_path = '//stat[@name="autocommit maxTime"]';
      $index_size_path = '//stat[@name="indexSize"]';
    }
    else {
      $system_info = $this->getSystemInfo();
      $integer_paths = array(
        '@pending_docs' => '//lst["stats"]/long[@name="docsPending"]',
        '@deletes_by_id' => '//lst["stats"]/long[@name="deletesById"]',
        '@deletes_by_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
      );
      $max_time_path = '//lst["stats"]/str[@name="autocommit maxTime"]';
      $index_size_path = '//lst["core"]/str[@name="indexSize"]';
    }

    // Integer counters: first matching node, trimmed and cast.
    foreach ($integer_paths as $placeholder => $path) {
      $nodes = $stats->xpath($path);
      $summary[$placeholder] = (int) trim(current($nodes));
    }
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];

    // Autocommit time: divided by 1000 to convert to seconds.
    $nodes = $stats->xpath($max_time_path);
    $max_time = (int) trim(current($nodes));
    $summary['@autocommit_time_seconds'] = $max_time / 1000;
    $summary['@autocommit_time'] = format_interval($max_time / 1000);

    // Schema version and core name come from different places per version.
    if ($is_legacy) {
      $schema_nodes = $stats->xpath('/solr/schema[1]');
      $summary['@schema_version'] = trim($schema_nodes[0]);
      $core_nodes = $stats->xpath('/solr/core[1]');
      $summary['@core_name'] = trim($core_nodes[0]);
    }
    else {
      $summary['@schema_version'] = $system_info->core->schema;
      $core_nodes = $stats->xpath('//lst["core"]/str[@name="coreName"]');
      $summary['@core_name'] = trim(current($core_nodes));
    }

    $nodes = $stats->xpath($index_size_path);
    $summary['@index_size'] = trim(current($nodes));

    return $summary;
  }
  349. /**
  350. * Clear cached Solr data.
  351. */
  public function clearCache() {
    // Refuse to drop cached data while the server cannot be reached.
    if (!@$this->ping()) {
      throw new Exception('No Solr instance available when trying to clear the cache.');
    }
    $this->_clearCache();
  }
  protected function _clearCache() {
    // Persistent entries exist only when an environment id is set; the TRUE
    // argument makes each id a wildcard prefix.
    if ($this->env_id) {
      foreach (array(':stats:', ':luke:') as $segment) {
        cache_clear_all($this->env_id . $segment, 'cache_apachesolr', TRUE);
      }
    }
    // Reset the in-memory copies as well.
    $this->luke = array();
    $this->stats = NULL;
  }
  369. /**
  370. * Constructor
  371. *
  372. * @param $url
  373. * The URL to the Solr server, possibly including a core name. E.g. http://localhost:8983/solr/
  374. * or https://search.example.com/solr/core99/
  375. * @param $env_id
  376. * The machine name of a corresponding saved configuration used for loading
  377. * data like which facets are enabled.
  378. */
  public function __construct($url, $env_id = NULL) {
    $this->env_id = $env_id;
    $this->setUrl($url);
    // Take the default HTTP timeout from the ini settings, falling back to
    // 60 seconds when the setting is missing, zero or negative.
    $ini_timeout = (int) ini_get('default_socket_timeout');
    $this->_defaultTimeout = ($ini_timeout > 0) ? $ini_timeout : 60;
  }
  public function getId() {
    // The id is the environment id handed to the constructor.
    $environment_id = $this->env_id;
    return $environment_id;
  }
  392. /**
  393. * Check the response code and throw an exception if it's not 200.
  394. *
  395. * @param stdClass $response
  396. * response object.
  397. *
  398. * @return
  399. * response object
  400. * @throws Exception
  401. */
  protected function checkResponse($response) {
    $code = (int) $response->code;
    // Only a 200 counts as success; everything else is logged and thrown.
    if ($code == 200) {
      return $response;
    }

    // Report where the user's code called the apachesolr code.
    $caller = $this->findCaller();
    $function = $caller['function'] . '()';
    if (isset($caller['class'])) {
      $function = $caller['class'] . '->' . $function;
    }

    // The message is passed untranslated on purpose: watchdog() stores the
    // raw string plus its variables and translates when the log entry is
    // displayed. Wrapping it in t() here would store an already translated
    // string that is then run through translation a second time.
    watchdog(
      'Apache Solr',
      'HTTP Status: %http_status; <br>Message: %status_message; <br>Response: %response; <br>Request: %request; <br>Caller: %function (line %line of %file)',
      array(
        '%http_status' => $code,
        '%status_message' => $response->status_message,
        '%response' => $response->data,
        '%request' => empty($response->request) ? t('Unknown') : $response->request,
        '%function' => $function,
        '%line' => $caller['line'],
        '%file' => $caller['file'],
      ),
      WATCHDOG_ERROR
    );
    throw new Exception('HTTP ' . $code . '; ' . $response->status_message);
  }
  425. /**
  426. * Determine the routine that called this query.
  427. *
  428. * We define "the routine that called this query" as the first entry in
  429. * the call stack that is not inside /apachesolr/. That makes the climbing
  430. * logic very simple, and handles variable stack depth and hook functions.
  431. *
  432. * Copied from includes/database/log.inc
  433. *
  434. * @link http://www.php.net/debug_backtrace
  435. * @return
  436. * This method returns a stack trace entry similar to that generated by
  437. * debug_backtrace(). However, it flattens the trace entry and the trace
  438. * entry before it so that we get the function and args of the function that
  439. * called into the apachesolr module, not the function and args of the
  440. * Solr call itself.
  441. */
  public function findCaller() {
    $stack = debug_backtrace();
    $stack_count = count($stack);
    $module_dir = DIRECTORY_SEPARATOR . 'apachesolr' . DIRECTORY_SEPARATOR;

    for ($i = 0; $i < $stack_count; ++$i) {
      // The first frame whose file is unknown or outside /apachesolr/ is
      // where the user's code called in.
      if (!isset($stack[$i]['file']) || strpos($stack[$i]['file'], $module_dir) === FALSE) {
        // The function and args come from the next frame out. That frame
        // does not exist when the match is the outermost frame, so read it
        // defensively instead of indexing past the end of the stack.
        $outer = isset($stack[$i + 1]) ? $stack[$i + 1] : array();
        return array(
          'file' => isset($stack[$i]['file']) ? $stack[$i]['file'] : t('Unknown'),
          'line' => isset($stack[$i]['line']) ? $stack[$i]['line'] : t('Unknown'),
          'function' => isset($outer['function']) ? $outer['function'] : t('Unknown'),
          'class' => isset($outer['class']) ? $outer['class'] : NULL,
          'type' => isset($outer['type']) ? $outer['type'] : NULL,
          'args' => isset($outer['args']) ? $outer['args'] : array(),
        );
      }
    }

    // Every frame was inside /apachesolr/. Return an entry of the same shape
    // so callers that index the result (such as checkResponse()) do not
    // operate on NULL.
    return array(
      'file' => t('Unknown'),
      'line' => t('Unknown'),
      'function' => t('Unknown'),
      'class' => NULL,
      'type' => NULL,
      'args' => array(),
    );
  }
  458. /**
  459. * Make a request to a servlet (a path) that's not a standard path.
  460. *
  461. * @param string $servlet
  462. * A path to be added to the base Solr path. e.g. 'extract/tika'
  463. *
  464. * @param array $params
  465. * Any request parameters when constructing the URL.
  466. *
  467. * @param array $options
  468. * @see drupal_http_request() $options.
  469. *
  470. * @return
  471. * response object
  472. *
  473. * @throws Exception
  474. */
  public function makeServletRequest($servlet, $params = array(), $options = array()) {
    // Array union keeps every caller-supplied value and only fills in the
    // keys that are missing.
    $defaults = array(
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    $request_url = $this->_constructUrl($servlet, $params + $defaults);
    // checkResponse() throws for any status other than 200.
    return $this->checkResponse($this->_makeHttpRequest($request_url, $options));
  }
  485. /**
  486. * Central method for making a GET operation against this Solr Server
  487. */
  protected function _sendRawGet($url, $options = array()) {
    // Issue the request and let checkResponse() reject any non-200 status.
    return $this->checkResponse($this->_makeHttpRequest($url, $options));
  }
  492. /**
  493. * Central method for making a POST operation against this Solr Server
  494. */
  protected function _sendRawPost($url, $options = array()) {
    $options['method'] = 'POST';
    // POST bodies are normally XML documents, so that is the default content
    // type unless the caller supplied one.
    $has_content_type = isset($options['headers']['Content-Type']);
    if (!$has_content_type) {
      $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
    }
    // Issue the request and let checkResponse() reject any non-200 status.
    return $this->checkResponse($this->_makeHttpRequest($url, $options));
  }
  504. /**
  505. * Central method for making the actual http request to the Solr Server
  506. *
  507. * This is just a wrapper around drupal_http_request().
  508. */
  protected function _makeHttpRequest($url, array $options = array()) {
    // A missing method is treated like GET; GET and HEAD never send a body.
    $method = isset($options['method']) ? $options['method'] : 'GET';
    if ($method == 'GET' || $method == 'HEAD') {
      $options['data'] = NULL;
    }

    $result = drupal_http_request($url, $options);

    // Collapse a missing or negative code into a uniform failure.
    $request_failed = !isset($result->code) || $result->code < 0;
    if ($request_failed) {
      $result->code = 0;
      $result->status_message = 'Request failed';
      $result->protocol = 'HTTP/1.0';
    }
    // Additional information may be in the error property.
    if (isset($result->error)) {
      $result->status_message .= ': ' . check_plain($result->error);
    }

    if (isset($result->data)) {
      // When the body decodes to a JSON object, copy each top-level member
      // onto the result object.
      $decoded = json_decode($result->data);
      if (is_object($decoded)) {
        foreach ($decoded as $property => $value) {
          $result->$property = $value;
        }
      }
    }
    else {
      $result->data = '';
      $result->response = NULL;
    }
    return $result;
  }
  538. /**
  539. * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
  540. *
  541. * NOTE: inside a phrase fewer characters need escaped, use {@link DrupalApacheSolrService::escapePhrase()} instead
  542. *
  543. * @param string $value
  544. * @return string
  545. */
  static public function escape($value) {
    // Special characters as listed at
    // http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
    $special_chars = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/';
    // Each match is re-emitted with a single backslash in front of it.
    $escaped_match = '\\\$1';
    return preg_replace($special_chars, $escaped_match, $value);
  }
  553. /**
  554. * Escape a value meant to be contained in a phrase for special query characters
  555. *
  556. * @param string $value
  557. * @return string
  558. */
  static public function escapePhrase($value) {
    // Inside a phrase only the double quote and the backslash are escaped.
    $special_chars = '/("|\\\)/';
    // Each match is re-emitted with a single backslash in front of it.
    $escaped_match = '\\\$1';
    return preg_replace($special_chars, $escaped_match, $value);
  }
  565. /**
  566. * Convenience function for creating phrase syntax from a value
  567. *
  568. * @param string $value
  569. * @return string
  570. */
  static public function phrase($value) {
    // Escape first, then wrap the result in double quotes.
    $escaped = self::escapePhrase($value);
    return '"' . $escaped . '"';
  }
  575. /**
  576. * Return a valid http URL given this server's host, port and path and a provided servlet name
  577. *
  578. * @param $servlet
  579. * A string path to a Solr request handler.
  580. * @param $params
  581. * @param $added_query_string
  582. * An optional raw query string that is appended verbatim after the generated one.
  583. *
  584. * @return string
  585. */
  protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $generated = $this->httpBuildQuery($params);

    // Join the generated and the extra query string, whichever are present.
    if ($generated && $added_query_string) {
      $query_string = '?' . $generated . '&' . $added_query_string;
    }
    elseif ($generated) {
      $query_string = '?' . $generated;
    }
    elseif ($added_query_string) {
      $query_string = '?' . $added_query_string;
    }
    else {
      $query_string = $generated;
    }

    // The stored URL parts already carry their separators ("://", ":", "@"
    // and the surrounding slashes), so plain concatenation is enough.
    $base = $this->parsed_url;
    $prefix = $base['scheme'] . $base['user'] . $base['pass'] . $base['host'] . $base['port'] . $base['path'];
    return $prefix . $servlet . $query_string;
  }
  601. /**
  602. * Get the Solr url
  603. *
  604. * @return string
  605. */
  public function getUrl() {
    // An empty servlet name and no parameters leave just the base URL.
    $base_url = $this->_constructUrl('');
    return $base_url;
  }
  609. /**
  610. * Set the Solr url.
  611. *
  612. * @param $url
  613. *
  614. * @return $this
  615. */
  public function setUrl($url) {
    $parts = parse_url($url);

    // Each part is stored together with its separator so that
    // _constructUrl() can concatenate the pieces directly.
    $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
    $parts['scheme'] = $scheme . '://';

    if (isset($parts['user'])) {
      // With credentials present the host is preceded by "@".
      $parts['host'] = '@' . $parts['host'];
    }
    else {
      $parts['user'] = '';
    }

    if (isset($parts['pass'])) {
      $parts['pass'] = ':' . $parts['pass'];
    }
    else {
      $parts['pass'] = '';
    }

    if (isset($parts['port'])) {
      $parts['port'] = ':' . $parts['port'];
    }
    else {
      $parts['port'] = '';
    }

    if (isset($parts['path'])) {
      // Make sure the path has a single leading/trailing slash.
      $parts['path'] = rtrim('/' . ltrim($parts['path'], '/'), '/') . '/';
    }
    else {
      $parts['path'] = '/';
    }

    // For now we ignore query and fragment.
    $this->parsed_url = $parts;
    // Force the update url to be rebuilt.
    unset($this->update_url);
    return $this;
  }
  644. /**
  645. * Raw update Method. Takes a raw post body and sends it to the update service. Post body
  646. * should be a complete and well formed xml document.
  647. *
  648. * @param string $rawPost
  649. * @param float $timeout Maximum expected duration (in seconds)
  650. *
  651. * @return response object
  652. *
  653. * @throws Exception If an error occurs during the service call
  654. */
  public function update($rawPost, $timeout = FALSE) {
    // @todo: throw exception if updates are disabled.
    $options = array('data' => $rawPost);
    // A falsy timeout leaves the option unset.
    if ($timeout) {
      $options['timeout'] = $timeout;
    }
    // The update URL is built once and kept on the instance, since many
    // updates may be sent through the same object.
    if (empty($this->update_url)) {
      $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
    }
    return $this->_sendRawPost($this->update_url, $options);
  }
  668. /**
  669. * Add an array of Solr Documents to the index all at once
  670. *
  671. * @param array $documents Should be an array of ApacheSolrDocument instances
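  672. * @param boolean $overwrite When set, emitted as the overwrite attribute of <add>
  673. * @param int $commitWithin When set, emitted as the commitWithin attribute of <add>
  674. * Both default to NULL, in which case the attribute is omitted.
  675. *
  676. * @return response object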
  677. *
  678. * @throws Exception If an error occurs during the service call
  679. */
  public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
    // Optional attributes of the <add> element; NULL means "omit".
    $attr = '';
    if (isset($overwrite)) {
      // The ternary must be parenthesised: "." binds tighter than "?:", so
      // without the parentheses the concatenated string became the condition
      // and only the fragment 'false"' was ever appended, yielding the
      // malformed element <addfalse">.
      $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
    }
    if (isset($commitWithin)) {
      $attr .= ' commitWithin="' . intval($commitWithin) . '"';
    }

    $rawPost = "<add{$attr}>";
    foreach ($documents as $document) {
      // Anything that is not an ApacheSolrDocument is silently skipped.
      if (is_object($document) && ($document instanceof ApacheSolrDocument)) {
        $rawPost .= ApacheSolrDocument::documentToXml($document);
      }
    }
    $rawPost .= '</add>';

    return $this->update($rawPost);
  }
  697. /**
  698. * Send a commit command. Will be synchronous unless both wait parameters are set to false.
  699. *
  700. * @param boolean $optimize Defaults to true
  701. * optimizes the index files. Only valid for solr versions <= 3
  702. * @param boolean $waitFlush
  703. * block until index changes are flushed to disk. Only valid for solr versions <= 3
  704. * @param boolean $waitSearcher
  705. * block until a new searcher is opened and registered as the main query searcher, making the changes visible.
  706. * @param float $timeout
  707. * Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
  708. *
  709. * @return response object
  710. *
  711. * @throws Exception If an error occurs during the service call
  712. */
  public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
    // Solr expects the literal strings "true" and "false" as attribute values.
    $wait_searcher = $waitSearcher ? 'true' : 'false';

    if ($this->getSolrVersion() <= 3) {
      // Up to Solr 3 the commit carries waitFlush and optimize.
      $rawPost = sprintf(
        '<commit waitSearcher="%s" waitFlush="%s" optimize="%s" />',
        $wait_searcher,
        $waitFlush ? 'true' : 'false',
        $optimize ? 'true' : 'false'
      );
    }
    else {
      // From Solr 4 on, $optimize and $waitFlush are not sent; the soft
      // commit flag is sent instead.
      $rawPost = sprintf(
        '<commit waitSearcher="%s" softCommit="%s" />',
        $wait_searcher,
        $this->soft_commit ? 'true' : 'false'
      );
    }

    $response = $this->update($rawPost, $timeout);
    // Drop the cached Luke and stats data after the commit.
    $this->_clearCache();
    return $response;
  }
  729. /**
  730. * Create a delete document based on document ID
  731. *
  732. * @param string $id Expected to be utf-8 encoded
  733. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  734. *
  735. * @return response object
  736. *
  737. * @throws Exception If an error occurs during the service call
  738. */
  public function deleteById($id, $timeout = 3600) {
    // A single id is the one-element case of the multi-id delete.
    $ids = array($id);
    return $this->deleteByMultipleIds($ids, $timeout);
  }
  742. /**
  743. * Create and post a delete document based on multiple document IDs.
  744. *
  745. * @param array $ids Expected to be utf-8 encoded strings
  746. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  747. *
  748. * @return response object
  749. *
  750. * @throws Exception If an error occurs during the service call
  751. */
  public function deleteByMultipleIds($ids, $timeout = 3600) {
    // One <id> element per document id, XML-escaped without touching quotes.
    $id_elements = array();
    foreach ($ids as $id) {
      $id_elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }
    $rawPost = '<delete>' . implode('', $id_elements) . '</delete>';
    return $this->update($rawPost, $timeout);
  }
  /**
   * Build a delete-by-query message and post it.
   *
   * The query is escaped for use as XML element content and wrapped in
   * <delete><query>...</query></delete> before being sent through update().
   *
   * @param string $rawQuery Expected to be utf-8 encoded
   * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
   *
   * @return stdClass response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function deleteByQuery($rawQuery, $timeout = 3600) {
    // ENT_NOQUOTES: escape &, < and > only; quotes in the query are kept.
    $escapedQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
    $message = sprintf('<delete><query>%s</query></delete>', $escapedQuery);

    return $this->update($message, $timeout);
  }
  /**
   * Send an optimize command.
   *
   * Will be synchronous unless both wait parameters are set to false.
   *
   * @param boolean $waitFlush
   *   Block until index changes are flushed to disk. Removed in Solr 4.0, so
   *   it is only sent when the detected Solr version is 3 or lower.
   * @param boolean $waitSearcher
   *   Block until a new searcher is opened and registered as the main query
   *   searcher, making the changes visible.
   * @param float $timeout
   *   Maximum expected duration of the commit operation on the server
   *   (otherwise, will throw a communication exception).
   *
   * @return response object
   *   The value returned by update().
   *
   * @throws Exception If an error occurs during the service call
   */
  public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
    // The XML update message carries booleans as the literal strings
    // "true" / "false".
    $waitSearcherAttr = $waitSearcher ? 'true' : 'false';
    $softCommitAttr = $this->soft_commit ? 'true' : 'false';

    if ($this->getSolrVersion() <= 3) {
      $message = sprintf(
        '<optimize waitSearcher="%s" waitFlush="%s" />',
        $waitSearcherAttr,
        $waitFlush ? 'true' : 'false'
      );
    }
    else {
      // Newer versions: waitFlush is not sent; softCommit is sent instead.
      $message = sprintf(
        '<optimize waitSearcher="%s" softCommit="%s" />',
        $waitSearcherAttr,
        $softCommitAttr
      );
    }

    return $this->update($message, $timeout);
  }
  /**
   * Like PHP's built-in http_build_query(), but uses rawurlencode() and no []
   * for repeated params.
   *
   * Nested arrays are flattened: every value inside an array is emitted under
   * the name of the outermost key (e.g. fq=a&fq=b). A NULL value emits the
   * key alone, without "=". An empty array emits nothing.
   *
   * @param array $query
   *   Parameter names mapped to a scalar, NULL, or an array of values.
   * @param string $parent
   *   Already-encoded name to use for every entry of $query. Used by the
   *   recursion; leave at the default for top-level calls.
   *
   * @return string
   *   The encoded query string, without a leading "?".
   */
  protected function httpBuildQuery(array $query, $parent = '') {
    $params = array();
    // Test against the empty string instead of truthiness: "0" is a valid
    // parameter name and must still be inherited by nested values.
    $hasParent = ((string) $parent !== '');

    foreach ($query as $key => $value) {
      $name = $hasParent ? $parent : rawurlencode($key);

      // Recurse into children.
      if (is_array($value)) {
        $nested = $this->httpBuildQuery($value, $name);
        // An empty array yields an empty string; adding it would leave a
        // stray "&" in the final query string.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $name;
      }
      else {
        $params[] = $name . '=' . rawurlencode($value);
      }
    }

    return implode('&', $params);
  }
  /**
   * Simple Search interface
   *
   * Builds the query string from $query and $params and sends it to the
   * search servlet. The response format is always forced to JSON. Long query
   * strings are sent by POST instead of GET (see the threshold logic below).
   *
   * @param string $query The raw query string
   * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
   * @param string $method
   *   'GET' (default) or 'POST'. A GET request is switched to POST
   *   automatically when the encoded query string is too long.
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call, or if
   *   $method is neither 'GET' nor 'POST'.
   */
  public function search($query = '', array $params = array(), $method = 'GET') {
    // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
    $params['wt'] = 'json';
    // Additional default params; a caller-supplied 'json.nl' wins.
    $params += array(
      'json.nl' => self::NAMED_LIST_FORMAT,
    );

    // A plain truthiness test would silently discard the query "0", so that
    // value is accepted explicitly. Empty string / NULL still mean
    // "no query given" and leave any caller-supplied $params['q'] alone.
    if ($query || $query === '0' || $query === 0) {
      $params['q'] = $query;
    }

    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $queryString = $this->httpBuildQuery($params);

    // Decide whether the query string is too long for GET. Below 1800
    // characters the method is never changed. Above that:
    // - without an environment (class used independently) always POST;
    // - with an environment, POST once the configured threshold
    //   (default 3600; a typical server handles 4096 max) is exceeded.
    $len = strlen($queryString);
    if ($len > 1800) {
      if (empty($this->env_id)) {
        $method = 'POST';
      }
      else {
        // apachesolr_environment_variable_get() is defined outside this
        // class; it is only consulted when an environment is present, so
        // independent use of the class does not depend on it.
        $max_len = apachesolr_environment_variable_get($this->env_id, 'apachesolr_search_post_threshold', 3600);
        if ($len > $max_len) {
          $method = 'POST';
        }
      }
    }

    if ($method == 'GET') {
      $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);
      return $this->_sendRawGet($searchUrl);
    }
    elseif ($method == 'POST') {
      $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
      // The parameters travel in the request body as a URL-encoded form.
      $options = array(
        'data' => $queryString,
        'headers' => array(
          'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
        ),
      );
      return $this->_sendRawPost($searchUrl, $options);
    }

    throw new Exception("Unsupported method '$method' for search(), use GET or POST");
  }
  869. }