apachesolr.index.inc 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521
  1. <?php
  2. /**
  3. * @file
  4. * Functions related to Apache Solr indexing operations.
  5. */
  /**
   * Indexes the next batch of queued items for every entity type.
   *
   * For each entity type known to entity_get_info(), up to $limit queued rows
   * are fetched, converted to Solr documents and sent to the Solr server of the
   * given environment. When a batch is sent successfully, the position of the
   * last processed row is stored so the next run continues from there.
   *
   * Modules can keep individual entities out of the index by implementing:
   * - hook_apachesolr_exclude()
   * - hook_apachesolr_ENTITY_TYPE_exclude()
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param int $limit
   *   The number of rows to process per entity type. With two indexed entity
   *   types and a limit of 50, at most 100 rows are processed in one call.
   *
   * @return int|false
   *   The total number of documents sent to the Apache Solr server, or FALSE
   *   when the Solr server could not be reached.
   *
   * @see apachesolr_index_get_entities_to_index()
   * @see apachesolr_index_entities_document()
   * @see apachesolr_index_send_to_solr()
   */
  function apachesolr_index_entities($env_id, $limit) {
    // Refuse to start when the Solr server does not answer a ping.
    try {
      $solr = apachesolr_get_solr($env_id);
      $ping_timeout = variable_get('apachesolr_ping_timeout', 4);
      if (!$solr->ping($ping_timeout)) {
        throw new Exception(t('No Solr instance available during indexing.'));
      }
    }
    catch (Exception $e) {
      watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
      return FALSE;
    }

    $submitted_total = 0;
    foreach (array_keys(entity_get_info()) as $entity_type) {
      // Fetch the next group of queued rows; skip entity types with none.
      $rows = apachesolr_index_get_entities_to_index($env_id, $entity_type, $limit);
      if (count($rows) == 0) {
        continue;
      }

      // A single row may produce zero, one or several documents.
      $batch = array();
      foreach ($rows as $row) {
        $batch = array_merge($batch, apachesolr_index_entities_document($row, $entity_type, $env_id));
      }

      // A failed submission leaves the stored position untouched so the same
      // rows are retried on the next run.
      if (apachesolr_index_send_to_solr($env_id, $batch) === FALSE) {
        continue;
      }
      $submitted_total += count($batch);

      // Remember where we stopped: the last row of this batch.
      $last_row = end($rows);
      $has_position = !empty($last_row->changed) && !empty($last_row->entity_id);
      if ($has_position) {
        apachesolr_set_last_index_position($env_id, $entity_type, $last_row->changed, $last_row->entity_id);
      }
      else {
        $message = 'Failure recording indexing progress. Last entity id processed: %entity_id with timestamp %last_changed';
        $variables = array(
          '%entity_id' => $last_row->entity_id,
          '%last_changed' => $last_row->changed,
        );
        watchdog('Apache Solr', $message, $variables, WATCHDOG_ERROR);
      }
      apachesolr_set_last_index_updated($env_id, REQUEST_TIME);
    }

    return $submitted_total;
  }
  /**
   * Converts one row of the index queue table into a set of Solr documents.
   *
   * One entity can result in several documents if
   * apachesolr_index_entity_to_documents() decides so. Rows with an empty
   * status are not converted; the entity is removed from the index instead.
   *
   * Modules can veto indexing of the entity by returning a non-empty value from
   * hook_apachesolr_exclude() or hook_apachesolr_ENTITY_TYPE_exclude(). All
   * implementations are invoked even after one of them has excluded the entity.
   *
   * @param object $row
   *   A row from the indexing table. The properties read here are status,
   *   entity_id and entity_type.
   * @param string $entity_type
   *   The type of the entity.
   * @param string $env_id
   *   The machine name of the environment.
   *
   * @return array
   *   An array of ApacheSolrDocument objects; empty when the entity is
   *   excluded, unpublished or could not be converted.
   */
  function apachesolr_index_entities_document($row, $entity_type, $env_id) {
    $documents = array();
    if (!empty($row->status)) {
      // Let any module exclude this entity from the index.
      $build_document = TRUE;
      foreach (module_implements('apachesolr_exclude') as $module) {
        $exclude = module_invoke($module, 'apachesolr_exclude', $row->entity_id, $entity_type, $row, $env_id);
        // A non-empty return value excludes the entity.
        if (!empty($exclude)) {
          $build_document = FALSE;
        }
      }
      foreach (module_implements('apachesolr_' . $entity_type . '_exclude') as $module) {
        $exclude = module_invoke($module, 'apachesolr_' . $entity_type . '_exclude', $row->entity_id, $row, $env_id);
        // A non-empty return value excludes the entity.
        if (!empty($exclude)) {
          $build_document = FALSE;
        }
      }
      if ($build_document) {
        $entity_documents = apachesolr_index_entity_to_documents($row, $env_id);
        // The conversion may yield a non-array value (for example FALSE when
        // the entity cannot be loaded). Passing that to array_merge() would
        // discard everything collected so far, so only merge real arrays.
        if (is_array($entity_documents) && !empty($entity_documents)) {
          $documents = array_merge($documents, $entity_documents);
        }
      }
    }
    else {
      // Delete the entity from our index if the status callback returned 0.
      apachesolr_remove_entity($env_id, $row->entity_type, $row->entity_id);
    }
    // Clear the entity cache for this specific entity.
    entity_get_controller($row->entity_type)->resetCache(array($row->entity_id));
    return $documents;
  }
  /**
   * Returns the total number of documents that are able to be indexed and the
   * number of documents left to be indexed.
   *
   * This is a helper function for modules that implement hook_search_status().
   * Entity types without any indexed bundle in the environment are skipped.
   *
   * @param string $env_id
   *   The machine name of the environment.
   *
   * @return array
   *   An associative array with the key-value pairs:
   *   - remaining: The number of items left to index.
   *   - total: The total number of items to index.
   *
   * @see hook_search_status()
   * @see _apachesolr_index_get_next_set_query()
   */
  function apachesolr_index_status($env_id) {
    $remaining = 0;
    $total = 0;
    foreach (entity_get_info() as $entity_type => $info) {
      $bundles = apachesolr_get_index_bundles($env_id, $entity_type);
      if (empty($bundles)) {
        continue;
      }
      $table = apachesolr_get_indexer_table($entity_type);
      $query = db_select($table, 'aie')
        ->condition('aie.status', 1)
        ->condition('aie.bundle', $bundles)
        ->addTag('apachesolr_index_' . $entity_type);
      if ($table == 'apachesolr_index_entities') {
        // The generic table is shared by several entity types. Restrict the
        // count to this type, exactly as the "remaining" query below does, so
        // both numbers are computed over the same set of rows.
        $query->condition('aie.entity_type', $entity_type);
      }
      $total += $query->countQuery()->execute()->fetchField();

      $query = _apachesolr_index_get_next_set_query($env_id, $entity_type);
      $remaining += $query->countQuery()->execute()->fetchField();
    }
    return array('remaining' => $remaining, 'total' => $total);
  }
  /**
   * Worker callback for apachesolr_index_entities().
   *
   * Loads and processes the entity queued for indexing and converts it into one
   * or more documents that are sent to the Apache Solr server for indexing.
   *
   * The entity is loaded as the user specified in the "apachesolr_index_user"
   * system variable in order to prevent sensitive data from being indexed and
   * displayed to underprivileged users in search results. The index user
   * defaults to a user ID of "0", which is the anonymous user. If the configured
   * user cannot be loaded, the anonymous user is used instead.
   *
   * The global user and session saving are always restored before this function
   * returns or lets an exception propagate.
   *
   * After the entity is loaded, it is handed over to
   * apachesolr_convert_entity_to_documents(), which allows developers to modify
   * the documents by implementing the following hooks:
   * - apachesolr_index_document_build()
   * - apachesolr_index_document_build_ENTITY_TYPE()
   * - apachesolr_index_documents_alter()
   *
   * @param stdClass $item
   *   The data returned by the queue table containing:
   *   - entity_id: An integer containing the unique identifier of the entity,
   *     for example a node ID or comment ID.
   *   - entity_type: The unique identifier for the entity, i.e. "node", "file".
   *   - bundle: The machine-readable name of the bundle the passed entity is
   *     associated with.
   *   - status: The "published" status of the entity. The status will also be
   *     set to "0" when entity is deleted but the Apache Solr server is
   *     unavailable.
   *   - changed: A timestamp flagging when the entity was last modified.
   * @param string $env_id
   *   The machine name of the environment.
   *
   * @return array
   *   An array of documents that are sent to the Apache Solr server for
   *   indexing. Empty when the entity could not be loaded.
   *
   * @throws Exception
   *   Re-throws any exception raised while loading or converting the entity.
   *
   * @see apachesolr_index_nodes() for the old-skool version.
   */
  function apachesolr_index_entity_to_documents($item, $env_id) {
    global $user;
    drupal_save_session(FALSE);
    $saved_user = $user;
    // Build the content for the index as an anonymous user to avoid exposing
    // restricted fields and such. By setting a variable, indexing can take
    // place as a different user.
    $uid = variable_get('apachesolr_index_user', 0);
    $index_user = ($uid == 0) ? FALSE : user_load($uid);
    // Fall back to the anonymous user when no user is configured or the
    // configured account no longer exists.
    $user = $index_user ? $index_user : drupal_anonymous_user();

    $entity_type = $item->entity_type;
    $documents = array();
    try {
      // Entity cache will be reset at the end of the indexing algorithm, to use
      // the cache properly whenever the code does another entity_load.
      $entities = entity_load($entity_type, array($item->entity_id));
      $entity = $entities ? reset($entities) : FALSE;
      // If the object failed to load there is nothing to convert.
      if (!empty($entity)) {
        $documents = apachesolr_convert_entity_to_documents($entity, $entity_type, $env_id);
      }
    }
    catch (Exception $e) {
      // Never leave the request running as the index user.
      $user = $saved_user;
      drupal_save_session(TRUE);
      throw $e;
    }

    // Restore the user.
    $user = $saved_user;
    drupal_save_session(TRUE);
    return $documents;
  }
  /**
   * Converts an entity into the documents that are sent to Apache Solr.
   *
   * A base document is created for the entity and handed to every "document
   * callback" registered for the entity type and bundle; each callback returns
   * an array of documents. Modules can then modify the result by implementing:
   * - apachesolr_index_document_build()
   * - apachesolr_index_document_build_ENTITY_TYPE()
   * - apachesolr_index_documents_alter()
   *
   * This code lives outside apachesolr_index_entity_to_documents() so it can be
   * re-used by apachesolr_multilingual_apachesolr_index_documents_alter().
   *
   * @param object $entity
   *   The entity for which we want a document.
   * @param string $entity_type
   *   The type of entity we're processing.
   * @param string $env_id
   *   The machine name of the environment.
   *
   * @return array
   *   An array of documents that are sent to the Apache Solr server for
   *   indexing.
   */
  function apachesolr_convert_entity_to_documents($entity, $entity_type, $env_id) {
    $ids = entity_extract_ids($entity_type, $entity);
    $bundle = $ids[2];

    // Start from a document holding only the fields common to all entities.
    $base_document = _apachesolr_index_process_entity_get_document($entity, $entity_type);

    // Let every type-specific callback contribute its documents.
    $callbacks = apachesolr_entity_get_callback($entity_type, 'document callback', $bundle);
    $documents = array();
    foreach ($callbacks as $callback) {
      $callback_documents = $callback($base_document, $entity, $entity_type, $env_id);
      $documents = array_merge($documents, $callback_documents);
    }

    foreach ($documents as $doc) {
      // Generic hook first, then the entity-type specific one.
      module_invoke_all('apachesolr_index_document_build', $doc, $entity, $entity_type, $env_id);
      module_invoke_all('apachesolr_index_document_build_' . $entity_type, $doc, $entity, $env_id);

      // Make sure the mandatory fields exist. The label is used for
      // user-friendly labeling.
      if (empty($doc->label)) {
        $doc->label = '';
      }
      // "content" feeds fulltext indexing; an empty value means the entity is
      // not fulltext searchable.
      if (empty($doc->content)) {
        $doc->content = '';
      }
      // "teaser" is shown when no highlighted text is available.
      if (empty($doc->teaser)) {
        $doc->teaser = truncate_utf8($doc->content, 300, TRUE);
      }
    }

    // Finally give modules a chance to alter the complete set. The hook is
    // called directly so implementations can take $documents by reference.
    foreach (module_implements('apachesolr_index_documents_alter') as $module) {
      $alter_function = $module . '_apachesolr_index_documents_alter';
      $alter_function($documents, $entity, $entity_type, $env_id);
    }

    return $documents;
  }
  /**
   * Index an array of documents to solr.
   *
   * Documents are sent in chunks of 20. When a chunk fails, the failure is
   * logged together with the entity ids of that chunk and FALSE is returned;
   * chunks sent before the failure are not rolled back here.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param array $documents
   *   The documents to send. Each document is expected to expose entity_type
   *   and entity_id, which are used for error reporting.
   *
   * @return bool|int
   *   The number of documents indexed, TRUE when there was nothing to send, or
   *   FALSE on failure.
   *
   * @throws Exception
   *   When the Solr object for the environment cannot be obtained.
   */
  function apachesolr_index_send_to_solr($env_id, array $documents) {
    // Get the $solr object.
    $solr = apachesolr_get_solr($env_id);
    // Do not index when we do not have any documents to send. Send TRUE because
    // this is not an error.
    if (empty($documents)) {
      return TRUE;
    }
    // Send the documents off to Solr.
    $log_success = variable_get('apachesolr_watchdog_successes', TRUE);
    if ($log_success) {
      watchdog('Apache Solr', 'Adding @count documents.', array('@count' => count($documents)));
    }
    // Holds the chunk currently being sent so the catch block can report it.
    $docs = array();
    try {
      $docs_chunk = array_chunk($documents, 20);
      foreach ($docs_chunk as $docs) {
        $solr->addDocuments($docs);
      }
      if ($log_success) {
        watchdog('Apache Solr', 'Indexing succeeded on @count documents', array(
          '@count' => count($documents),
        ), WATCHDOG_INFO);
      }
      return count($documents);
    }
    catch (Exception $e) {
      // Always start from an empty list so implode() below receives an array
      // even if no chunk had been picked up yet.
      $eids = array();
      foreach ($docs as $doc) {
        $eids[] = $doc->entity_type . '/' . $doc->entity_id;
      }
      watchdog('Apache Solr', 'Indexing failed on one of the following entity ids: @eids <br /> !message', array(
        '@eids' => implode(', ', $eids),
        '!message' => nl2br(strip_tags($e->getMessage())),
      ), WATCHDOG_ERROR);
      return FALSE;
    }
  }
  /**
   * Returns the map of HTML tag names to document field names.
   *
   * The map is read from the "apachesolr_tags_to_index" variable; the defaults
   * below are used when that variable is not set.
   *
   * @return array
   *   An associative array keyed by HTML tag name whose values are the names of
   *   the document fields the tag contents are collected in.
   */
  function _apachesolr_tags_to_index() {
    $default_map = array(
      'h1' => 'tags_h1',
      'h2' => 'tags_h2_h3',
      'h3' => 'tags_h2_h3',
      'h4' => 'tags_h4_h5_h6',
      'h5' => 'tags_h4_h5_h6',
      'h6' => 'tags_h4_h5_h6',
      'u' => 'tags_inline',
      'b' => 'tags_inline',
      'i' => 'tags_inline',
      'strong' => 'tags_inline',
      'em' => 'tags_inline',
      'a' => 'tags_a',
    );
    return variable_get('apachesolr_tags_to_index', $default_map);
  }
  /**
   * Collects the contents of selected HTML tags into fields of the document.
   *
   * Every tag listed by _apachesolr_tags_to_index() that occurs in $text has its
   * cleaned contents appended to the document field mapped to that tag. Anchor
   * tags whose contents look like a bare URL are skipped.
   *
   * @param ApacheSolrDocument $document
   *   The document to add the fields to.
   * @param string $text
   *   The HTML to scan. Must be stripped of control characters beforehand.
   */
  function apachesolr_index_add_tags_to_document(ApacheSolrDocument $document, $text) {
    $tags_to_index = _apachesolr_tags_to_index();
    $tag_names = array_keys($tags_to_index);

    // Drop every tag we are not interested in, then capture the tag name and
    // the contents of each remaining element.
    $text = strip_tags($text, '<' . implode('><', $tag_names) . '>');
    $pattern = '@<(' . implode('|', $tag_names) . ')[^>]*>(.*)</\1>@Ui';
    preg_match_all($pattern, $text, $matches);

    foreach ($matches[1] as $key => $tag) {
      $tag = drupal_strtolower($tag);
      $contents = $matches[2][$key];

      // We don't want to index links auto-generated by the url filter.
      if ($tag == 'a' && preg_match('@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9]+@', $contents)) {
        continue;
      }

      $field = $tags_to_index[$tag];
      if (!isset($document->{$field})) {
        $document->{$field} = '';
      }
      $document->{$field} .= ' ' . apachesolr_clean_text($contents);
    }
  }
  /**
   * Builds the generic Solr document for an entity.
   *
   * Only the fields common to all entities are filled in here: identifiers,
   * site, bundle, language and, when the entity has a URI, its path, URL and
   * path alias. Virtually all entity types add their own fields afterwards.
   *
   * @param object $entity
   *   The entity for which we want a document.
   * @param string $entity_type
   *   The type of entity we're processing.
   *
   * @return ApacheSolrDocument
   *   The partially populated document.
   */
  function _apachesolr_index_process_entity_get_document($entity, $entity_type) {
    list($entity_id, $vid, $bundle) = entity_extract_ids($entity_type, $entity);

    // URLs are always absolute and, when the entity has a known language,
    // generated for that language.
    $url_options = array('absolute' => TRUE);
    $languages = language_list();
    if (isset($entity->language) && isset($languages[$entity->language])) {
      $url_options['language'] = $languages[$entity->language];
    }

    $document = new ApacheSolrDocument();
    $document->id = apachesolr_document_id($entity_id, $entity_type);
    $document->site = url(NULL, $url_options);
    $document->hash = apachesolr_site_hash();
    $document->entity_id = $entity_id;
    $document->entity_type = $entity_type;
    $document->bundle = $bundle;
    $document->bundle_name = entity_bundle_label($entity_type, $bundle);
    // Entities without a language are stored with the language-neutral code.
    $document->ss_language = empty($entity->language) ? LANGUAGE_NONE : $entity->language;

    // A path is not a requirement of an entity.
    $uri = entity_uri($entity_type, $entity);
    if (empty($uri)) {
      return $document;
    }

    $document->path = $uri['path'];
    $document->url = url($uri['path'], $uri['options'] + $url_options);

    // Path aliases can carry important information about the content, so add
    // them to the index as well when they differ from the system path.
    if (function_exists('drupal_get_path_alias')) {
      $alias = drupal_get_path_alias($document->path, $document->ss_language);
      if ($alias && $alias != $document->path) {
        $document->path_alias = $alias;
      }
    }

    return $document;
  }
  /**
   * Fetches the next rows of the index queue for an entity type.
   *
   * Nothing is returned while the global "apachesolr_read_only" variable is set
   * or when the environment indexes no bundle of the entity type. The status of
   * each returned row is combined with the result of the bundle's status
   * callbacks.
   *
   * @todo Remove the read only because it is not environment specific
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string $entity_type
   *   The entity type to fetch queued rows for.
   * @param int $limit
   *   The maximum number of rows to return.
   *
   * @return array
   *   The list of rows to index.
   */
  function apachesolr_index_get_entities_to_index($env_id, $entity_type, $limit) {
    if (variable_get('apachesolr_read_only', 0)) {
      return array();
    }
    $bundles = apachesolr_get_index_bundles($env_id, $entity_type);
    if (empty($bundles)) {
      return array();
    }

    // Get the next batch of entities to index.
    $query = _apachesolr_index_get_next_set_query($env_id, $entity_type);
    $query->range(0, $limit);

    $rows = array();
    // Status callbacks are looked up once per bundle.
    $callbacks_by_bundle = array();
    foreach ($query->execute() as $record) {
      $bundle = $record->bundle;
      if (!isset($callbacks_by_bundle[$bundle])) {
        $callbacks_by_bundle[$bundle] = apachesolr_entity_get_callback($entity_type, 'status callback', $bundle);
      }

      if (is_array($callbacks_by_bundle[$bundle])) {
        foreach ($callbacks_by_bundle[$bundle] as $status_callback) {
          if (!is_callable($status_callback)) {
            continue;
          }
          // Once the status is false no further callback is consulted; the
          // status is still normalised to a boolean.
          $indexable = (bool) $record->status;
          if ($indexable) {
            $indexable = (bool) $status_callback($record->entity_id, $record->entity_type);
          }
          $record->status = $indexable;
        }
      }

      $rows[] = $record;
    }

    return $rows;
  }
  /**
   * Delete the whole index for an environment, or a part of it.
   *
   * The documents are removed with a delete-by-query which modules can change
   * through hook_apachesolr_delete_by_query_alter(). Afterwards the affected
   * content is queued for re-indexing: through the entity type's "reindex
   * callback" when an entity type is given, otherwise for the whole
   * environment.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string $entity_type
   *   (optional) specify to remove just this entity_type from the index.
   * @param string $bundle
   *   (optional) also specify a bundle to remove just the bundle from
   *   the index.
   *
   * @return bool
   *   TRUE for success, FALSE if an error occurred or the environment is
   *   read-only.
   */
  function apachesolr_index_delete_index($env_id, $entity_type = NULL, $bundle = NULL) {
    if (apachesolr_environment_variable_get($env_id, 'apachesolr_read_only', APACHESOLR_READ_WRITE) == APACHESOLR_READ_ONLY) {
      watchdog('Apache Solr', 'Trying to update the Solr index while the environment %env_id is read-only in function %function', array('%function' => __FUNCTION__, '%env_id' => $env_id), WATCHDOG_WARNING);
      return FALSE;
    }
    try {
      // Instantiate a new Solr object.
      $solr = apachesolr_get_solr($env_id);
      $query = '*:*';
      if (!empty($entity_type) && !empty($bundle)) {
        $query = "(bundle:$bundle AND entity_type:$entity_type) OR sm_parent_entity_bundle:{$entity_type}-{$bundle}";
      }
      elseif (!empty($bundle)) {
        $query = "(bundle:$bundle)";
      }
      elseif (!empty($entity_type)) {
        // Only this entity type is re-queued below, so only its documents may
        // be deleted. Falling through to '*:*' would wipe every other entity
        // type from the index without scheduling it for re-indexing.
        $query = "(entity_type:$entity_type)";
      }
      // Allow other modules to modify the delete query.
      // For example, use the site hash so that you only delete this site's
      // content: $query = 'hash:' . apachesolr_site_hash()
      drupal_alter('apachesolr_delete_by_query', $query);
      $solr->deleteByQuery($query);
      $solr->commit();
      // Log the query used for deletion.
      watchdog('Apache Solr', 'Deleted documents from index with query @query', array('@query' => $query), WATCHDOG_INFO);
      if (!empty($entity_type)) {
        $reindex_callback = apachesolr_entity_get_callback($entity_type, 'reindex callback');
        if (is_callable($reindex_callback)) {
          $reindex_callback($env_id, $bundle);
        }
      }
      else {
        apachesolr_index_mark_for_reindex($env_id);
      }
      apachesolr_set_last_index_updated($env_id, REQUEST_TIME);
    }
    catch (Exception $e) {
      watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
      return FALSE;
    }
    return TRUE;
  }
  /**
   * Builds the query selecting the entities that are still due to be indexed.
   *
   * Rows qualify when they belong to an indexed bundle, have status 1 and come
   * after the last recorded index position, where position is defined by the
   * changed timestamp first and the entity id second.
   *
   * @param string $env_id
   *   Environment ID.
   * @param string $entity_type
   *   The entity type whose queue is queried.
   *
   * @return SelectQuery
   *   The query, ordered by changed timestamp and entity id, without a range.
   */
  function _apachesolr_index_get_next_set_query($env_id, $entity_type) {
    $table = apachesolr_get_indexer_table($entity_type);
    $position = apachesolr_get_last_index_position($env_id, $entity_type);
    $bundles = apachesolr_get_index_bundles($env_id, $entity_type);
    $last_entity_id = $position['last_entity_id'];
    $last_changed = $position['last_changed'];

    // Tie breaker for entities that were changed at exactly the same second as
    // the last indexed entity.
    $same_second_later_id = db_and()
      ->condition('aie.changed', $last_changed, '=')
      ->condition('aie.entity_id', $last_entity_id, '>');
    $after_last_position = db_or()
      ->condition('aie.changed', $last_changed, '>')
      ->condition($same_second_later_id);

    // Note that ALL fields of the indexer table are fetched.
    $query = db_select($table, 'aie');
    $query->fields('aie');
    $query->condition('aie.bundle', $bundles);
    $query->condition('aie.status', 1);
    $query->condition($after_last_position);
    // The oldest rows come first and the id gives a definitive order. This
    // ordering must match the position conditions above, otherwise rows would
    // be skipped or indexed twice.
    $query->orderBy('aie.changed', 'ASC');
    $query->orderBy('aie.entity_id', 'ASC');
    $query->addTag('apachesolr_index_' . $entity_type);

    // Other, entity-specific tables don't need this condition.
    if ($table == 'apachesolr_index_entities') {
      $query->condition('aie.entity_type', $entity_type);
    }

    return $query;
  }
  /**
   * Removes the documents of excluded bundles of an entity type from the index.
   *
   * Documents that have the entity type and bundle as a parent are deleted as
   * well. One delete-by-query is issued per bundle, each of which modules can
   * change through hook_apachesolr_delete_by_query_alter(); a single commit
   * follows when at least one bundle was given.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string $entity_type
   *   The entity type the bundles belong to.
   * @param array $excluded_bundles
   *   The bundle names whose documents are removed.
   *
   * @return bool
   *   TRUE on success, FALSE on failure or when the environment is read-only.
   */
  function apachesolr_index_delete_bundles($env_id, $entity_type, array $excluded_bundles) {
    $read_only_setting = apachesolr_environment_variable_get($env_id, 'apachesolr_read_only', APACHESOLR_READ_WRITE);
    if ($read_only_setting == APACHESOLR_READ_ONLY) {
      $context = array('%function' => __FUNCTION__, '%env_id' => $env_id);
      watchdog('Apache Solr', 'Trying to update the Solr index while the environment %env_id is read-only in function %function', $context, WATCHDOG_WARNING);
      return FALSE;
    }

    try {
      $solr = apachesolr_get_solr($env_id);
      // Remove newly omitted bundles, one query per bundle.
      foreach ($excluded_bundles as $bundle) {
        $query = "(bundle:$bundle AND entity_type:$entity_type) OR sm_parent_entity_bundle:{$entity_type}-{$bundle}";
        // Allow other modules to modify the delete query.
        // For example, use the site hash so that you only delete this site's
        // content: $query = 'hash:' . apachesolr_site_hash()
        drupal_alter('apachesolr_delete_by_query', $query);
        $solr->deleteByQuery($query);
        // Log the query used for deletion.
        watchdog('Apache Solr', 'Deleted documents from index with query @query', array('@query' => $query), WATCHDOG_INFO);
      }
      // Nothing was deleted for an empty list, so there is nothing to commit.
      if (!empty($excluded_bundles)) {
        $solr->commit();
      }
    }
    catch (Exception $e) {
      watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
      return FALSE;
    }

    return TRUE;
  }
  /**
   * Removes a single entity from a Solr index.
   *
   * Documents whose parent document is the deleted one are removed by the same
   * query.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string $entity_type
   *   The type of the entity being removed.
   * @param string $entity_id
   *   The ID of the entity being removed.
   *
   * @return bool
   *   TRUE on success; FALSE if the environment is read-only, if the delete
   *   threw an exception, or if an earlier call in this request already failed.
   */
  function apachesolr_index_delete_entity_from_index($env_id, $entity_type, $entity_id) {
    // Remembers a failure for the rest of the request so that a broken Solr
    // connection is not hit again and again.
    static $has_failed = FALSE;
    if ($has_failed) {
      return FALSE;
    }

    $access_mode = apachesolr_environment_variable_get($env_id, 'apachesolr_read_only', APACHESOLR_READ_WRITE);
    if ($access_mode == APACHESOLR_READ_ONLY) {
      $log_args = array('%function' => __FUNCTION__, '%env_id' => $env_id);
      watchdog('Apache Solr', 'Trying to update the Solr index while the environment %env_id is read-only in function %function', $log_args, WATCHDOG_WARNING);
      return FALSE;
    }

    try {
      $solr = apachesolr_get_solr($env_id);
      $document_id = apachesolr_document_id($entity_id, $entity_type);
      $delete_query = "id:\"$document_id\" OR sm_parent_document_id:\"$document_id\"";
      $solr->deleteByQuery($delete_query);
      // Keep a trace of the query that was actually sent.
      watchdog('Apache Solr', 'Deleted documents from index with query @query', array('@query' => $delete_query), WATCHDOG_INFO);
      apachesolr_set_last_index_updated($env_id, REQUEST_TIME);
    }
    catch (Exception $exception) {
      watchdog('Apache Solr', nl2br(check_plain($exception->getMessage())), NULL, WATCHDOG_ERROR);
      $has_failed = TRUE;
      return FALSE;
    }

    return TRUE;
  }
  /**
   * Marks entities of an environment for reindexing.
   *
   * Invokes the 'reindex callback' of every indexable entity type that matches
   * the request, then resets the last index position and empties the
   * cache_apachesolr bin.
   *
   * @param string $env_id
   *   The environment to mark for reindexing.
   * @param string|null $entity_type
   *   (optional) Restrict the operation to this entity type. When NULL, every
   *   indexable entity type is processed.
   */
  function apachesolr_index_mark_for_reindex($env_id, $entity_type = NULL) {
    foreach (entity_get_info() as $type => $entity_info) {
      // Skip entity types that were not asked for.
      if (!(($type == $entity_type) || ($entity_type == NULL))) {
        continue;
      }
      // Skip entity types that are not flagged as indexable.
      if (!isset($entity_info['apachesolr']) || !$entity_info['apachesolr']['indexable']) {
        continue;
      }
      $callback = apachesolr_entity_get_callback($type, 'reindex callback');
      if (empty($callback)) {
        continue;
      }
      call_user_func($callback, $env_id);
    }

    apachesolr_clear_last_index_position($env_id, $entity_type);
    cache_clear_all('*', 'cache_apachesolr', TRUE);
  }
  /**
   * Stores which bundles of an entity type are indexed for an environment.
   *
   * The existing rows for the environment/entity type pair are deleted and the
   * given bundles are inserted in their place, all inside one transaction.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string $entity_type
   *   The entity type to index.
   * @param array $bundles
   *   The machine names of the bundles to index. An empty array leaves the
   *   pair without any bundle rows.
   *
   * @throws Exception
   *   Any exception raised by the database layer, after the transaction has
   *   been rolled back.
   */
  function apachesolr_index_set_bundles($env_id, $entity_type, array $bundles) {
    $transaction = db_transaction();
    try {
      $delete = db_delete('apachesolr_index_bundles');
      $delete->condition('env_id', $env_id);
      $delete->condition('entity_type', $entity_type);
      $delete->execute();

      if (!empty($bundles)) {
        $insert = db_insert('apachesolr_index_bundles');
        $insert->fields(array('env_id', 'entity_type', 'bundle'));
        foreach ($bundles as $bundle_name) {
          $row = array(
            'env_id' => $env_id,
            'entity_type' => $entity_type,
            'bundle' => $bundle_name,
          );
          $insert->values($row);
        }
        $insert->execute();
      }
    }
    catch (Exception $exception) {
      $transaction->rollback();
      // Propagate the failure so the caller is aware of it.
      throw $exception;
    }
  }
  // This really should be in core, but it isn't yet. When it gets added to core,
  // we can remove this version.
  // @see http://drupal.org/node/969180
  if (!function_exists('entity_bundle_label')) {
    /**
     * Returns the label of a bundle.
     *
     * Labels for all entity types are collected once from entity_get_info() and
     * kept in a drupal_static() cache keyed by this function's name.
     *
     * @param string $entity_type
     *   The entity type; e.g. 'node' or 'user'.
     * @param string $bundle_name
     *   The bundle for which we want the label.
     *
     * @return string|false
     *   The human-readable name of the bundle, or FALSE if the bundle has no
     *   label or the entity type/bundle combination is unknown.
     */
    function entity_bundle_label($entity_type, $bundle_name) {
      $labels = &drupal_static(__FUNCTION__, array());
      if (empty($labels)) {
        foreach (entity_get_info() as $type => $info) {
          // Tolerate entity info without a 'bundles' entry.
          if (empty($info['bundles'])) {
            continue;
          }
          foreach ($info['bundles'] as $bundle => $bundle_info) {
            $labels[$type][$bundle] = !empty($bundle_info['label']) ? $bundle_info['label'] : FALSE;
          }
        }
      }
      // Unknown entity types or bundles yield FALSE instead of an undefined
      // index notice and a NULL return.
      return isset($labels[$entity_type][$bundle_name]) ? $labels[$entity_type][$bundle_name] : FALSE;
    }
  }
  /**
   * Builds the node-specific information for a Solr document.
   *
   * Fills in the label, rendered content, teaser, author, status flags, dates,
   * comment statistics and the output of hook_node_update_index()
   * implementations.
   *
   * @param ApacheSolrDocument $document
   *   The Solr document we are building up.
   * @param object $node
   *   The node being indexed.
   * @param string $entity_type
   *   The type of entity we're dealing with. Not used by this function.
   * @param string $env_id
   *   The machine name of the environment. Not used by this function.
   *
   * @return array
   *   An array of ApacheSolrDocument objects; currently always the single
   *   document that was passed in.
   */
  function apachesolr_index_node_solr_document(ApacheSolrDocument $document, $node, $entity_type, $env_id) {
    // None of these get added unless they are explicitly in our schema.xml
    $document->label = apachesolr_clean_text($node->title);

    // Render the node in the 'search_index' view mode to obtain the body text.
    $language = empty($node->language) ? LANGUAGE_NONE : $node->language;
    $build = node_view($node, 'search_index', $language);
    // Drop the theme wrapper so only the field output is rendered.
    unset($build['#theme']);
    // Allow render caching if it is available.
    $build['#cache'] = true;
    $text = drupal_render($build);
    $document->content = apachesolr_clean_text($text);

    // Teaser: use the node's own teaser, else the safe body summary, else a
    // truncated version of the cleaned content. Generating it ourselves must
    // not leak the author or other normally hidden information.
    if (isset($node->teaser)) {
      $document->teaser = apachesolr_clean_text($node->teaser);
    }
    elseif (isset($node->body[$language][0]['safe_summary'])) {
      $document->teaser = apachesolr_clean_text($node->body[$language][0]['safe_summary']);
    }
    else {
      $document->teaser = truncate_utf8($document->content, 300, TRUE);
    }

    // Author information.
    $has_named_author = ($node->uid != 0 && strlen($node->name) != 0);
    if ($has_named_author) {
      $document->ss_name = $node->name;
      // We want the name to be searchable for keywords.
      $document->tos_name = $node->name;
    }
    else {
      // @see user_validate_name(). !'0' === TRUE.
      $document->ss_name = '0';
    }

    // Index the formatted username so it can be searched and sorted on.
    $account = (object) array('uid' => $node->uid, 'name' => $node->name);
    $formatted_name = format_username($account);
    $document->ss_name_formatted = $formatted_name;
    $document->tos_name_formatted = $formatted_name;

    $document->is_uid = $node->uid;
    $document->bs_status = $node->status;
    $document->bs_sticky = $node->sticky;
    $document->bs_promote = $node->promote;
    $document->is_tnid = $node->tnid;
    $document->bs_translate = $node->translate;

    // Timestamps of the node.
    $document->ds_created = apachesolr_date_iso($node->created);
    $document->ds_changed = apachesolr_date_iso($node->changed);

    // Comment count and time of the latest activity.
    $has_comments = isset($node->last_comment_timestamp) && !empty($node->comment_count);
    if ($has_comments) {
      $document->ds_last_comment_timestamp = apachesolr_date_iso($node->last_comment_timestamp);
      $document->ds_last_comment_or_change = apachesolr_date_iso(max($node->last_comment_timestamp, $node->changed));
      $document->is_comment_count = $node->comment_count;
    }
    else {
      $document->ds_last_comment_or_change = apachesolr_date_iso($node->changed);
    }

    // Collect extra data that is normally not visible, including comments.
    // The hook is invoked per module so the results stay keyed by module name;
    // only then can comments be separated from the other output.
    $extra = array();
    $excludes = variable_get('apachesolr_exclude_nodeapi_types', array());
    $excluded_modules = isset($excludes[$node->type]) ? $excludes[$node->type] : array();
    foreach (module_implements('node_update_index') as $module) {
      // A module excluded for this node type (e.g. 'comment') is skipped.
      if (!empty($excluded_modules[$module])) {
        continue;
      }
      $function = $module . '_node_update_index';
      $output = $function($node);
      if ($output) {
        $extra[$module] = $output;
      }
    }

    // Comment text gets its own field and is removed from the extra output.
    if (isset($extra['comment'])) {
      $comments = $extra['comment'];
      unset($extra['comment']);
      $document->ts_comments = apachesolr_clean_text($comments);
      // @todo: do we want to reproduce apachesolr_add_tags_to_document() for comments?
    }

    // Whatever is left goes into an omit-norms text field, since it is
    // generally short and not really full text.
    if (!empty($extra)) {
      $document->tos_content_extra = apachesolr_clean_text(implode(' ', $extra));
    }

    // Add additional indexing based on the rendered body.
    apachesolr_index_add_tags_to_document($document, $text);

    // Callbacks may return several documents; this one returns exactly one.
    return array($document);
  }
  /**
   * Reacts to a change in the set of indexed node bundles.
   *
   * This is a placeholder: the body is intentionally empty.
   *
   * @param string $env_id
   *   The environment whose bundle configuration changed.
   * @param array $existing_bundles
   *   The bundles that were configured before the change.
   * @param array $new_bundles
   *   The bundles that are configured after the change.
   */
  function apachesolr_index_node_bundles_changed($env_id, $existing_bundles, $new_bundles) {
    // Intentionally left empty.
  }
  /**
   * Reindexing callback for ApacheSolr, for nodes.
   *
   * Deletes the status 1 rows from the node indexer table and re-inserts one
   * row per published node with REQUEST_TIME as its changed value. Both steps
   * run inside a single transaction.
   *
   * @param string $env_id
   *   The machine name of the environment.
   * @param string|null $bundle
   *   (optional) The bundle to reindex. If omitted, all indexable bundles are
   *   reindexed.
   *
   * @return null
   *   Returns NULL early, without touching the table, if the given bundle is
   *   not in the list of indexable bundles.
   *
   * @throws Exception
   *   Any exception raised by the database layer, after the transaction has
   *   been rolled back.
   */
  function apachesolr_index_node_solr_reindex($env_id, $bundle = NULL) {
    $indexer_table = apachesolr_get_indexer_table('node');
    $transaction = db_transaction();
    try {
      $indexable_bundles = apachesolr_get_index_bundles($env_id, 'node');
      $has_bundle_list = !empty($indexable_bundles);
      if ($bundle && $has_bundle_list && !in_array($bundle, $indexable_bundles)) {
        // The bundle specified is not in the indexable bundles list.
        return NULL;
      }

      // Status 0 rows are left alone: those still have to be removed from the
      // index later.
      $delete = db_delete($indexer_table);
      $delete->condition('status', 1);
      if (!empty($bundle)) {
        $delete->condition('bundle', $bundle);
      }
      elseif ($has_bundle_list) {
        $delete->condition('bundle', $indexable_bundles, 'IN');
      }
      $delete->execute();

      // Select every published node in the shape of an indexer table row.
      $select = db_select('node', 'n');
      $select->condition('status', 1);
      $select->addExpression("'node'", 'entity_type');
      $select->addField('n', 'nid', 'entity_id');
      $select->addField('n', 'type', 'bundle');
      $select->addField('n', 'status', 'status');
      $select->addExpression(REQUEST_TIME, 'changed');
      if ($bundle) {
        // Only nodes of the requested content type.
        $select->condition('n.type', $bundle);
      }
      elseif ($has_bundle_list) {
        // Only nodes of content types that are configured as indexable.
        $select->condition('n.type', $indexable_bundles, 'IN');
      }

      $columns = array('entity_id', 'bundle', 'status', 'entity_type', 'changed');
      db_insert($indexer_table)
        ->fields($columns)
        ->from($select)
        ->execute();
    }
    catch (Exception $exception) {
      $transaction->rollback();
      throw $exception;
    }
  }
  /**
   * Status callback for ApacheSolr, for nodes.
   *
   * @param int $entity_id
   *   The ID of the entity; used to load it when $entity is not given.
   * @param string $entity_type
   *   The entity type; used to load the entity when $entity is not given.
   * @param object|null $entity
   *   In the case where the status is being checked while the entity is being
   *   saved, this contains the full entity object. In other cases, it will be
   *   NULL and the entity is loaded here.
   *
   * @return int|false
   *   1 if the entity's status equals 1, 0 for any other status, or FALSE if
   *   the entity could not be loaded.
   */
  function apachesolr_index_node_status_callback($entity_id, $entity_type, $entity = NULL) {
    if ($entity === NULL) {
      $loaded = entity_load($entity_type, array($entity_id));
      $entity = $loaded ? reset($loaded) : FALSE;
    }

    // Nothing to report on when the entity is missing.
    if (empty($entity)) {
      return FALSE;
    }

    // Normalize to an integer: anything other than 1 counts as 0.
    if ($entity->status == 1) {
      return 1;
    }
    return 0;
  }
  /**
   * Callback that converts a term_reference field into an array of index fields.
   *
   * Each referenced term is indexed together with all of its ancestors, so a
   * search for a general category also matches content tagged with a more
   * specific one (e.g. Fruit -> apple).
   *
   * @param object $node
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the term reference field on $node.
   * @param string $index_key
   *   The Solr field key for this field.
   * @param array $field_info
   *   Field mapping information; the 'multiple' entry is read here.
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries to add to the document.
   *   Empty if the field has no value or the taxonomy API is unavailable.
   */
  function apachesolr_term_reference_indexing_callback($node, $field_name, $index_key, array $field_info) {
    // Keep ancestors cached.
    $ancestors = &drupal_static(__FUNCTION__, array());

    $fields = array();
    $vocab_names = array();
    if (!empty($node->{$field_name}) && function_exists('taxonomy_get_parents_all')) {
      $field = $node->$field_name;
      // Take the items of the first language key. reset() is used instead of
      // each(), which is deprecated in PHP 7.2 and removed in PHP 8.
      $items = reset($field);
      foreach ($items as $item) {
        // Triple indexing of tids lets us do efficient searches (on tid)
        // and do accurate per field or per-vocabulary faceting.
        if (!isset($ancestors[$item['tid']])) {
          $ancestors[$item['tid']] = taxonomy_get_parents_all($item['tid']);
        }
        foreach ($ancestors[$item['tid']] as $ancestor) {
          // Index the parent term against the field. This happens regardless
          // of whether the facet is shown as a hierarchy or not. A single
          // valued field cannot take an additional value, so it is skipped.
          if ($field_info['multiple']) {
            $fields[] = array(
              'key' => $index_key,
              'value' => $ancestor->tid,
            );
          }
          $fields[] = array(
            'key' => 'tid',
            'value' => $ancestor->tid,
          );
          $fields[] = array(
            'key' => 'im_vid_' . $ancestor->vid,
            'value' => $ancestor->tid,
          );
          $name = apachesolr_clean_text($ancestor->name);
          $vocab_names[$ancestor->vid][] = $name;
          // We index each name as a string for cross-site faceting
          // using the vocab name rather than vid in field construction.
          $fields[] = array(
            'key' => 'sm_vid_' . apachesolr_vocab_name($ancestor->vid),
            'value' => $name,
          );
        }
      }
      // Index the term names into a text field for MLT queries and keyword
      // searching.
      foreach ($vocab_names as $vid => $names) {
        $fields[] = array(
          'key' => 'tm_vid_' . $vid . '_names',
          'value' => implode(' ', $names),
        );
      }
    }
    return $fields;
  }
  /**
   * Helper function - return a safe (PHP identifier) vocabulary name.
   *
   * The name is read from {taxonomy_vocabulary}, every character outside
   * a-z, A-Z, 0-9, underscore and bytes 0x7f-0xff is replaced by an
   * underscore, and the result is cached per vocabulary ID.
   *
   * @param int $vid
   *   The vocabulary ID.
   *
   * @return string
   *   The sanitized vocabulary name, or '_<vid>_' when nothing usable is left
   *   after sanitizing.
   */
  function apachesolr_vocab_name($vid) {
    $names = &drupal_static(__FUNCTION__, array());
    if (isset($names[$vid])) {
      return $names[$vid];
    }

    $vocab_name = db_query('SELECT v.name FROM {taxonomy_vocabulary} v WHERE v.vid = :vid', array(':vid' => $vid))->fetchField();
    $safe_name = preg_replace('/[^a-zA-Z0-9_\x7f-\xff]/', '_', $vocab_name);
    // Fallback for names ending up all as '_'.
    if (!rtrim($safe_name, '_')) {
      $safe_name = '_' . $vid . '_';
    }
    $names[$vid] = $safe_name;

    return $names[$vid];
  }
  /**
   * Callback that converts a list module field into an array of index fields.
   *
   * Every value is converted according to the field's index type. For
   * multivalued numeric fields the first value is additionally stored under a
   * single-valued key so that it can be used for stats.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the field on $entity.
   * @param string $index_key
   *   The Solr field key for this field.
   * @param array $field_info
   *   Field mapping information; 'index_type' and 'multiple' are read here.
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries to add to the document.
   */
  function apachesolr_fields_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
    $fields = array();
    $numeric = TRUE;
    if (!empty($entity->{$field_name})) {
      $field = $entity->$field_name;
      // Take the values of the first language key. reset() is used instead of
      // each(), which is deprecated in PHP 7.2 and removed in PHP 8.
      $values = reset($field);
      // Pick the conversion function that matches the Solr index type.
      switch ($field_info['index_type']) {
        case 'integer':
        case 'half-int':
        case 'sint':
        case 'tint':
        case 'thalf-int':
        case 'boolean':
          $function = 'intval';
          break;

        case 'float':
        case 'double':
        case 'sfloat':
        case 'sdouble':
        case 'tfloat':
        case 'tdouble':
          $function = 'apachesolr_floatval';
          break;

        default:
          $numeric = FALSE;
          $function = 'apachesolr_clean_text';
      }
      // Iterate with foreach so that deltas which are not a gapless 0..n-1
      // sequence are still indexed.
      foreach ($values as $item) {
        $fields[] = array(
          'key' => $index_key,
          'value' => $function($item['value']),
        );
      }
      // Also store the first value of the field in a singular index for multi
      // value fields.
      if ($field_info['multiple'] && $numeric && !empty($values[0])) {
        $singular_field_info = $field_info;
        $singular_field_info['multiple'] = FALSE;
        $single_key = apachesolr_index_key($singular_field_info);
        $fields[] = array(
          'key' => $single_key,
          'value' => $function($values[0]['value']),
        );
      }
    }
    return $fields;
  }
  /**
   * Normalizes DATE and DATETIME fields into the format Apache Solr expects.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the date field on $entity.
   * @param string $index_key
   *   The Solr field key for the start date; the end date is indexed under
   *   this key with an '_end' suffix.
   * @param array $field_info
   *   Field mapping information. Not read by this callback.
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries to add to the document.
   *   Values that date_create() cannot parse are left out.
   */
  function apachesolr_date_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
    $fields = array();
    if (!empty($entity->{$field_name})) {
      $field = $entity->$field_name;
      // Take the values of the first language key. reset() is used instead of
      // each(), which is deprecated in PHP 7.2 and removed in PHP 8.
      $values = reset($field);
      // Construct a Solr-ready date string in UTC time zone based on the
      // field's date string and time zone.
      // NOTE(review): 'timezone' is looked up on the language-keyed field
      // array, not on the individual items, so the UTC default presumably
      // always applies - confirm against the field storage format.
      $tz = new DateTimeZone(isset($field['timezone']) ? $field['timezone'] : 'UTC');
      // $fields may end up having two values; one for the start date
      // and one for the end date.
      foreach ($values as $value) {
        if ($date = date_create($value['value'], $tz)) {
          $index_value = apachesolr_date_iso($date->format('U'));
          $fields[] = array(
            'key' => $index_key,
            'value' => $index_value,
          );
        }

        if (isset($value['value2'])) {
          if ($date = date_create($value['value2'], $tz)) {
            $index_value = apachesolr_date_iso($date->format('U'));
            $fields[] = array(
              // The value2 element is the end date. Therefore it gets indexed
              // into its own Solr field.
              'key' => $index_key . '_end',
              'value' => $index_value,
            );
          }
        }
      }
    }
    return $fields;
  }
  /**
   * Normalizes DATESTAMP fields into the format Apache Solr expects.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the datestamp field on $entity.
   * @param string $index_key
   *   The Solr field key for the start date; the end date is indexed under
   *   this key with an '_end' suffix.
   * @param array $field_info
   *   Field mapping information. Not read by this callback.
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries to add to the document.
   *   Timestamps that are missing or equal to 0 are left out.
   */
  function apachesolr_datestamp_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
    $fields = array();
    if (!empty($entity->{$field_name})) {
      // $fields may end up having two values; one for the start date
      // and one for the end date.
      $field = $entity->$field_name;
      // Take the values of the first language key. reset() is used instead of
      // each(), which is deprecated in PHP 7.2 and removed in PHP 8.
      $values = reset($field);

      foreach ($values as $value) {
        if (isset($value['value']) && $value['value'] != 0) {
          $index_value = apachesolr_date_iso($value['value']);
          $fields[] = array(
            'key' => $index_key,
            'value' => $index_value,
          );
        }
        // The end date is validated on its own value; it must not depend on
        // the start date being set.
        if (isset($value['value2']) && $value['value2'] != 0) {
          $index_value = apachesolr_date_iso($value['value2']);
          $fields[] = array(
            // The value2 element is the end date. Therefore it gets indexed
            // into its own Solr field.
            'key' => $index_key . '_end',
            'value' => $index_value,
          );
        }
      }
    }
    return $fields;
  }
  /**
   * Formats a value as a fixed-point decimal string with 20 decimals.
   *
   * The locale-independent 'F' conversion is used so that the decimal
   * separator is always a dot, whatever LC_NUMERIC locale is active.
   *
   * @param mixed $value
   *   The value to format; it is treated as a float.
   *
   * @return string
   *   The formatted number, e.g. '1.50000000000000000000'.
   */
  function apachesolr_floatval($value) {
    return sprintf('%0.20F', $value);
  }
  /**
   * Indexing callback for node_reference fields of the references module.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the node reference field on $entity.
   * @param string $index_key
   *   The Solr field key. It is recomputed from $field_info before use.
   * @param array $field_info
   *   Field mapping information, passed on to apachesolr_index_key().
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries, one per referenced
   *   node ID.
   */
  function apachesolr_nodereference_indexing_callback($entity, $field_name, $index_key, array $field_info) {
    $fields = array();
    // Drupal 7 core sets all fields to use LANGUAGE_NONE even if the entity
    // (e.g. node) is flagged as being in a specific language, so only that
    // language key is looked at.
    if (empty($entity->{$field_name}) || !isset($entity->{$field_name}[LANGUAGE_NONE])) {
      return $fields;
    }

    $index_key = apachesolr_index_key($field_info);
    foreach ($entity->{$field_name}[LANGUAGE_NONE] as $reference) {
      // Skip references without a usable node ID.
      if (empty($reference['nid'])) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $reference['nid'],
      );
    }
    return $fields;
  }
  /**
   * Indexing callback for user_reference fields of the references module.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the user reference field on $entity.
   * @param string $index_key
   *   The Solr field key. It is recomputed from $field_info before use.
   * @param array $field_info
   *   Field mapping information, passed on to apachesolr_index_key().
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries, one per referenced
   *   user ID.
   */
  function apachesolr_userreference_indexing_callback($entity, $field_name, $index_key, array $field_info) {
    $fields = array();
    // Drupal 7 core sets all fields to use LANGUAGE_NONE even if the entity
    // (e.g. node) is flagged as being in a specific language, so only that
    // language key is looked at.
    if (empty($entity->{$field_name}) || !isset($entity->{$field_name}[LANGUAGE_NONE])) {
      return $fields;
    }

    $index_key = apachesolr_index_key($field_info);
    foreach ($entity->{$field_name}[LANGUAGE_NONE] as $reference) {
      // Skip references whose uid is missing, empty or otherwise falsy.
      if (!isset($reference['uid']) || !strlen($reference['uid']) || !$reference['uid']) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $reference['uid'],
      );
    }
    return $fields;
  }
  /**
   * Indexing callback for entityreference fields.
   *
   * @param object $entity
   *   The entity that carries the field.
   * @param string $field_name
   *   The name of the entity reference field on $entity.
   * @param string $index_key
   *   The Solr field key. It is recomputed from $field_info before use.
   * @param array $field_info
   *   Field mapping information; the target type is read from
   *   $field_info['field']['settings']['target_type'].
   *
   * @return array
   *   A list of arrays with 'key' and 'value' entries. Each value has the form
   *   '<target entity type>:<target id>'.
   */
  function apachesolr_entityreference_indexing_callback($entity, $field_name, $index_key, $field_info) {
    $fields = array();
    if (empty($entity->{$field_name}) || !array_key_exists(LANGUAGE_NONE, $entity->$field_name)) {
      return $fields;
    }

    // The ID is prefixed with the target entity type so the mapping callback
    // knows what kind of entity it is dealing with.
    $target_type = $field_info['field']['settings']['target_type'];
    $index_key = apachesolr_index_key($field_info);

    foreach ($entity->{$field_name}[LANGUAGE_NONE] as $reference) {
      // Skip references without a usable target ID.
      if (empty($reference['target_id'])) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $target_type . ':' . $reference['target_id'],
      );
    }
    return $fields;
  }
  /**
   * hook_cron() helper that reconciles the node indexer table with {node}.
   *
   * First, rows whose status differs from the node's status are handed to
   * apachesolr_index_nodeapi_mass_update(). Second, rows that have no matching
   * node any more are handed to apachesolr_index_nodeapi_mass_delete(). Each
   * pass looks at no more than twice the 'apachesolr_cron_mass_limit' rows and
   * processes them in chunks of that limit.
   */
  function apachesolr_index_node_check_table() {
    $table = apachesolr_get_indexer_table('node');
    // Update or delete at most this many in each Solr query, and never check
    // more than double that amount per run.
    $limit = variable_get('apachesolr_cron_mass_limit', 500);
    $max_rows = $limit * 2;

    // Pass 1: content whose published status changed without the index table
    // noticing.
    $status_query = db_select($table, 'aie');
    $status_query->fields('n', array('nid', 'status'));
    $status_query->where('aie.status <> n.status');
    $status_query->range(0, $max_rows);
    $status_query->addTag('apachesolr_index_node');
    $status_query->innerJoin('node', 'n', 'n.nid = aie.entity_id');
    $mismatched = $status_query->execute()->fetchAllAssoc('nid');
    foreach (array_chunk($mismatched, $limit, TRUE) as $batch) {
      watchdog('Apache Solr', 'On cron running apachesolr_nodeapi_mass_update() on nids @nids', array('@nids' => implode(',', array_keys($batch))), WATCHDOG_NOTICE);
      if (!apachesolr_index_nodeapi_mass_update($batch, $table)) {
        // Solr query failed - so stop trying.
        break;
      }
    }

    // Pass 2: content that was deleted but is still listed in the index table.
    $orphan_query = db_select($table, 'aien');
    $orphan_query->isNull('n.nid');
    $orphan_query->range(0, $max_rows);
    $orphan_query->addExpression('aien.entity_id', 'nid');
    $orphan_query->leftJoin('node', 'n', 'n.nid = aien.entity_id');
    $orphans = $orphan_query->execute()->fetchAllAssoc('nid');
    foreach (array_chunk($orphans, $limit, TRUE) as $batch) {
      watchdog('Apache Solr', 'On cron running apachesolr_nodeapi_mass_delete() on nids @nids', array('@nids' => implode(',', array_keys($batch))), WATCHDOG_NOTICE);
      if (!apachesolr_index_nodeapi_mass_delete($batch, $table)) {
        // Solr query failed - so stop trying.
        break;
      }
    }
  }
  /**
   * Mass updates node rows in the Solr indexer table.
   *
   * Documents of unpublished nodes are deleted from the default environment's
   * index. After that, the indexer table rows of all given nodes get
   * REQUEST_TIME as their changed value and the node's current status.
   *
   * @param array $nodes
   *   Objects that each expose a nid and a status property.
   * @param string|null $table
   *   (optional) The indexer table to update. Defaults to the node indexer
   *   table.
   *
   * @return bool
   *   TRUE if the update succeeded or there was nothing to do; FALSE if the
   *   default environment is read-only or an exception was thrown.
   */
  function apachesolr_index_nodeapi_mass_update(array $nodes, $table = NULL) {
    if (empty($nodes)) {
      return TRUE;
    }
    if (empty($table)) {
      $table = apachesolr_get_indexer_table('node');
    }

    $env_id = apachesolr_default_environment();
    $access_mode = apachesolr_environment_variable_get($env_id, 'apachesolr_read_only', APACHESOLR_READ_WRITE);
    if ($access_mode == APACHESOLR_READ_ONLY) {
      $log_args = array('%function' => __FUNCTION__, '%env_id' => $env_id);
      watchdog('Apache Solr', 'Trying to update the Solr index while the environment %env_id is read-only in function %function', $log_args, WATCHDOG_WARNING);
      return FALSE;
    }

    // Split the nodes by status; both lists map nid => Solr document ID.
    $published_ids = array();
    $unpublished_ids = array();
    foreach ($nodes as $node) {
      $document_id = apachesolr_document_id($node->nid);
      if ($node->status) {
        $published_ids[$node->nid] = $document_id;
      }
      else {
        $unpublished_ids[$node->nid] = $document_id;
      }
    }

    try {
      $solr = apachesolr_get_solr($env_id);
      $solr->deleteByMultipleIds($unpublished_ids);
      apachesolr_set_last_index_updated($env_id, REQUEST_TIME);

      // There was no exception, so update the table: published rows first,
      // then unpublished ones.
      $ids_by_status = array(1 => $published_ids, 0 => $unpublished_ids);
      foreach ($ids_by_status as $status => $document_ids) {
        if (!$document_ids) {
          continue;
        }
        db_update($table)
          ->fields(array('changed' => REQUEST_TIME, 'status' => $status))
          ->condition('entity_id', array_keys($document_ids), 'IN')
          ->execute();
      }
    }
    catch (Exception $exception) {
      watchdog('Apache Solr', nl2br(check_plain($exception->getMessage())), NULL, WATCHDOG_ERROR);
      return FALSE;
    }

    return TRUE;
  }
  /**
   * Mass deletes nodes from the Solr index and the indexer table.
   *
   * The documents are deleted from the default environment's index first; the
   * indexer table rows are only removed when that did not throw.
   *
   * @param array $nodes
   *   Objects that each expose a nid property.
   * @param string|null $table
   *   (optional) The indexer table to delete from. Defaults to the node
   *   indexer table.
   *
   * @return bool
   *   TRUE if the delete succeeded or there was nothing to do; FALSE if the
   *   default environment is read-only or an exception was thrown.
   */
  function apachesolr_index_nodeapi_mass_delete(array $nodes, $table = NULL) {
    if (empty($nodes)) {
      return TRUE;
    }
    if (empty($table)) {
      $table = apachesolr_get_indexer_table('node');
    }

    $env_id = apachesolr_default_environment();
    $access_mode = apachesolr_environment_variable_get($env_id, 'apachesolr_read_only', APACHESOLR_READ_WRITE);
    if ($access_mode == APACHESOLR_READ_ONLY) {
      $log_args = array('%function' => __FUNCTION__, '%env_id' => $env_id);
      watchdog('Apache Solr', 'Trying to update the Solr index while the environment %env_id is read-only in function %function', $log_args, WATCHDOG_WARNING);
      return FALSE;
    }

    // Collect the node IDs and their matching Solr document IDs.
    $nids = array();
    $document_ids = array();
    foreach ($nodes as $node) {
      $document_ids[] = apachesolr_document_id($node->nid);
      $nids[] = $node->nid;
    }

    try {
      $solr = apachesolr_get_solr($env_id);
      $solr->deleteByMultipleIds($document_ids);
      apachesolr_set_last_index_updated($env_id, REQUEST_TIME);

      // There was no exception, so remove the rows from the table.
      $delete = db_delete($table);
      $delete->condition('entity_id', $nids, 'IN');
      $delete->execute();
    }
    catch (Exception $exception) {
      watchdog('Apache Solr', nl2br(check_plain($exception->getMessage())), NULL, WATCHDOG_ERROR);
      return FALSE;
    }

    return TRUE;
  }