sphinxapi.class.php 43 KB


  1. <?php
  2. /**
  3. * sphinx搜索接口类
  4. */
  5. //
  6. // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
  7. //
  8. //
  9. // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
  10. //
  11. // This program is free software; you can redistribute it and/or modify
  12. // it under the terms of the GNU General Public License. You should have
  13. // received a copy of the GPL license along with this program; if you
  14. // did not, you can find it at http://www.gnu.org/
  15. //
  16. /////////////////////////////////////////////////////////////////////////////
  17. // PHP version of Sphinx searchd client (PHP API)
  18. /////////////////////////////////////////////////////////////////////////////
  /// searchd command identifiers
  define ( 'SEARCHD_COMMAND_SEARCH', 0 );
  define ( 'SEARCHD_COMMAND_EXCERPT', 1 );
  define ( 'SEARCHD_COMMAND_UPDATE', 2 );
  define ( 'SEARCHD_COMMAND_KEYWORDS', 3 );
  define ( 'SEARCHD_COMMAND_PERSIST', 4 );
  define ( 'SEARCHD_COMMAND_STATUS', 5 );
  define ( 'SEARCHD_COMMAND_QUERY', 6 );

  /// command implementation versions spoken by this client
  define ( 'VER_COMMAND_SEARCH', 0x116 );
  define ( 'VER_COMMAND_EXCERPT', 0x100 );
  define ( 'VER_COMMAND_UPDATE', 0x102 );
  define ( 'VER_COMMAND_KEYWORDS', 0x100 );
  define ( 'VER_COMMAND_STATUS', 0x100 );
  define ( 'VER_COMMAND_QUERY', 0x100 );

  /// status codes returned by searchd
  define ( 'SEARCHD_OK', 0 );
  define ( 'SEARCHD_ERROR', 1 );
  define ( 'SEARCHD_RETRY', 2 );
  define ( 'SEARCHD_WARNING', 3 );

  /// match modes
  define ( 'SPH_MATCH_ALL', 0 );
  define ( 'SPH_MATCH_ANY', 1 );
  define ( 'SPH_MATCH_PHRASE', 2 );
  define ( 'SPH_MATCH_BOOLEAN', 3 );
  define ( 'SPH_MATCH_EXTENDED', 4 );
  define ( 'SPH_MATCH_FULLSCAN', 5 );
  define ( 'SPH_MATCH_EXTENDED2', 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  /// ranking modes (ext2 only)
  define ( 'SPH_RANK_PROXIMITY_BM25', 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
  define ( 'SPH_RANK_BM25', 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
  define ( 'SPH_RANK_NONE', 2 ); ///< no ranking, all matches get a weight of 1
  define ( 'SPH_RANK_WORDCOUNT', 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
  define ( 'SPH_RANK_PROXIMITY', 4 );
  define ( 'SPH_RANK_MATCHANY', 5 );
  define ( 'SPH_RANK_FIELDMASK', 6 );

  /// sort modes
  define ( 'SPH_SORT_RELEVANCE', 0 );
  define ( 'SPH_SORT_ATTR_DESC', 1 );
  define ( 'SPH_SORT_ATTR_ASC', 2 );
  define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
  define ( 'SPH_SORT_EXTENDED', 4 );
  define ( 'SPH_SORT_EXPR', 5 );

  /// filter types
  define ( 'SPH_FILTER_VALUES', 0 );
  define ( 'SPH_FILTER_RANGE', 1 );
  define ( 'SPH_FILTER_FLOATRANGE', 2 );

  /// attribute types
  define ( 'SPH_ATTR_INTEGER', 1 );
  define ( 'SPH_ATTR_TIMESTAMP', 2 );
  define ( 'SPH_ATTR_ORDINAL', 3 );
  define ( 'SPH_ATTR_BOOL', 4 );
  define ( 'SPH_ATTR_FLOAT', 5 );
  define ( 'SPH_ATTR_BIGINT', 6 );
  define ( 'SPH_ATTR_MULTI', 0x40000000 );

  /// grouping functions
  define ( 'SPH_GROUPBY_DAY', 0 );
  define ( 'SPH_GROUPBY_WEEK', 1 );
  define ( 'SPH_GROUPBY_MONTH', 2 );
  define ( 'SPH_GROUPBY_YEAR', 3 );
  define ( 'SPH_GROUPBY_ATTR', 4 );
  define ( 'SPH_GROUPBY_ATTRPAIR', 5 );
  81. // important properties of PHP's integers:
  82. // - always signed (one bit short of PHP_INT_SIZE)
  83. // - conversion from string to int is saturated
  84. // - float is double
  85. // - div converts arguments to floats
  86. // - mod converts arguments to ints
  87. // the packing code below works as follows:
  88. // - when we got an int, just pack it
  89. // if performance is a problem, this is the branch users should aim for
  90. //
  91. // - otherwise, we got a number in string form
  92. // this might be due to different reasons, but we assume that this is
  93. // because it didn't fit into PHP int
  94. //
  95. // - factor the string into high and low ints for packing
  96. // - if we have bcmath, then it is used
  97. // - if we don't, we have to do it manually (this is the fun part)
  98. //
  99. // - x64 branch does factoring using ints
  100. // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
  101. //
  102. // unpacking routines are pretty much the same.
  103. // - return ints if we can
  104. // - otherwise format number into a string
  /// Pack a signed 64-bit value into 8 bytes: two big-endian 32-bit words, high word first.
  /// $v must be numeric; it may be a native int or a number in string form
  /// (the form used when the value does not fit a native int).
  function sphPackI64 ( $v )
  {
      assert ( is_numeric($v) );

      // 64-bit build: cast to a native int and split it with shift/mask
      if ( PHP_INT_SIZE>=8 )
      {
          $native = (int)$v;
          return pack ( "NN", $native>>32, $native&0xFFFFFFFF );
      }

      // 32-bit build, native int: the high word is plain sign extension
      if ( is_int($v) )
      {
          $signWord = ( $v<0 ) ? -1 : 0;
          return pack ( "NN", $signWord, $v );
      }

      // 32-bit build with bcmath: arbitrary-precision split
      if ( function_exists("bcmul") )
      {
          // negative input is shifted up by 2^64 before it is split
          if ( bccomp ( $v, 0 ) == -1 )
              $v = bcadd ( "18446744073709551616", $v );
          $hiWord = bcdiv ( $v, "4294967296", 0 );
          $loWord = bcmod ( $v, "4294967296" );
          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$hiWord, (float)$loWord );
      }

      // 32-bit build without bcmath: cut the decimal string 13 digits from the right
      // and factor it into 32-bit words using floats
      $cut = max ( 0, strlen($v) - 13 );
      $tail = abs ( (float)substr ( $v, $cut ) );
      $head = abs ( (float)substr ( $v, 0, $cut ) );

      $mixed = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
      $carry = floor ( $mixed/4294967296.0 );
      $loWord = $mixed - ( $carry*4294967296.0 );
      $hiWord = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

      // the words above hold the magnitude; negate them for negative input
      if ( $v<0 )
      {
          if ( $loWord==0 )
          {
              $hiWord = 4294967296.0 - $hiWord;
          }
          else
          {
              $hiWord = 4294967295.0 - $hiWord;
              $loWord = 4294967296.0 - $loWord;
          }
      }
      return pack ( "NN", $hiWord, $loWord );
  }
  /// Pack an unsigned 64-bit value into 8 bytes: two big-endian 32-bit words, high word first.
  /// $v must be numeric; it may be a native int or a number in string form.
  function sphPackU64 ( $v )
  {
      assert ( is_numeric($v) );

      if ( PHP_INT_SIZE>=8 )
      {
          // 64-bit build
          assert ( $v>=0 );

          // native int: split with shift/mask
          if ( is_int($v) )
              return pack ( "NN", $v>>32, $v&0xFFFFFFFF );

          // not a native int, bcmath available
          if ( function_exists("bcmul") )
          {
              $hiWord = bcdiv ( $v, 4294967296, 0 );
              $loWord = bcmod ( $v, 4294967296 );
              return pack ( "NN", $hiWord, $loWord );
          }

          // not a native int, no bcmath: cut the decimal string 13 digits from the right
          // and factor it with integer arithmetic
          $cut = max ( 0, strlen($v) - 13 );
          $tail = (int)substr ( $v, $cut );
          $head = (int)substr ( $v, 0, $cut );

          $mixed = $tail + $head*1316134912;
          $loWord = $mixed % 4294967296;
          $hiWord = $head*2328 + (int)($mixed/4294967296);
          return pack ( "NN", $hiWord, $loWord );
      }

      // 32-bit build, native int: high word is always zero
      if ( is_int($v) )
          return pack ( "NN", 0, $v );

      // 32-bit build with bcmath
      if ( function_exists("bcmul") )
      {
          $hiWord = bcdiv ( $v, "4294967296", 0 );
          $loWord = bcmod ( $v, "4294967296" );
          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$hiWord, (float)$loWord );
      }

      // 32-bit build without bcmath: same factoring as above, carried out in floats
      $cut = max ( 0, strlen($v) - 13 );
      $tail = (float)substr ( $v, $cut );
      $head = (float)substr ( $v, 0, $cut );

      $mixed = $tail + $head*1316134912.0;
      $carry = floor ( $mixed / 4294967296.0 );
      $loWord = $mixed - ( $carry * 4294967296.0 );
      $hiWord = $head*2328.0 + $carry;
      return pack ( "NN", $hiWord, $loWord );
  }
  /// Unpack 8 bytes (two big-endian 32-bit words, high word first) as an unsigned 64-bit value.
  /// Returns a native int when the value fits, otherwise the number formatted as a decimal string.
  function sphUnpackU64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      if ( PHP_INT_SIZE>=8 )
      {
          // workaround for PHP 5.2.2 to 5.2.5, where unpack() can hand back negative words
          if ( $hi<0 ) $hi += (1<<32);
          if ( $lo<0 ) $lo += (1<<32);

          // 64-bit build: result still fits a signed native int
          if ( $hi<=2147483647 )
              return ($hi<<32) + $lo;

          // 64-bit build, value too large for int, bcmath available
          if ( function_exists("bcmul") )
              return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

          // 64-bit build, no bcmath: split both words around 10^5 and
          // assemble the decimal digits in two integer parts
          $base = 100000;
          $upper = ((int)($hi / $base) << 32) + (int)($lo / $base);
          $lower = (($hi % $base) << 32) + ($lo % $base);
          if ( $lower>$base )
          {
              $upper += (int)($lower / $base);
              $lower = $lower % $base;
          }

          if ( $upper==0 )
              return $lower;
          return sprintf ( "%d%05d", $upper, $lower );
      }

      // 32-bit build, value fits into the low word
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }

      // reinterpret both words as unsigned decimal strings
      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // 32-bit build with bcmath
      if ( function_exists("bcmul") )
          return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

      // 32-bit build without bcmath: float arithmetic around 10^7,
      // producing the upper digits and the 7 lowest digits separately
      $hi = (float)$hi;
      $lo = (float)$lo;

      $hiQuot = floor ( $hi/10000000.0 );
      $hiRem = $hi - $hiQuot*10000000.0;
      $mixed = $lo + $hiRem*4967296.0;
      $mixedQuot = floor ( $mixed/10000000.0 );
      $lower = $mixed - $mixedQuot*10000000.0;
      $upper = $hiQuot*4294967296.0 + $hiRem*429.0 + $mixedQuot;

      $upper = sprintf ( "%.0f", $upper );
      $lower = sprintf ( "%07.0f", $lower );
      if ( $upper=="0" )
          return sprintf( "%.0f", (float)$lower );
      return $upper . $lower;
  }
  /// Unpack 8 bytes (two big-endian 32-bit words, high word first) as a signed 64-bit value.
  /// Returns a native int when possible, otherwise the number formatted as a decimal string.
  function sphUnpackI64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      // 64-bit build: recombine the two words into a native int
      if ( PHP_INT_SIZE>=8 )
      {
          // workaround for PHP 5.2.2 to 5.2.5, where unpack() can hand back negative words
          if ( $hi<0 ) $hi += (1<<32);
          if ( $lo<0 ) $lo += (1<<32);

          return ($hi<<32) + $lo;
      }

      // 32-bit build, high word is zero: value lives entirely in the low word
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }

      // 32-bit build, high word is all ones: small negative value
      if ( $hi==-1 )
      {
          if ( $lo<0 )
              return $lo;
          return sprintf ( "%.0f", $lo - 4294967296.0 );
      }

      // general case: for negative values invert both words and remember
      // the +1 needed to complete the negation
      $sign = "";
      $plusOne = 0;
      if ( $hi<0 )
      {
          $hi = ~$hi;
          $lo = ~$lo;
          $plusOne = 1;
          $sign = "-";
      }

      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // 32-bit build with bcmath
      if ( function_exists("bcmul") )
          return $sign . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $plusOne );

      // 32-bit build without bcmath: float arithmetic around 10^7,
      // producing the upper digits and the 7 lowest digits separately
      $hi = (float)$hi;
      $lo = (float)$lo;

      $hiQuot = floor ( $hi/10000000.0 );
      $hiRem = $hi - $hiQuot*10000000.0;
      $mixed = $lo + $hiRem*4967296.0;
      $mixedQuot = floor ( $mixed/10000000.0 );
      $lower = $mixed - $mixedQuot*10000000.0 + $plusOne;
      $upper = $hiQuot*4294967296.0 + $hiRem*429.0 + $mixedQuot;

      // the +1 may overflow the 7-digit lower part; carry it upwards
      if ( $lower==10000000 )
      {
          $lower = 0;
          $upper += 1;
      }

      $upper = sprintf ( "%.0f", $upper );
      $lower = sprintf ( "%07.0f", $lower );
      if ( $upper=="0" )
          return $sign . sprintf( "%.0f", (float)$lower );
      return $sign . $upper . $lower;
  }
  /// Normalise a 32-bit word that came out of unpack() so it reads as unsigned.
  /// On 64-bit builds negative values are shifted up by 2^32 and an int is returned;
  /// on 32-bit builds the value is formatted with "%u" and a string is returned.
  function sphFixUint ( $value )
  {
      // x32 route, PHP ints are signed so the unsigned form must be a string
      if ( PHP_INT_SIZE<8 )
          return sprintf ( "%u", $value );

      // x64 route, workaround broken unpack() in 5.2.2+
      return ( $value<0 ) ? $value + (1<<32) : $value;
  }
  321. /// sphinx searchd client class
  322. class SphinxClient
  323. {
  324. var $_host; ///< searchd host (default is "localhost")
  325. var $_port; ///< searchd port (default is 9312)
  326. var $_offset; ///< how many records to seek from result-set start (default is 0)
  327. var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
  328. var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
  329. var $_weights; ///< per-field weights (default is 1 for all fields)
  330. var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
  331. var $_sortby; ///< attribute to sort by (defualt is "")
  332. var $_min_id; ///< min ID to match (default is 0, which means no limit)
  333. var $_max_id; ///< max ID to match (default is 0, which means no limit)
  334. var $_filters; ///< search filters
  335. var $_groupby; ///< group-by attribute name
  336. var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
  337. var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
  338. var $_groupdistinct;///< group-by count-distinct attribute
  339. var $_maxmatches; ///< max matches to retrieve
  340. var $_cutoff; ///< cutoff to stop searching at (default is 0)
  341. var $_retrycount; ///< distributed retries count
  342. var $_retrydelay; ///< distributed retries delay
  343. var $_anchor; ///< geographical anchor point
  344. var $_indexweights; ///< per-index weights
  345. var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  346. var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
  347. var $_fieldweights; ///< per-field-name weights
  348. var $_overrides; ///< per-query attribute values overrides
  349. var $_select; ///< select-list (attributes or expressions, with optional aliases)
  350. var $_error; ///< last error message
  351. var $_warning; ///< last warning message
  352. var $_connerror; ///< connection error vs remote error flag
  353. var $_reqs; ///< requests array for multi-query
  354. var $_mbenc; ///< stored mbstring encoding
  355. var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
  356. var $_timeout; ///< connect timeout
  357. /////////////////////////////////////////////////////////////////////////////
  358. // common stuff
  359. /////////////////////////////////////////////////////////////////////////////
  /// create a new client object and fill defaults
  ///
  /// This is the real constructor. A method named after the class is no longer
  /// treated as a constructor by PHP 8, so the defaults must be set here;
  /// SphinxClient() below is kept as an alias for code that calls it explicitly.
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 9312;
      $this->_path = false;
      $this->_socket = false;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights = array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;
      $this->_maxquerytime = 0;
      $this->_fieldweights = array();
      $this->_overrides = array();
      $this->_select = "*";

      $this->_error = ""; // per-reply fields (for single-query case)
      $this->_warning = "";
      $this->_connerror = false;

      $this->_reqs = array (); // requests storage (for multi-query case)
      $this->_mbenc = "";
      $this->_arrayresult = false;
      $this->_timeout = 0;
  }

  /// legacy class-named constructor, kept for backward compatibility
  /// (e.g. subclasses calling parent::SphinxClient()); resets all settings to defaults
  function SphinxClient ()
  {
      $this->__construct ();
  }
  /// close the stored socket, if there is one, when the client object is destroyed
  function __destruct()
  {
      if ( $this->_socket === false )
          return;

      fclose ( $this->_socket );
  }
  /// return the most recent error message (string; "" is the initial value)
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  /// return the most recent warning message (string; "" is the initial value)
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  /// return the connection-error flag, which lets callers tell network
  /// connection errors from searchd errors or broken responses
  function IsConnectError()
  {
      $flag = $this->_connerror;
      return $flag;
  }
  /// set searchd host name (string) and port (integer)
  ///
  /// $host may also name a UNIX socket, either as an absolute path ("/path/to/socket")
  /// or with an explicit "unix://" prefix; $port is ignored in that case.
  /// A $port of 0 (the default) selects the standard port 9312 rather than
  /// storing an unusable port 0.
  function SetServer ( $host, $port = 0 )
  {
      assert ( is_string($host) );

      // substr() instead of $host[0], so an empty host does not raise an offset warning
      if ( substr ( $host, 0, 1 )=='/' )
      {
          $this->_path = 'unix://' . $host;
          return;
      }
      if ( substr ( $host, 0, 7 )=="unix://" )
      {
          $this->_path = $host;
          return;
      }

      assert ( is_int($port) );
      $this->_host = $host;
      $this->_port = ( $port==0 ) ? 9312 : $port;
      $this->_path = '';
  }
  /// store the connect timeout passed to fsockopen();
  /// values of 0 or less mean no explicit timeout is passed
  function SetConnectTimeout ( $timeout )
  {
      assert ( is_numeric($timeout) );

      $this->_timeout = $timeout;
  }
  /// write $length bytes of $data to $handle
  /// returns true on success; on EOF or a short write records a connection error and returns false
  function _Send ( $handle, $data, $length )
  {
      // the write is only attempted when the stream is not already at EOF
      $sent = !feof($handle) && fwrite ( $handle, $data, $length ) === $length;
      if ( $sent )
          return true;

      $this->_error = 'connection unexpectedly closed (timed out?)';
      $this->_connerror = true;
      return false;
  }
  456. /////////////////////////////////////////////////////////////////////////////
  /// enter mbstring workaround mode
  /// when bit 2 of mbstring.func_overload is set, remember the current internal
  /// encoding and switch to latin1; _MBPop() restores it
  function _MBPush ()
  {
      $this->_mbenc = "";

      if ( !( ini_get ( "mbstring.func_overload" ) & 2 ) )
          return;

      $this->_mbenc = mb_internal_encoding();
      mb_internal_encoding ( "latin1" );
  }
  /// leave mbstring workaround mode
  /// restores the encoding saved by _MBPush(), if one was saved
  function _MBPop ()
  {
      if ( !$this->_mbenc )
          return;

      mb_internal_encoding ( $this->_mbenc );
  }
  /// connect to searchd server
  /// returns a stream handle on success; on failure stores a message in _error and returns false
  function _Connect ()
  {
      if ( $this->_socket!==false )
      {
          // a socket is already stored (persistent connection mode);
          // reuse it as long as it is still alive
          if ( !@feof ( $this->_socket ) )
              return $this->_socket;

          // stale socket, fall through and open a fresh connection
          $this->_socket = false;
      }

      $this->_connerror = false;
      $errno = 0;
      $errstr = "";

      // a configured socket path takes precedence over host:port
      $usePath = (bool)$this->_path;
      $host = $usePath ? $this->_path : $this->_host;
      $port = $usePath ? 0 : $this->_port;

      // only pass a timeout to fsockopen() when one was configured
      $fp = ( $this->_timeout<=0 )
          ? @fsockopen ( $host, $port, $errno, $errstr )
          : @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

      if ( !$fp )
      {
          $location = $this->_path ? $this->_path : "{$this->_host}:{$this->_port}";

          $errstr = trim ( $errstr );
          $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
          $this->_connerror = true;
          return false;
      }

      // send my version
      // this is a subtle part. we must do it before (!) reading back from searchd.
      // because otherwise under some conditions (reported on FreeBSD for instance)
      // TCP stack could throttle write-write-read pattern because of Nagle.
      $sent = $this->_Send ( $fp, pack ( "N", 1 ), 4 );
      if ( !$sent )
      {
          fclose ( $fp );
          $this->_error = "failed to send client protocol version";
          return false;
      }

      // read the server's protocol version (one 32-bit big-endian word) and check it
      $reply = unpack ( "N*", fread ( $fp, 4 ) );
      list(,$v) = $reply;
      $v = (int)$v;
      if ( $v<1 )
      {
          fclose ( $fp );
          $this->_error = "expected searchd protocol version 1+, got version '$v'";
          return false;
      }

      return $fp;
  }
  /// get and check response packet from searchd server
  ///
  /// Reads an 8-byte header (16-bit status, 16-bit command version, 32-bit body length),
  /// then the body. $fp is closed afterwards unless a socket is stored in _socket.
  /// Returns the response body (with the warning text stripped for SEARCHD_WARNING),
  /// or false with _error set on failure.
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;
      $status = 0;
      $ver = 0;

      $header = fread ( $fp, 8 );
      if ( is_string($header) && strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, $left );

              // a failed or empty read without EOF (e.g. a stream timeout) would make
              // this loop spin forever; stop and let the length check below report it
              if ( $chunk===false || $chunk==="" )
                  break;

              // strict check above, so a chunk such as "0" is not mistaken for "no data"
              $response .= $chunk;
              $left -= strlen($chunk);
          }
      }

      // only close the handle when no socket is stored in _socket
      if ( $this->_socket === false )
          fclose ( $fp );

      // check response
      $read = strlen ( $response );
      if ( $response==="" || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body starts with a length-prefixed warning message, the payload follows it
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  595. /////////////////////////////////////////////////////////////////////////////
  596. // searching
  597. /////////////////////////////////////////////////////////////////////////////
  /// set the result-set window: $offset records are skipped, $limit records are returned
  /// $max and $cutoff replace the stored max-matches and cutoff values only when greater than 0
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      // 0 (or less) leaves the previously stored value untouched
      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  /// set maximum query time, in milliseconds, per-index
  /// $max must be a non-negative integer; 0 means "do not limit"
  function SetMaxQueryTime ( $max )
  {
      assert ( is_int($max) );
      assert ( $max>=0 );

      $this->_maxquerytime = $max;
  }
  /// set matching mode; $mode must be one of the SPH_MATCH_xxx constants
  function SetMatchMode ( $mode )
  {
      $known = array (
          SPH_MATCH_ALL,
          SPH_MATCH_ANY,
          SPH_MATCH_PHRASE,
          SPH_MATCH_BOOLEAN,
          SPH_MATCH_EXTENDED,
          SPH_MATCH_FULLSCAN,
          SPH_MATCH_EXTENDED2 );

      // loose comparison on purpose, same as a chain of == checks
      assert ( in_array ( $mode, $known ) );

      $this->_mode = $mode;
  }
  /// set ranking mode; $ranker must be one of the SPH_RANK_xxx constants
  ///
  /// All rankers defined in this file are accepted, including SPH_RANK_MATCHANY
  /// and SPH_RANK_FIELDMASK, which the assertion previously rejected.
  function SetRankingMode ( $ranker )
  {
      assert ( $ranker==SPH_RANK_PROXIMITY_BM25
          || $ranker==SPH_RANK_BM25
          || $ranker==SPH_RANK_NONE
          || $ranker==SPH_RANK_WORDCOUNT
          || $ranker==SPH_RANK_PROXIMITY
          || $ranker==SPH_RANK_MATCHANY
          || $ranker==SPH_RANK_FIELDMASK );

      $this->_ranker = $ranker;
  }
  /// set matches sorting mode; $mode must be one of the SPH_SORT_xxx constants
  /// $sortby is the sort clause and must be non-empty for every mode except SPH_SORT_RELEVANCE
  function SetSortMode ( $mode, $sortby="" )
  {
      $known = array (
          SPH_SORT_RELEVANCE,
          SPH_SORT_ATTR_DESC,
          SPH_SORT_ATTR_ASC,
          SPH_SORT_TIME_SEGMENTS,
          SPH_SORT_EXTENDED,
          SPH_SORT_EXPR );

      // loose comparison on purpose, same as a chain of == checks
      assert ( in_array ( $mode, $known ) );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sort = $mode;
      $this->_sortby = $sortby;
  }
  /// bind per-field weights by order; $weights is an array of integers
  /// DEPRECATED; use SetFieldWeights() instead
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $w )
      {
          assert ( is_int($w) );
      }

      $this->_weights = $weights;
  }
  /// bind per-field weights by name
  /// $weights maps field name (string) to weight (integer)
  function SetFieldWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $field=>$w )
      {
          assert ( is_string($field) );
          assert ( is_int($w) );
      }

      $this->_fieldweights = $weights;
  }
  /// bind per-index weights by name
  /// $weights maps index name (string) to weight (integer)
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $name=>$w )
      {
          assert ( is_string($name) );
          assert ( is_int($w) );
      }

      $this->_indexweights = $weights;
  }
  /// set IDs range to match
  /// only match records if document ID is between $min and $max (inclusive)
  function SetIDRange ( $min, $max )
  {
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $this->_max_id = $max;
      $this->_min_id = $min;
  }
  /// set values set filter
  /// only match records where $attribute value is in given set
  /// (or is NOT in the set when $exclude is true); nothing is added for an empty $values
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      // with assertions disabled, silently ignore a missing or empty value set
      if ( !is_array($values) || !count($values) )
          return;

      foreach ( $values as $candidate )
      {
          assert ( is_numeric($candidate) );
      }

      $this->_filters[] = array (
          "type"=>SPH_FILTER_VALUES,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "values"=>$values );
  }
  /// set range filter
  /// only match records if $attribute value is between $min and $max (inclusive)
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"=>SPH_FILTER_RANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// set float range filter
  /// only match records if $attribute value is between $min and $max (inclusive);
  /// both bounds must be floats
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"=>SPH_FILTER_FLOATRANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// setup anchor point for geosphere distance calculations
  /// required to use @geodist in filters and sorting
  /// $attrlat / $attrlong name the latitude / longitude attributes;
  /// $lat and $long must be floats, in radians
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $anchor = array (
          "attrlat"=>$attrlat,
          "attrlong"=>$attrlong,
          "lat"=>$lat,
          "long"=>$long );
      $this->_anchor = $anchor;
  }
  /// set grouping attribute, grouping function (one of SPH_GROUPBY_xxx)
  /// and the clause used to sort the groups
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );

      $known = array (
          SPH_GROUPBY_DAY,
          SPH_GROUPBY_WEEK,
          SPH_GROUPBY_MONTH,
          SPH_GROUPBY_YEAR,
          SPH_GROUPBY_ATTR,
          SPH_GROUPBY_ATTRPAIR );

      // loose comparison on purpose, same as a chain of == checks
      assert ( in_array ( $func, $known ) );

      $this->_groupby = $attribute;
      $this->_groupfunc = $func;
      $this->_groupsort = $groupsort;
  }
  /// set count-distinct attribute (by name) for group-by queries
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string($attribute) );

      $this->_groupdistinct = $attribute;
  }
  /// set distributed retries count and delay; both must be non-negative integers
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );

      $this->_retrydelay = $delay;
      $this->_retrycount = $count;
  }
  /// set result set format (hash or array; hash by default); $arrayresult must be a bool
  /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  function SetArrayResult ( $arrayresult )
  {
      assert ( is_bool($arrayresult) );

      $this->_arrayresult = $arrayresult;
  }
  /// set attribute values override
  /// there can be only one override per attribute (a later call for the same name replaces it)
  /// $attrtype must be one of the integer, timestamp, bool, float or bigint attribute types
  /// $values must be a hash that maps document IDs to attribute values
  function SetOverride ( $attrname, $attrtype, $values )
  {
      $allowedTypes = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

      assert ( is_string ( $attrname ) );
      assert ( in_array ( $attrtype, $allowedTypes ) );
      assert ( is_array ( $values ) );

      $override = array (
          "attr"=>$attrname,
          "type"=>$attrtype,
          "values"=>$values );
      $this->_overrides[$attrname] = $override;
  }
  /// set select-list (attributes or expressions), SQL-like syntax; $select must be a string
  function SetSelect ( $select )
  {
      assert ( is_string ( $select ) );

      $this->_select = $select;
  }
  797. //////////////////////////////////////////////////////////////////////////////
  /// clear all filters and the geo anchor (for multi-queries)
  function ResetFilters ()
  {
      $this->_anchor = array();
      $this->_filters = array();
  }
  /// reset group-by settings to their defaults (for multi-queries)
  function ResetGroupBy ()
  {
      $this->_groupdistinct = "";
      $this->_groupsort = "@group desc";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupby = "";
  }
  /// drop every attribute value override (for multi-queries)
  function ResetOverrides ()
  {
      $empty = array ();
      $this->_overrides = $empty;
  }
  817. //////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server, run given search query through given indexes,
  /// and return the search results
  /// returns the result of the single query, or false on failure
  /// (the batch did not return an array, or searchd reported an error status)
  function Query ( $query, $index="*", $comment="" )
  {
      assert ( empty($this->_reqs) );

      // run the query as a batch of one
      $this->AddQuery ( $query, $index, $comment );
      $results = $this->RunQueries ();
      $this->_reqs = array (); // just in case it failed too early

      // probably network error; error message should be already filled
      if ( !is_array($results) )
          return false;

      $first = $results[0];
      $this->_error = $first["error"];
      $this->_warning = $first["warning"];

      return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
  }
  /// helper to pack floats in network byte order
  /// returns the 4 bytes of $f as a single-precision float
  function _PackFloat ( $f )
  {
      // float to machine-order bytes, reread as a machine-order 32-bit integer
      $words = unpack ( "L*", pack ( "f", $f ) );

      // unpack() results are 1-based; emit that integer big-endian
      return pack ( "N", $words[1] );
  }
  /// Append one query to the pending multi-query batch.
  /// The request is serialized from the client's current settings (limits,
  /// modes, weights, id range, filters, group-by, anchor, overrides,
  /// select-list) together with the given query text, index list and comment.
  /// Returns the position of the new request in the batch, i.e. the index
  /// into the results array from the RunQueries() call.
  function AddQuery ( $query, $index="*", $comment="" )
  {
      // mbstring workaround
      $this->_MBPush ();

      // the request is collected piece by piece and glued together at the end;
      // the order of the pieces IS the wire format, so it must not change
      $parts = array ();

      // mode and limits, then the sort-by clause
      $parts[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
      $parts[] = pack ( "N", strlen($this->_sortby) );
      $parts[] = $this->_sortby;

      // query itself
      $parts[] = pack ( "N", strlen($query) );
      $parts[] = $query;

      // weights
      $parts[] = pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $w )
          $parts[] = pack ( "N", (int)$w );

      // indexes
      $parts[] = pack ( "N", strlen($index) );
      $parts[] = $index;

      // id64 range marker, followed by the id64 range
      $parts[] = pack ( "N", 1 );
      $parts[] = sphPackU64 ( $this->_min_id );
      $parts[] = sphPackU64 ( $this->_max_id );

      // filters
      $parts[] = pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $f )
      {
          $kind = $f["type"];

          $parts[] = pack ( "N", strlen($f["attr"]) );
          $parts[] = $f["attr"];
          $parts[] = pack ( "N", $kind );

          if ( $kind==SPH_FILTER_VALUES )
          {
              $parts[] = pack ( "N", count($f["values"]) );
              foreach ( $f["values"] as $v )
                  $parts[] = sphPackI64 ( $v );

          } elseif ( $kind==SPH_FILTER_RANGE )
          {
              $parts[] = sphPackI64 ( $f["min"] );
              $parts[] = sphPackI64 ( $f["max"] );

          } elseif ( $kind==SPH_FILTER_FLOATRANGE )
          {
              $parts[] = $this->_PackFloat ( $f["min"] );
              $parts[] = $this->_PackFloat ( $f["max"] );

          } else
          {
              assert ( 0 && "internal error: unhandled filter type" );
          }

          $parts[] = pack ( "N", $f["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $parts[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) );
      $parts[] = $this->_groupby;
      $parts[] = pack ( "N", $this->_maxmatches );
      $parts[] = pack ( "N", strlen($this->_groupsort) );
      $parts[] = $this->_groupsort;
      $parts[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $parts[] = pack ( "N", strlen($this->_groupdistinct) );
      $parts[] = $this->_groupdistinct;

      // anchor point: a 0/1 presence flag, then attribute names and coordinates
      if ( empty($this->_anchor) )
      {
          $parts[] = pack ( "N", 0 );
      } else
      {
          $anchor = $this->_anchor;
          $parts[] = pack ( "N", 1 );
          $parts[] = pack ( "N", strlen($anchor["attrlat"]) );
          $parts[] = $anchor["attrlat"];
          $parts[] = pack ( "N", strlen($anchor["attrlong"]) );
          $parts[] = $anchor["attrlong"];
          $parts[] = $this->_PackFloat ( $anchor["lat"] );
          $parts[] = $this->_PackFloat ( $anchor["long"] );
      }

      // per-index weights
      $parts[] = pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $name=>$w )
      {
          $parts[] = pack ( "N", strlen($name) );
          $parts[] = $name;
          $parts[] = pack ( "N", $w );
      }

      // max query time
      $parts[] = pack ( "N", $this->_maxquerytime );

      // per-field weights
      $parts[] = pack ( "N", count($this->_fieldweights) );
      foreach ( $this->_fieldweights as $name=>$w )
      {
          $parts[] = pack ( "N", strlen($name) );
          $parts[] = $name;
          $parts[] = pack ( "N", $w );
      }

      // comment
      $parts[] = pack ( "N", strlen($comment) );
      $parts[] = $comment;

      // attribute overrides
      $parts[] = pack ( "N", count($this->_overrides) );
      foreach ( $this->_overrides as $override )
      {
          $otype = $override["type"];

          $parts[] = pack ( "N", strlen($override["attr"]) );
          $parts[] = $override["attr"];
          $parts[] = pack ( "NN", $otype, count($override["values"]) );

          foreach ( $override["values"] as $docid=>$newval )
          {
              assert ( is_numeric($docid) );
              assert ( is_numeric($newval) );

              $parts[] = sphPackU64 ( $docid );

              // the value encoding depends on the overridden attribute's type
              if ( $otype==SPH_ATTR_FLOAT )
                  $parts[] = $this->_PackFloat ( $newval );
              elseif ( $otype==SPH_ATTR_BIGINT )
                  $parts[] = sphPackI64 ( $newval );
              else
                  $parts[] = pack ( "N", $newval );
          }
      }

      // select-list
      $parts[] = pack ( "N", strlen($this->_select) );
      $parts[] = $this->_select;

      $req = implode ( "", $parts );

      // mbstring workaround
      $this->_MBPop ();

      // store request to requests array
      $this->_reqs[] = $req;
      return count($this->_reqs)-1;
  }
  /// Connect to searchd, send every request queued by AddQuery() as a single
  /// batch, and return an array of result sets.
  /// Returns false when no queries are queued (setting _error) or when the
  /// connect, send or receive step fails.
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      // mbstring workaround; on the success path the matching _MBPop()
      // is issued at the end of _ParseSearchResponse()
      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop ();
          return false;
      }

      // build the packet: header, number of queries, then all queued requests
      $nreqs = count($this->_reqs);
      $body = join ( "", $this->_reqs );
      $len = 4+strlen($body); // body length includes the 4-byte query count
      $packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $body;

      // send query ($len+8 covers the command, version and length fields too)
      if ( !$this->_Send ( $fp, $packet, $len+8 ) )
      {
          $this->_MBPop ();
          return false;
      }

      // get response
      $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      // query sent ok; we can reset reqs now
      $this->_reqs = array ();

      // parse and return response
      return $this->_ParseSearchResponse ( $response, $nreqs );
  }
  /// Decode a raw search reply into an array of per-query result sets.
  /// $response is the reply body, $nreqs the number of queries it answers.
  /// Each result set carries "error", "warning" and "status"; unless the
  /// query failed it also carries "fields", "attrs", "matches" (when any),
  /// "total", "total_found", "time" and "words" (when any).
  /// Calls _MBPop() before returning.
  function _ParseSearchResponse ( $response, $nreqs )
  {
      $p = 0; // current read position
      $max = strlen($response); // upper bound for reads, protects against broken responses
      $results = array ();

      for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
      {
          $res = array ( "error"=>"", "warning"=>"" );

          // status, optionally followed by a warning or error message
          list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          $res["status"] = $status;
          if ( $status!=SEARCHD_OK )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $message = substr ( $response, $p, $len ); $p += $len;

              if ( $status!=SEARCHD_WARNING )
              {
                  // a failed query has no further payload; move on to the next one
                  $res["error"] = $message;
                  $results[] = $res;
                  continue;
              }
              $res["warning"] = $message;
          }

          // schema: field names
          $fields = array ();
          list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          for ( $i=0; $i<$nfields && $p<$max; $i++ )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $fields[] = substr ( $response, $p, $len ); $p += $len;
          }
          $res["fields"] = $fields;

          // schema: attribute name => attribute type
          $attrs = array ();
          list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          for ( $i=0; $i<$nattrs && $p<$max; $i++ )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $name = substr ( $response, $p, $len ); $p += $len;
              list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $attrs[$name] = $type;
          }
          $res["attrs"] = $attrs;

          // match count and the 64-bit document id flag
          list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

          // matches
          for ( $m=0; $m<$count && $p<$max; $m++ )
          {
              // document id and weight
              if ( $id64 )
              {
                  $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
                  list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              } else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $p, 8 ) ) );
                  $p += 8;
                  $doc = sphFixUint($doc);
              }
              $weight = sprintf ( "%u", $weight );

              // attribute values, in schema order
              $attrvals = array ();
              foreach ( $attrs as $name=>$type )
              {
                  if ( $type==SPH_ATTR_BIGINT )
                  {
                      // 64bit ints
                      $attrvals[$name] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;

                  } elseif ( $type==SPH_ATTR_FLOAT )
                  {
                      // floats arrive as a network-order 32-bit word; repack it
                      // in machine order and reinterpret the bytes as a float
                      list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                      list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
                      $attrvals[$name] = $fval;

                  } else
                  {
                      // everything else is handled as unsigned ints
                      list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                      if ( $type & SPH_ATTR_MULTI )
                      {
                          // multi-value attribute: $val is the number of values that follow
                          $attrvals[$name] = array ();
                          for ( $v=0; $v<$val && $p<$max; $v++ )
                          {
                              list(,$item) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                              $attrvals[$name][] = sphFixUint($item);
                          }
                      } else
                      {
                          $attrvals[$name] = sphFixUint($val);
                      }
                  }
              }

              // create match entry: a sequential list when _arrayresult is set,
              // otherwise a map keyed by document id
              if ( $this->_arrayresult )
                  $res["matches"][] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
              else
                  $res["matches"][$doc] = array ( "weight"=>$weight, "attrs"=>$attrvals );
          }

          // totals, elapsed time, number of per-word statistics entries
          list ( $total, $total_found, $msecs, $words ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
          $p += 16;
          $res["total"] = sprintf ( "%u", $total );
          $res["total_found"] = sprintf ( "%u", $total_found );
          $res["time"] = sprintf ( "%.3f", $msecs/1000 );

          // per-word statistics
          for ( $w=0; $w<$words && $p<$max; $w++ )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $word = substr ( $response, $p, $len ); $p += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
              $res["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }

          $results[] = $res;
      }

      $this->_MBPop ();
      return $results;
  }
  1103. /////////////////////////////////////////////////////////////////////////////
  1104. // excerpts generation
  1105. /////////////////////////////////////////////////////////////////////////////
  /// Connect to searchd server, and generate excerpts (snippets)
  /// of given documents for given query.
  ///
  /// $docs  - array of document bodies (strings)
  /// $index - index name, string
  /// $words - query words to highlight, string
  /// $opts  - optional settings; any of before_match, after_match,
  ///          chunk_separator, limit, around, exact_phrase, single_passage,
  ///          use_boundaries, weight_order that is not set gets a default
  ///
  /// Returns false on failure, an array of snippets on success.
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // fixup options
      /////////////////

      $defaults = array (
          "before_match"    => "<b>",
          "after_match"     => "</b>",
          "chunk_separator" => " ... ",
          "limit"           => 256,
          "around"          => 5,
          "exact_phrase"    => false,
          "single_passage"  => false,
          "use_boundaries"  => false,
          "weight_order"    => false );
      foreach ( $defaults as $name=>$value )
          if ( !isset($opts[$name]) )
              $opts[$name] = $value;

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $flags = 1; // remove spaces
      $flagbits = array (
          "exact_phrase"   => 2,
          "single_passage" => 4,
          "use_boundaries" => 8,
          "weight_order"   => 16 );
      foreach ( $flagbits as $name=>$bit )
          if ( $opts[$name] )
              $flags |= $bit;

      $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags

      // index, words, then the three string options
      $strings = array (
          $index,
          $words,
          $opts["before_match"],
          $opts["after_match"],
          $opts["chunk_separator"] );
      foreach ( $strings as $s )
          $req .= pack ( "N", strlen($s) ) . $s;

      $req .= pack ( "N", (int)$opts["limit"] );
      $req .= pack ( "N", (int)$opts["around"] );

      // documents
      $ndocs = count($docs);
      $req .= pack ( "N", $ndocs );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $req .= pack ( "N", strlen($doc) ) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header

      if ( !$this->_Send ( $fp, $req, $len+8 ) )
      {
          $this->_MBPop ();
          return false;
      }

      $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      // one length-prefixed snippet is expected per input document
      $pos = 0;
      $snippets = array ();
      $rlen = strlen($response);
      for ( $i=0; $i<$ndocs; $i++ )
      {
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          if ( $pos+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }

          if ( $len )
              $snippets[] = substr ( $response, $pos, $len );
          else
              $snippets[] = "";
          $pos += $len;
      }

      $this->_MBPop ();
      return $snippets;
  }
  1191. /////////////////////////////////////////////////////////////////////////////
  1192. // keyword generation
  1193. /////////////////////////////////////////////////////////////////////////////
  /// Connect to searchd server, and generate keyword list for a given query.
  ///
  /// $query - query text, string
  /// $index - index name, string
  /// $hits  - bool; when true every keyword also gets "docs" and "hits" counts
  ///
  /// Returns false on failure, or an array of entries with "tokenized" and
  /// "normalized" keys (plus "docs" and "hits" when $hits is set) on success.
  function BuildKeywords ( $query, $index, $hits )
  {
      assert ( is_string($query) );
      assert ( is_string($index) );
      assert ( is_bool($hits) );

      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // build request
      /////////////////

      // v.1.0 req: query, index, hits flag
      $body = pack ( "N", strlen($query) ) . $query
          . pack ( "N", strlen($index) ) . $index
          . pack ( "N", (int)$hits );

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($body);
      $packet = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $body; // add header

      if ( !$this->_Send ( $fp, $packet, $len+8 ) )
      {
          $this->_MBPop ();
          return false;
      }

      $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $keywords = array ();
      $rlen = strlen($response);

      list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
      $pos += 4;

      for ( $i=0; $i<$nwords; $i++ )
      {
          $entry = array ();

          // two length-prefixed strings: tokenized form, then normalized form
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
          $entry["tokenized"] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;

          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
          $entry["normalized"] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;

          // optional statistics, present only when they were requested
          if ( $hits )
          {
              list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
              $pos += 8;
              $entry["docs"] = $ndocs;
              $entry["hits"] = $nhits;
          }

          $keywords[] = $entry;

          // reading past the end of the buffer means the reply was truncated
          if ( $pos > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
      }

      $this->_MBPop ();
      return $keywords;
  }
  /// Prefix every character from a fixed list of special characters
  /// ( \ ( ) | - ! @ ~ " & / ^ $ = ) in $string with a backslash.
  /// Returns the escaped string.
  function EscapeString ( $string )
  {
      // the backslash must stay first: replacements are applied in list order,
      // and the backslashes added by later replacements must not be doubled
      $special = array ( '\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=' );

      $escaped = array ();
      foreach ( $special as $ch )
          $escaped[] = '\\' . $ch;

      return str_replace ( $special, $escaped, $string );
  }
  1266. /////////////////////////////////////////////////////////////////////////////
  1267. // attribute updates
  1268. /////////////////////////////////////////////////////////////////////////////
  /// Batch update given attributes in given rows in given indexes.
  ///
  /// $index  - index name(s), string
  /// $attrs  - array of attribute names (strings)
  /// $values - array keyed by document id; each entry is an array holding
  ///           one new value per attribute in $attrs (ints, or arrays of
  ///           ints when $mva is true)
  /// $mva    - bool; whether the values are sent as multi-value lists
  ///
  /// Returns amount of updated documents (0 or more) on success, or -1 on failure.
  function UpdateAttributes ( $index, $attrs, $values, $mva=false )
  {
      // verify everything
      assert ( is_string($index) );
      assert ( is_bool($mva) );

      assert ( is_array($attrs) );
      foreach ( $attrs as $attr )
          assert ( is_string($attr) );

      assert ( is_array($values) );
      foreach ( $values as $id=>$entry )
      {
          assert ( is_numeric($id) );
          assert ( is_array($entry) );
          assert ( count($entry)==count($attrs) );
          foreach ( $entry as $v )
          {
              if ( $mva )
              {
                  assert ( is_array($v) );
                  foreach ( $v as $vv )
                      assert ( is_int($vv) );
              } else
                  assert ( is_int($v) );
          }
      }

      // mbstring workaround: the strlen() calls below must count bytes, exactly
      // as in the other request builders of this class; every exit path from
      // here on has to call _MBPop()
      $this->_MBPush ();

      // build request
      $req = pack ( "N", strlen($index) ) . $index;

      $req .= pack ( "N", count($attrs) );
      foreach ( $attrs as $attr )
      {
          $req .= pack ( "N", strlen($attr) ) . $attr;
          $req .= pack ( "N", $mva ? 1 : 0 );
      }

      $req .= pack ( "N", count($values) );
      foreach ( $values as $id=>$entry )
      {
          $req .= sphPackU64 ( $id );
          foreach ( $entry as $v )
          {
              // plain attribute: the value itself; MVA: value count, then the values
              $req .= pack ( "N", $mva ? count($v) : $v );
              if ( $mva )
                  foreach ( $v as $vv )
                      $req .= pack ( "N", $vv );
          }
      }

      // connect, send query, get response
      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop ();
          return -1;
      }

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
      if ( !$this->_Send ( $fp, $req, $len+8 ) )
      {
          $this->_MBPop ();
          return -1;
      }

      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
      {
          $this->_MBPop ();
          return -1;
      }

      // parse response
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      $this->_MBPop ();
      return $updated;
  }
  1329. /////////////////////////////////////////////////////////////////////////////
  1330. // persistent connections
  1331. /////////////////////////////////////////////////////////////////////////////
  /// Open a persistent connection to searchd.
  /// Returns false (with _error set) when a connection is already open, and
  /// false when connecting or sending the persist command fails; true otherwise.
  function Open()
  {
      if ( $this->_socket !== false )
      {
          $this->_error = 'already connected';
          return false;
      }

      $fp = $this->_Connect();
      if ( !$fp )
          return false;

      // command, command version = 0, body length = 4, body = 1
      $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
      if ( $this->_Send ( $fp, $req, 12 ) )
      {
          // keep the handle as the persistent socket
          $this->_socket = $fp;
          return true;
      }

      return false;
  }
  /// Close the persistent connection opened by Open().
  /// Returns true on success, false (with _error set) when not connected.
  function Close()
  {
      if ( $this->_socket !== false )
      {
          fclose ( $this->_socket );
          $this->_socket = false;
          return true;
      }

      $this->_error = 'not connected';
      return false;
  }
  1359. //////////////////////////////////////////////////////////////////////////
  1360. // status
  1361. //////////////////////////////////////////////////////////////////////////
  /// Query searchd status.
  /// Returns false on failure (connect/send/receive error, or a truncated
  /// reply, in which case _error is set to "incomplete reply"); on success
  /// returns an array of rows, each row being an array of column strings.
  function Status ()
  {
      $this->_MBPush ();
      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
      if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
          !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      $p = 0;
      $rlen = strlen($response);

      // the reply starts with the row and column counts (4 bytes each)
      if ( $rlen < 8 )
      {
          $this->_error = "incomplete reply";
          $this->_MBPop ();
          return false;
      }
      list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

      // followed by rows*cols length-prefixed strings, row by row
      $res = array();
      for ( $i=0; $i<$rows; $i++ )
          for ( $j=0; $j<$cols; $j++ )
      {
          // never read a length prefix or a cell body past the end of the buffer
          if ( $p+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

          if ( $p+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          $res[$i][] = $len ? substr ( $response, $p, $len ) : "";
          $p += $len;
      }

      $this->_MBPop ();
      return $res;
  }
  1390. }
  1391. //
  1392. // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
  1393. //