diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..7aebc83 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# EditorConfig is awesome: http://EditorConfig.org +root = true + +[*] +end_of_line = lf +insert_final_newline = true + +[**.{php,md}] +charset = utf-8 +indent_style = space +indent_size = 4 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..de4f67d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +/.gitattributes export-ignore +/.gitignore export-ignore +/.github export-ignore +/.php-cs-fixer.php export-ignore +/.editorconfig export-ignore +/phpunit.xml export-ignore +/phpunit.xml.dist export-ignore +/phpstan.neon export-ignore +/tests export-ignore diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php new file mode 100644 index 0000000..b62b596 --- /dev/null +++ b/.php-cs-fixer.php @@ -0,0 +1,36 @@ +setUsingCache(true) + ->setRiskyAllowed(true) + ->setRules([ + '@PHP71Migration' => true, + '@PHPUnit75Migration:risky' => true, + '@PSR12' => true, + 'header_comment' => ['header' => $fileHeaderComment], + ]) + ->setFinder( + PhpCsFixer\Finder::create() + ->ignoreVCSIgnored(true) + ->files() + ->name('*.php') + ->exclude('vendor') + ->in(__DIR__) + ); diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/assets/ip2region.xdb b/assets/ip2region.xdb new file mode 100644 index 0000000..31f96a1 Binary files /dev/null and b/assets/ip2region.xdb differ diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..bfd0251 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,13 @@ +parameters: + level: 5 + checkMissingIterableValueType: false + checkFunctionNameCase: true + reportUnmatchedIgnoredErrors: false + checkGenericClassInNonGenericObjectType: false + inferPrivatePropertyTypeFromConstructor: true + treatPhpDocTypesAsCertain: false + paths: + - src + - tests + ignoreErrors: + - '#PHPDoc tag .* has invalid value.*#' diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..16a418d --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,25 @@ + + + + + ./tests/ + + + + + src/ + + + + + + diff --git a/src/Ip2Region.php b/src/Ip2Region.php index e79727d..29377b3 100644 --- a/src/Ip2Region.php +++ b/src/Ip2Region.php @@ -1,397 +1,65 @@ - * @date 2015-10-29 + * Copyright 2022 The Ip2Region Authors. All rights reserved. + * Use of this source code is governed by an Apache 2.0-style + * license that can be found in the LICENSE file. + * + * @link https://github.com/chinayin/ip2region-sdk-php + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code.
*/ -namespace lionsoul2014; - -defined('INDEX_BLOCK_LENGTH') or define('INDEX_BLOCK_LENGTH', 12); -defined('TOTAL_HEADER_LENGTH') or define('TOTAL_HEADER_LENGTH', 8192); +namespace ip2region; class Ip2Region { - /** - * db file handler - */ - private $dbFileHandler = null; + public const XDB_PATH = __DIR__ . '/../assets/ip2region.xdb'; /** - * header block info + * 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象。 + * @return XdbSearcher */ - private $HeaderSip = null; - private $HeaderPtr = null; - private $headerLen = 0; - - /** - * super block index info - */ - private $firstIndexPtr = 0; - private $lastIndexPtr = 0; - private $totalBlocks = 0; - - /** - * for memory mode only - * the original db binary string - */ - private $dbBinStr = null; - private $dbFile = null; - - /** - * construct method - * - * @param $ip2regionFile - */ - public function __construct($ip2regionFile = null) + public static function newWithFileOnly() { - null === $ip2regionFile && - $ip2regionFile = __DIR__ . '/../assets/ip2region.db'; - $this->dbFile = $ip2regionFile; + return XdbSearcher::newWithFileOnly(self::XDB_PATH); } /** - * all the db binary string will be loaded into memory - * then search the memory only and this will a lot faster than disk base search + * 缓存 VectorIndex 索引 * - * @Note: - * invoke it once before put it to public invoke could make it thread safe - * - * @param $ip - * - * @return array|null - * @throws \Exception + * 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象,但是都共享统一的只读 vectorIndex。 + * @return XdbSearcher + * @throws \RuntimeException */ - public function memorySearch($ip) + public static function newWithVectorIndex() { - //check and load the binary string for the first time - if ($this->dbBinStr == null) { - $this->dbBinStr = file_get_contents($this->dbFile); - if ($this->dbBinStr == false) { - throw new \Exception("Fail to open the db file {$this->dbFile}"); - } - - $this->firstIndexPtr = self::getLong($this->dbBinStr, 0); - $this->lastIndexPtr = 
self::getLong($this->dbBinStr, 4); - $this->totalBlocks = ($this->lastIndexPtr - $this->firstIndexPtr) / INDEX_BLOCK_LENGTH + 1; + // 从 path 加载 VectorIndex 缓存,把下述的 vIndex 变量缓存到内存里面。 + $vIndex = XdbSearcher::loadVectorIndexFromFile(self::XDB_PATH); + if ($vIndex === null) { + throw new \RuntimeException(sprintf("failed to load vector index from '%s'", self::XDB_PATH)); } - - if (is_string($ip)) { - $ip = self::safeIp2long($ip); - } - - //binary search to define the data - $l = 0; - $h = $this->totalBlocks; - $dataPtr = 0; - while ($l <= $h) { - $m = (($l + $h) >> 1); - $p = $this->firstIndexPtr + $m * INDEX_BLOCK_LENGTH; - $sip = self::getLong($this->dbBinStr, $p); - if ($ip < $sip) { - $h = $m - 1; - } else { - $eip = self::getLong($this->dbBinStr, $p + 4); - if ($ip > $eip) { - $l = $m + 1; - } else { - $dataPtr = self::getLong($this->dbBinStr, $p + 8); - break; - } - } - } - - //not matched just stop it here - if ($dataPtr == 0) { - return null; - } - - //get the data - $dataLen = (($dataPtr >> 24) & 0xFF); - $dataPtr = ($dataPtr & 0x00FFFFFF); - - return [ - 'city_id' => self::getLong($this->dbBinStr, $dataPtr), - 'region' => substr($this->dbBinStr, $dataPtr + 4, $dataLen - 4) - ]; + // 使用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。 + return XdbSearcher::newWithVectorIndex(self::XDB_PATH, $vIndex); } /** - * get the data block through the specified ip address or long ip numeric with binary search algorithm + * 缓存整个 xdb 数据 * - * @param $ip - * - * @return array|null - * @throws \Exception + * 备注:并发使用,用整个 xdb 缓存创建的 searcher 对象可以安全用于并发。 + * @return XdbSearcher + * @throws \RuntimeException */ - public function binarySearch($ip) + public static function newWithBuffer() { - //check and conver the ip address - if (is_string($ip)) { - $ip = self::safeIp2long($ip); + // 从 path 加载整个 xdb 到内存。 + $cBuff = XdbSearcher::loadContentFromFile(self::XDB_PATH); + if ($cBuff === null) { + throw new \RuntimeException(sprintf("failed to load content buffer from '%s'", self::XDB_PATH)); } - if 
($this->totalBlocks == 0) { - //check and open the original db file - if ($this->dbFileHandler == null) { - $this->dbFileHandler = fopen($this->dbFile, 'r'); - if ($this->dbFileHandler == false) { - throw new \Exception("Fail to open the db file {$this->dbFile}"); - } - } - - fseek($this->dbFileHandler, 0); - $superBlock = fread($this->dbFileHandler, 8); - - $this->firstIndexPtr = self::getLong($superBlock, 0); - $this->lastIndexPtr = self::getLong($superBlock, 4); - $this->totalBlocks = ($this->lastIndexPtr - $this->firstIndexPtr) / INDEX_BLOCK_LENGTH + 1; - } - - //binary search to define the data - $l = 0; - $h = $this->totalBlocks; - $dataPtr = 0; - while ($l <= $h) { - $m = (($l + $h) >> 1); - $p = $m * INDEX_BLOCK_LENGTH; - - fseek($this->dbFileHandler, $this->firstIndexPtr + $p); - $buffer = fread($this->dbFileHandler, INDEX_BLOCK_LENGTH); - $sip = self::getLong($buffer, 0); - if ($ip < $sip) { - $h = $m - 1; - } else { - $eip = self::getLong($buffer, 4); - if ($ip > $eip) { - $l = $m + 1; - } else { - $dataPtr = self::getLong($buffer, 8); - break; - } - } - } - - //not matched just stop it here - if ($dataPtr == 0) { - return null; - } - - //get the data - $dataLen = (($dataPtr >> 24) & 0xFF); - $dataPtr = ($dataPtr & 0x00FFFFFF); - - fseek($this->dbFileHandler, $dataPtr); - $data = fread($this->dbFileHandler, $dataLen); - - return [ - 'city_id' => self::getLong($data, 0), - 'region' => substr($data, 4) - ]; - } - - /** - * get the data block associated with the specified ip with b-tree search algorithm - * - * @Note: not thread safe - * - * @param $ip - * - * @return array|null - * @throws \Exception - */ - public function btreeSearch($ip) - { - if (is_string($ip)) { - $ip = self::safeIp2long($ip); - } - - //check and load the header - if ($this->HeaderSip == null) { - //check and open the original db file - if ($this->dbFileHandler == null) { - $this->dbFileHandler = fopen($this->dbFile, 'r'); - if ($this->dbFileHandler == false) { - throw new 
\Exception("Fail to open the db file {$this->dbFile}"); - } - } - - fseek($this->dbFileHandler, 8); - $buffer = fread($this->dbFileHandler, TOTAL_HEADER_LENGTH); - - //fill the header - $idx = 0; - $this->HeaderSip = []; - $this->HeaderPtr = []; - for ($i = 0; $i < TOTAL_HEADER_LENGTH; $i += 8) { - $startIp = self::getLong($buffer, $i); - $dataPtr = self::getLong($buffer, $i + 4); - if ($dataPtr == 0) { - break; - } - - $this->HeaderSip[] = $startIp; - $this->HeaderPtr[] = $dataPtr; - $idx++; - } - - $this->headerLen = $idx; - } - - //1. define the index block with the binary search - $l = 0; - $h = $this->headerLen; - $sptr = 0; - $eptr = 0; - while ($l <= $h) { - $m = (($l + $h) >> 1); - - //perfetc matched, just return it - if ($ip == $this->HeaderSip[$m]) { - if ($m > 0) { - $sptr = $this->HeaderPtr[$m - 1]; - $eptr = $this->HeaderPtr[$m]; - } else { - $sptr = $this->HeaderPtr[$m]; - $eptr = $this->HeaderPtr[$m + 1]; - } - - break; - } - - //less then the middle value - if ($ip < $this->HeaderSip[$m]) { - if ($m == 0) { - $sptr = $this->HeaderPtr[$m]; - $eptr = $this->HeaderPtr[$m + 1]; - break; - } else { - if ($ip > $this->HeaderSip[$m - 1]) { - $sptr = $this->HeaderPtr[$m - 1]; - $eptr = $this->HeaderPtr[$m]; - break; - } - } - $h = $m - 1; - } else { - if ($m == $this->headerLen - 1) { - $sptr = $this->HeaderPtr[$m - 1]; - $eptr = $this->HeaderPtr[$m]; - break; - } else { - if ($ip <= $this->HeaderSip[$m + 1]) { - $sptr = $this->HeaderPtr[$m]; - $eptr = $this->HeaderPtr[$m + 1]; - break; - } - } - $l = $m + 1; - } - } - - //match nothing just stop it - if ($sptr == 0) { - return null; - } - - //2. 
search the index blocks to define the data - $blockLen = $eptr - $sptr; - fseek($this->dbFileHandler, $sptr); - $index = fread($this->dbFileHandler, $blockLen + INDEX_BLOCK_LENGTH); - - $dataPtr = 0; - $l = 0; - $h = $blockLen / INDEX_BLOCK_LENGTH; - while ($l <= $h) { - $m = (($l + $h) >> 1); - $p = (int)($m * INDEX_BLOCK_LENGTH); - $sip = self::getLong($index, $p); - if ($ip < $sip) { - $h = $m - 1; - } else { - $eip = self::getLong($index, $p + 4); - if ($ip > $eip) { - $l = $m + 1; - } else { - $dataPtr = self::getLong($index, $p + 8); - break; - } - } - } - - //not matched - if ($dataPtr == 0) { - return null; - } - - //3. get the data - $dataLen = (($dataPtr >> 24) & 0xFF); - $dataPtr = ($dataPtr & 0x00FFFFFF); - - fseek($this->dbFileHandler, $dataPtr); - $data = fread($this->dbFileHandler, $dataLen); - - return [ - 'city_id' => self::getLong($data, 0), - 'region' => substr($data, 4) - ]; - } - - /** - * safe self::safeIp2long function - * - * @param $ip - * - * @return int|string - */ - public static function safeIp2long($ip) - { - $ip = ip2long($ip); - - // convert signed int to unsigned int if on 32 bit operating system - if ($ip < 0 && PHP_INT_SIZE == 4) { - $ip = sprintf("%u", $ip); - } - - return $ip; - } - - /** - * read a long from a byte buffer - * - * @param $b - * @param $offset - * - * @return int|string - */ - public static function getLong($b, $offset) - { - $val = ( - (ord($b[$offset++])) | - (ord($b[$offset++]) << 8) | - (ord($b[$offset++]) << 16) | - (ord($b[$offset]) << 24) - ); - - // convert signed int to unsigned int if on 32 bit operating system - if ($val < 0 && PHP_INT_SIZE == 4) { - $val = sprintf("%u", $val); - } - - return $val; - } - - /** - * destruct method, resource destroy - */ - public function __destruct() - { - if ($this->dbFileHandler != null) { - fclose($this->dbFileHandler); - } - - $this->dbBinStr = null; - $this->HeaderSip = null; - $this->HeaderPtr = null; + return XdbSearcher::newWithBuffer($cBuff); } } diff --git 
a/tests/Ip2RegionTest.php b/tests/Ip2RegionTest.php new file mode 100644 index 0000000..9d9bb6d --- /dev/null +++ b/tests/Ip2RegionTest.php @@ -0,0 +1,64 @@ + '中国|0|北京|北京市|电信', + '123.151.137.18' => '中国|0|天津|天津市|电信', + '103.100.62.111' => '中国|0|香港|0|0', + '20.205.243.166' => '美国|0|0|0|微软', + ]; + + public static function builder($cachePolicy) + { + if ('vectorIndex' === $cachePolicy) { + return Ip2Region::newWithVectorIndex(); + } elseif ('content' === $cachePolicy) { + return Ip2Region::newWithBuffer(); + } + return Ip2Region::newWithFileOnly(); + } + + private function search($searcher, $ip, $expected) + { + $ts = self::now(); + $r = $searcher->search($ip); + printf( + "ip: %s, region: %s, ioCount: %d, took: %.5f ms\n", + $ip, + $r, + $searcher->getIOCount(), + self::now() - $ts + ); + $this->assertEquals($expected, $r); + } + + public function testSearch() + { + foreach (['file', 'vectorIndex', 'content'] as $cachePolicy) { + printf("cachePolicy = %s\n", $cachePolicy); + $searcher = $this->builder($cachePolicy); + foreach ($this->ips as $ip => $expected) { + $this->search($searcher, $ip, $expected); + } + printf(PHP_EOL); + } + } +}