diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..7aebc83 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# EditorConfig is awesome: http://EditorConfig.org +root = true + +[*] +end_of_line = lf +insert_final_newline = true + +[**.{php,md}] +charset = utf-8 +indent_style = space +indent_size = 4 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..de4f67d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +/.gitattributes export-ignore +/.gitignore export-ignore +/.github export-ignore +/.php-cs-fixer.php export-ignore +/.editorconfig export-ignore +/phpunit.xml export-ignore +/phpunit.xml.dist export-ignore +/phpstan.neon export-ignore +/tests export-ignore diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..48ee32d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + target-branch: "2.x" + schedule: + interval: "monthly" + - package-ecosystem: "composer" + directory: "/" + target-branch: "2.x" + schedule: + interval: "daily" diff --git a/.github/workflows/php.yml b/.github/workflows/php.yml new file mode 100644 index 0000000..33325e4 --- /dev/null +++ b/.github/workflows/php.yml @@ -0,0 +1,42 @@ +name: PHP Composer + +on: [ push, pull_request ] + +jobs: + build: + name: build (PHP ${{ matrix.php-versions }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + php-versions: [ '7.2', '7.4', '8.0', '8.1' ] + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup PHP, with composer and extensions + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + tools: pecl + extensions: mbstring, dom + + - name: Get composer cache directory + id: composer-cache + run: echo "::set-output name=dir::$(composer config cache-files-dir)" + + - name: Cache composer dependencies + uses: actions/cache@v3 + with: + path: ${{ 
steps.composer-cache.outputs.dir }} + key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.json') }} + restore-keys: ${{ runner.os }}-composer- + + - name: Validate composer.json and composer.lock + run: composer validate + + - name: Install Composer dependencies + run: composer install --no-progress --no-suggest --prefer-dist --optimize-autoloader + + - name: Run test suite + run: composer travis diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c992da --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/vendor/ +/composer.lock +.idea +Thumbs.db +ehthumbs.db +Desktop.ini +.DS_Store +.php-cs-fixer.cache +phpunit.xml diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php new file mode 100644 index 0000000..2a01cb8 --- /dev/null +++ b/.php-cs-fixer.php @@ -0,0 +1,36 @@ +setUsingCache(true) + ->setRiskyAllowed(true) + ->setRules([ + '@PHP71Migration' => true, + '@PHPUnit75Migration:risky' => true, + '@PSR12' => true, + 'header_comment' => ['header' => $fileHeaderComment], + ]) + ->setFinder( + PhpCsFixer\Finder::create() + ->ignoreVCSIgnored(true) + ->files() + ->name('*.php') + ->exclude('vendor') + ->in(__DIR__) + ); diff --git a/README.md b/README.md new file mode 100644 index 0000000..c3983bd --- /dev/null +++ b/README.md @@ -0,0 +1,99 @@ +# ip2region SDK for PHP + +[![Author](https://img.shields.io/badge/author-@chinayin-blue.svg)](https://github.com/chinayin) +[![Software License](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg)](LICENSE) +[![Latest Version](https://img.shields.io/packagist/v/chinayin/ip2region-core.svg)](https://packagist.org/packages/chinayin/ip2region-core) +[![Total Downloads](https://img.shields.io/packagist/dt/chinayin/ip2region-core.svg)](https://packagist.org/packages/chinayin/ip2region-core) +![php 7.1+](https://img.shields.io/badge/php-min%207.1-red.svg) + +### Installation + +运行环境要求 PHP 7.1 及以上版本,以及[cURL](http://php.net/manual/zh/book.curl.php)。 + +#### 官方原生查询包 + +特点:包更小,数据路径自定义 + +> composer 
require chinayin/ip2region-core + +#### 包含数据查询包 + +特点:`xdb数据`封装在composer包内,数据会不定期更新 + +使用方法:[github.com/chinayin/ip2region](https://github.com/chinayin/ip2region-sdk-php) + +> composer require chinayin/ip2region + +### Quick Examples + +#### 完全基于文件的查询 + +```php +use ip2region\XdbSearcher; + +$ip = '1.2.3.4'; +$xdb = './ip2region.xdb'; +try { + $region = XdbSearcher::newWithFileOnly($xdb)->search($ip); + var_dump($region); +} catch (\Exception $e) { + var_dump($e->getMessage()); +} +``` + +> 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象。 + +#### 缓存 VectorIndex 索引 + +如果你的 php 母环境支持,可以预先加载 vectorIndex 缓存,然后做成全局变量,每次创建 Searcher 的时候使用全局的 +vectorIndex,可以减少一次固定的 IO 操作从而加速查询,减少 io 压力。 + +```php +use ip2region\XdbSearcher; + +$ip = '1.2.3.4'; +$xdb = './ip2region.xdb'; +try { + // 1、加载 VectorIndex 缓存,把下述的 vIndex 变量缓存到内存里面。 + $vIndex = XdbSearcher::loadVectorIndexFromFile($xdb); + if (null === $vIndex) { + throw new \RuntimeException("failed to load vector index from '$xdb'."); + } + // 2、使用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。 + $searcher = XdbSearcher::newWithVectorIndex($xdb, $vIndex); + // 3、查询 + $region = $searcher->search($ip); + var_dump($region); +} catch (\Exception $e) { + var_dump($e->getMessage()); +} +``` + +> 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象,但是都共享统一的只读 vectorIndex。 + +#### 缓存整个 xdb 数据 + +如果你的 PHP 母环境支持,可以预先加载整个 xdb 的数据到内存,这样可以实现完全基于内存的查询,类似之前的 memory search 查询。 + +```php +use ip2region\XdbSearcher; + +$ip = '1.2.3.4'; +$xdb = './ip2region.xdb'; +try { + // 1、加载整个 xdb 到内存。 + $cBuff = XdbSearcher::loadContentFromFile($xdb); + if (null === $cBuff) { + throw new \RuntimeException("failed to load content buffer from '$xdb'"); + } + // 2、使用全局的 cBuff 创建带完全基于内存的查询对象。 + $searcher = XdbSearcher::newWithBuffer($cBuff); + // 3、查询 + $region = $searcher->search($ip); + var_dump($region); +} catch (\Exception $e) { + var_dump($e->getMessage()); +} +``` + +> 备注:并发使用,用整个 xdb 缓存创建的 searcher 对象可以安全用于并发。 diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..b21dd99
--- /dev/null +++ b/composer.json @@ -0,0 +1,43 @@ +{ + "name": "chinayin/ip2region-core", + "description": "Ip2region (2.0 - xdb) is a offline IP address manager framework and locator with ten microsecond searching performance. xdb engine implementation for many programming languages\n\n", + "authors": [ + { + "name": "lionsoul2014", + "email": "1187582057@qq.com" + }, + { + "name": "chinayin", + "email": "whereismoney@qq.com" + } + ], + "license": "Apache-2.0", + "require": { + "PHP": ">=7.1" + }, + "require-dev": { + "phpunit/phpunit": "^6.0|^9.5", + "friendsofphp/php-cs-fixer": "^3.0", + "phpstan/phpstan": "^1.0" + }, + "autoload": { + "psr-4": { + "ip2region\\": "src" + } + }, + "autoload-dev": { + "psr-4": { + "ip2region\\Tests\\": "tests" + } + }, + "scripts": { + "test": "vendor/bin/phpunit", + "test-ci": "vendor/bin/phpunit --coverage-text", + "lint": "vendor/bin/php-cs-fixer fix -v", + "analyse": "vendor/bin/phpstan analyse", + "travis": [ + "composer lint", + "composer analyse" + ] + } +} diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..bfd0251 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,13 @@ +parameters: + level: 5 + checkMissingIterableValueType: false + checkFunctionNameCase: true + reportUnmatchedIgnoredErrors: false + checkGenericClassInNonGenericObjectType: false + inferPrivatePropertyTypeFromConstructor: true + treatPhpDocTypesAsCertain: false + paths: + - src + - tests + ignoreErrors: + - '#PHPDoc tag .* has invalid value.*#' diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..16a418d --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,25 @@ + + + + + ./tests/ + + + + + src/ + + + + + + diff --git a/src/XdbSearcher.php b/src/XdbSearcher.php new file mode 100644 index 0000000..fb1a883 --- /dev/null +++ b/src/XdbSearcher.php @@ -0,0 +1,368 @@ +vectorIndex = null; + $this->contentBuff = $cBuff; + } else { + // open the xdb binary file + $this->handle = fopen($dbFile, "r"); + if ($this->handle 
=== false) { + throw new \Exception("failed to open xdb file '%s'", $dbFile); + } + + $this->vectorIndex = $vectorIndex; + } + } + + public function close() + { + if ($this->handle != null) { + fclose($this->handle); + } + } + + public function getIOCount() + { + return $this->ioCount; + } + + /** + * find the region info for the specified ip address + * @throws \Exception + */ + public function search($ip) + { + // check and convert the sting ip to a 4-bytes long + if (is_string($ip)) { + $t = self::ip2long($ip); + if ($t === null) { + throw new \Exception("invalid ip address `$ip`"); + } + $ip = $t; + } + + // reset the global counter + $this->ioCount = 0; + + // locate the segment index block based on the vector index + $il0 = ($ip >> 24) & 0xFF; + $il1 = ($ip >> 16) & 0xFF; + $idx = $il0 * self::VectorIndexCols * self::VectorIndexSize + $il1 * self::VectorIndexSize; + if ($this->vectorIndex != null) { + $sPtr = self::getLong($this->vectorIndex, $idx); + $ePtr = self::getLong($this->vectorIndex, $idx + 4); + } else { + if ($this->contentBuff != null) { + $sPtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx); + $ePtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx + 4); + } else { + // read the vector index block + $buff = $this->read(self::HeaderInfoLength + $idx, 8); + if ($buff === null) { + throw new \Exception("failed to read vector index at ${idx}"); + } + + $sPtr = self::getLong($buff, 0); + $ePtr = self::getLong($buff, 4); + } + } + + // printf("sPtr: %d, ePtr: %d\n", $sPtr, $ePtr); + + // binary search the segment index to get the region info + $dataLen = 0; + $dataPtr = null; + $l = 0; + $h = ($ePtr - $sPtr) / self::SegmentIndexSize; + while ($l <= $h) { + $m = ($l + $h) >> 1; + $p = $sPtr + $m * self::SegmentIndexSize; + + // read the segment index + $buff = $this->read($p, self::SegmentIndexSize); + if ($buff == null) { + throw new \Exception("failed to read segment index at ${p}"); + } + + $sip = 
self::getLong($buff, 0); + if ($ip < $sip) { + $h = $m - 1; + } else { + $eip = self::getLong($buff, 4); + if ($ip > $eip) { + $l = $m + 1; + } else { + $dataLen = self::getShort($buff, 8); + $dataPtr = self::getLong($buff, 10); + break; + } + } + } + + // match nothing interception. + // @TODO: could this even be a case ? + // printf("dataLen: %d, dataPtr: %d\n", $dataLen, $dataPtr); + if ($dataPtr == null) { + return null; + } + + // load and return the region data + $buff = $this->read($dataPtr, $dataLen); + if ($buff == null) { + return null; + } + + return $buff; + } + + // read specified bytes from the specified index + private function read($offset, $len) + { + // check the in-memory buffer first + if ($this->contentBuff != null) { + return substr($this->contentBuff, $offset, $len); + } + + // read from the file + $r = fseek($this->handle, $offset); + if ($r == -1) { + return null; + } + + $this->ioCount++; + $buff = fread($this->handle, $len); + if ($buff === false) { + return null; + } + + if (strlen($buff) != $len) { + return null; + } + + return $buff; + } + + // --- static util functions ---- + + // convert a string ip to long + public static function ip2long($ip) + { + $ip = ip2long($ip); + if ($ip === false) { + return null; + } + + // convert signed int to unsigned int if on 32 bit operating system + if ($ip < 0 && PHP_INT_SIZE == 4) { + $ip = sprintf("%u", $ip); + } + + return $ip; + } + + // read a 4bytes long from a byte buffer + public static function getLong($b, $idx) + { + $val = (ord($b[$idx])) | (ord($b[$idx + 1]) << 8) + | (ord($b[$idx + 2]) << 16) | (ord($b[$idx + 3]) << 24); + + // convert signed int to unsigned int if on 32 bit operating system + if ($val < 0 && PHP_INT_SIZE == 4) { + $val = sprintf("%u", $val); + } + + return $val; + } + + // read a 2bytes short from a byte buffer + public static function getShort($b, $idx) + { + return ((ord($b[$idx])) | (ord($b[$idx + 1]) << 8)); + } + + // load header info from a specified file handle 
/**
 * Read and decode the fixed-size xdb header from an open file handle.
 *
 * The handle is repositioned to offset 0 and left wherever the read ends;
 * it is never closed here, the caller keeps ownership.
 *
 * @param resource $handle readable, seekable handle of an xdb file
 * @return array<string, int|string>|null decoded header fields, or null when
 *                                        seeking fails or the header cannot be read in full
 */
public static function loadHeader($handle)
{
    if (fseek($handle, 0) === -1) {
        return null;
    }

    $buff = fread($handle, self::HeaderInfoLength);
    // a short read means the file is truncated, so there is nothing to decode
    if ($buff === false || strlen($buff) != self::HeaderInfoLength) {
        return null;
    }

    // header fields are decoded at fixed byte offsets of the header block
    return [
        'version' => self::getShort($buff, 0),
        'indexPolicy' => self::getShort($buff, 2),
        'createdAt' => self::getLong($buff, 4),
        'startIndexPtr' => self::getLong($buff, 8),
        'endIndexPtr' => self::getLong($buff, 12),
    ];
}

/**
 * Open the xdb file at the given path and decode its header.
 *
 * @param string $dbFile path of the xdb file
 * @return array<string, int|string>|null decoded header, or null when the file
 *                                        cannot be opened or the header cannot be read
 */
public static function loadHeaderFromFile($dbFile)
{
    $handle = fopen($dbFile, 'r');
    if ($handle === false) {
        return null;
    }

    // the handle is private to this call: always release it, whatever loadHeader() returns
    try {
        return self::loadHeader($handle);
    } finally {
        fclose($handle);
    }
}

/**
 * Read the raw vector index table that directly follows the header.
 *
 * The handle is repositioned and not closed; the caller keeps ownership.
 *
 * @param resource $handle readable, seekable handle of an xdb file
 * @return string|null the vector index bytes, or null when seeking fails or
 *                     the table cannot be read in full
 */
public static function loadVectorIndex($handle)
{
    if (fseek($handle, self::HeaderInfoLength) === -1) {
        return null;
    }

    // search() addresses this table as rows x cols entries of VectorIndexSize bytes,
    // so that entry size (not the segment index entry size) determines its length
    $rLen = self::VectorIndexRows * self::VectorIndexCols * self::VectorIndexSize;
    $buff = fread($handle, $rLen);
    if ($buff === false || strlen($buff) != $rLen) {
        return null;
    }

    return $buff;
}

/**
 * Open the xdb file at the given path and read its vector index table.
 *
 * @param string $dbFile path of the xdb file
 * @return string|null the vector index bytes, or null when the file cannot
 *                     be opened or the table cannot be read
 */
public static function loadVectorIndexFromFile($dbFile)
{
    $handle = fopen($dbFile, 'r');
    if ($handle === false) {
        return null;
    }

    // the handle is private to this call: always release it, whatever loadVectorIndex() returns
    try {
        return self::loadVectorIndex($handle);
    } finally {
        fclose($handle);
    }
}

/**
 * Read the complete content of an xdb file from an open file handle.
 *
 * The handle is repositioned and not closed; the caller keeps ownership.
 *
 * @param resource $handle readable, seekable handle of an xdb file
 * @return string|null the whole file content, or null when the size cannot be
 *                     determined, the file is empty, or the read comes up short
 */
public static function loadContent($handle)
{
    // the end-of-file position is the number of bytes to read
    if (fseek($handle, 0, SEEK_END) === -1) {
        return null;
    }

    $size = ftell($handle);
    // fread() rejects a zero length (warning on PHP 7, ValueError on PHP 8),
    // so an empty file is reported as a load failure before reading
    if ($size === false || $size < 1) {
        return null;
    }

    // seek to the head for reading
    if (fseek($handle, 0) === -1) {
        return null;
    }

    $buff = fread($handle, $size);
    if ($buff === false || strlen($buff) != $size) {
        return null;
    }

    return $buff;
}

// load the xdb content from a file
path + public static function loadContentFromFile($dbFile) + { + $str = file_get_contents($dbFile, false); + if ($str === false) { + return null; + } else { + return $str; + } + } + + public static function now() + { + return (microtime(true) * 1000); + } +} diff --git a/tests/BenchTest.php b/tests/BenchTest.php new file mode 100644 index 0000000..26729d6 --- /dev/null +++ b/tests/BenchTest.php @@ -0,0 +1,85 @@ +assertCount(3, $ps); + + $sip = XdbSearcher::ip2long($ps[0]); + $eip = XdbSearcher::ip2long($ps[1]); + $this->assertNotNull($sip); + $this->assertNotNull($eip); + $this->assertGreaterThanOrEqual($sip, $eip); + + $mip = ($sip + $eip) >> 1; + foreach ([$sip, ($sip + $mip) >> 1, $mip, ($mip + $eip) >> 1, $eip] as $ip) { + try { + $cTime = XdbSearcher::now(); + $region = $searcher->search($ip); + $costs += XdbSearcher::now() - $cTime; + } catch (\Exception $e) { + printf("failed to search ip `%s`\n", long2ip($ip)); + return; + } + + $this->assertNotNull($region, sprintf("failed to search ip `%s`", long2ip($ip))); + + // check the region info + $this->assertEquals( + $ps[2], + $region, + sprintf("failed search(%s) with (%s != %s)\n", long2ip($ip), $region, $ps[2]) + ); + + $count++; + } + } + + fclose($handle); + $searcher->close(); + printf( + "Bench finished, {cachePolicy: %s, total: %d, took: %ds, cost: %.3f ms/op}\n", + $cachePolicy, + $count, + (XdbSearcher::now() - $ts) / 1000, + $count == 0 ? 
0 : $costs / $count + ); + } +} diff --git a/tests/LoadTest.php b/tests/LoadTest.php new file mode 100644 index 0000000..c3c233e --- /dev/null +++ b/tests/LoadTest.php @@ -0,0 +1,43 @@ +assertNotNull($header); + } + + public function testLoadVectorIndex() + { + $vIndex = XdbSearcher::loadVectorIndexFromFile(getenv('XDB_PATH')); + printf("vector index loaded, length=%d\n", strlen($vIndex)); + $this->assertNotNull($vIndex); + } + + public function testLoadContent() + { + $cBuff = XdbSearcher::loadContentFromFile(getenv('XDB_PATH')); + printf("content loaded, length=%d\n", strlen($cBuff)); + $this->assertNotNull($cBuff); + } +} diff --git a/tests/SearchTest.php b/tests/SearchTest.php new file mode 100644 index 0000000..fa3fb10 --- /dev/null +++ b/tests/SearchTest.php @@ -0,0 +1,64 @@ + '中国|0|北京|北京市|电信', + '123.151.137.18' => '中国|0|天津|天津市|电信', + '103.100.62.111' => '中国|0|香港|0|0', + '20.205.243.166' => '美国|0|0|0|微软', + ]; + + public static function builder($cachePolicy) + { + $file = getenv('XDB_PATH'); + if ('vectorIndex' === $cachePolicy) { + return XdbSearcher::newWithVectorIndex($file, XdbSearcher::loadVectorIndexFromFile($file)); + } elseif ('content' === $cachePolicy) { + return XdbSearcher::newWithBuffer(XdbSearcher::loadContentFromFile($file)); + } + return XdbSearcher::newWithFileOnly($file); + } + + private function search($searcher, $ip, $expected) + { + $ts = self::now(); + $r = $searcher->search($ip); + printf( + "ip: %s, region: %s, ioCount: %d, took: %.5f ms\n", + $ip, + $r, + $searcher->getIOCount(), + self::now() - $ts + ); + $this->assertEquals($expected, $r); + } + + public function testSearch() + { + foreach (['file', 'vectorIndex', 'content'] as $cachePolicy) { + printf("\ncachePolicy = %s\n", $cachePolicy); + $searcher = $this->builder($cachePolicy); + foreach ($this->ips as $ip => $expected) { + $this->search($searcher, $ip, $expected); + } + } + } +} diff --git a/tests/TestCase.php b/tests/TestCase.php new file mode 100644 index 0000000..19d0ac9 
--- /dev/null +++ b/tests/TestCase.php @@ -0,0 +1,25 @@ +