国产av日韩一区二区三区精品,成人性爱视频在线观看,国产,欧美,日韩,一区,www.成色av久久成人,2222eeee成人天堂

Community

Learn

Tools Library

AI Tools

Leisure

English

php search algorithm

phpcn_u1582 2017-05-16 13:02:53

532

A file contains 300,000 records, one word per line.

Some of them are peer words (anagrams): for example, post, stop and tops are peer words.
How do you find all the peer words in the file?

Please give me some ideas

phpcn_u1582

reply all(3)

phpcn_u15822017-05-16 13:04:53 3 floor

Use linux commands to complete your requirements. Example

Count the lines containing "Action(" in the files under a folder:
grep Action\( ~/www/pms/app/app/controllers/*.php | wc -l

Like +0

Add Reply

PHPzhong2017-05-16 13:04:53 2 floor

My suggestion is to write a special sorting algorithm, and then use usort to sort, so that the same words are sorted together, and then output in order

The rough logic of the sorting algorithm is

/**
 * usort() comparator that places peer words (anagrams) next to each other.
 *
 * Each word is reduced to a signature made of its bytes in sorted order;
 * two words are anagrams exactly when their signatures are identical.
 * Comparing the signatures (rather than the raw words) gives a consistent
 * ordering, so after sorting all anagrams of a word form one contiguous run.
 *
 * @param string $left
 * @param string $right
 * @return int 0 when the words are anagrams of each other, otherwise a
 *             negative/positive value as returned by strcmp() on the signatures
 */
function cmp($left, $right)
{
    return strcmp(cmp_signature($left), cmp_signature($right));
}

/**
 * Builds the sorted-letter signature of a word (e.g. "stop" => "opst").
 *
 * @param string $word
 * @return string
 */
function cmp_signature($word)
{
    $chars = str_split($word);
    // SORT_STRING keeps digit characters from being compared numerically.
    sort($chars, SORT_STRING);

    return implode('', $chars);
}

1. To sort 300,000 rows of data, use usort + the above cmp function

2. Traverse the sorted data from row 2 to the end, comparing each row with the previous one: if the two are peer words (cmp returns 0), output them; otherwise move on to the next row.

Probably. Written by hand

Like +0

Add Reply

Ty802017-05-16 13:04:53 1 floor

<?php

/**
 * Node of a trie (prefix tree) that stores words compactly and speeds up lookups.
 *
 * (T) marks a node that ends a word
 * (F) marks a node that does not end a word
 *
 * Example for the words:
 * hi
 * his
 * is
 *     root
 *    /   \
 *  h (F)  i(F)
 *  |      |
 *  i (T)  s(T)
 *  |
 *  s (T)
 */
class TreeNode
{
    /** @var bool True when this node ends a word. */
    public $isStr = false;

    /** @var array Child nodes, keyed by character index. */
    public $next = [];

    /**
     * TreeNode constructor.
     *
     * A fresh node ends no word and has no children; both values come from
     * the property defaults declared above.
     *
     * Words consist of a-z, so upper- and lower-case characters can all be
     * stored as lower case; indexes 0 - 25 correspond to a - z.
     */
    public function __construct()
    {
    }
}


/**
 * Builds and queries the trie.
 *
 * Words are inserted character by character into a tree of TreeNode objects.
 * find() returns every stored word that starts with some suffix of the query.
 */
class Helper
{
    /** @var TreeNode Root node; corresponds to the empty prefix. */
    public $treeRoot;

    /**
     * @var bool When true, child nodes are keyed by the character itself
     *           instead of its numeric offset from 'a'.
     */
    public $debug = false;

    public function __construct()
    {
        $this->treeRoot = new TreeNode();
    }

    /**
     * Stores a word in the trie. The word is lower-cased before it is stored.
     *
     * @param string $str
     */
    public function insert($str)
    {
        $str = strtolower($str);
        $length = strlen($str);

        $node = $this->treeRoot;
        for ($i = 0; $i < $length; ++$i) {
            // Square brackets: the {} string-offset syntax no longer parses on PHP 8.
            $index = $this->char2index($str[$i]);
            if (!isset($node->next[$index])) {
                $node->next[$index] = new TreeNode();
            }
            $node = $node->next[$index];
        }
        $node->isStr = true;
    }

    /**
     * Maps a character to the key used in TreeNode::$next.
     *
     * @param string $ch single character
     * @return int|string offset from 'a', or the character itself in debug mode
     */
    private function char2index($ch)
    {
        return $this->debug ? $ch : ord($ch) - ord('a');
    }

    /**
     * Inverse of char2index(): maps a TreeNode::$next key back to its character.
     *
     * @param int|string $index
     * @return int|string
     */
    private function index2char($index)
    {
        return $this->debug ? $index : chr($index + ord('a'));
    }

    /**
     * Looks up the stored words related to the given string.
     *
     * Every suffix of $str is tried, from the shortest (last character) to
     * the whole string, and all stored words that begin with that suffix are
     * collected. A minimum suffix length could be enforced inside the loop
     * (e.g. skip suffixes shorter than 2 characters) to narrow the matches.
     *
     * @param string $str
     * @return array list of distinct matching words
     */
    public function find($str)
    {
        $result = [];

        $str = strtolower($str);

        $nextStr = '';
        for ($i = strlen($str) - 1; $i >= 0; --$i) {
            $nextStr = $str[$i] . $nextStr;

            $result = array_merge($result, $this->getResult($nextStr));
        }

        // array_unique() keeps the original keys; re-index so callers get a plain list.
        return array_values(array_unique($result));
    }

    /**
     * Finds every stored word that starts with the given prefix.
     *
     * @param string $str prefix to look up
     * @return array
     */
    private function getResult($str)
    {
        $result = [];
        $node = $this->treeRoot;
        $length = strlen($str);

        // Walk down to the node for the prefix; stop as soon as a character is missing.
        for ($i = 0; $i < $length; ++$i) {
            $index = $this->char2index($str[$i]);
            if (!isset($node->next[$index])) {
                return $result;
            }
            $node = $node->next[$index];
        }

        // Visit the subtree with an explicit stack (last pushed, first visited).
        // Each entry carries the node together with the string spelled so far.
        $stack = [['node' => $node, 'str' => $str]];

        while (!empty($stack)) {
            $current = array_pop($stack);

            if ($current['node']->isStr) {
                $result[] = $current['str'];
            }

            // Build each child's string from the parent's string. $current must
            // not be reassigned in this loop, or siblings would inherit each
            // other's characters.
            foreach ($current['node']->next as $index => $child) {
                $stack[] = [
                    'node' => $child,
                    'str' => $current['str'] . $this->index2char($index),
                ];
            }
        }

        return $result;
    }
}

$helper = new Helper();

// Seed the trie with the sample vocabulary.
foreach (['is', 'his', 'her', 'post', 'top', 'stop'] as $word) {
    $helper->insert($word);
}

// Run each sample query and dump its matches.
foreach (['post', 'hi'] as $query) {
    $result = $helper->find($query);
    print_r($result);
}

Like +0

Add Reply