The Jaccard index, also known as the Jaccard similarity coefficient (originally coined coefficient de communauté by Paul Jaccard), is a statistic used for comparing the similarity and diversity of sample sets.
Jaccard(A,B)=|A∩B| / |A∪B|
随机应变 ABCD: Always Be Coding and … : хороший
The Jaccard index, also known as the Jaccard similarity coefficient (originally coined coefficient de communauté by Paul Jaccard), is a statistic used for comparing the similarity and diversity of sample sets.
Jaccard(A,B)=|A∩B| / |A∪B|
<!doctype html>
<html lang="ja">
<head>
<!-- The charset declaration comes before the non-ASCII title so the parser decodes the title correctly. -->
<meta charset="utf-8">
<title>検索画面</title>
</head>
<body>
<h1>検索画面</h1>
<!-- Posts the search term to pdo_search.php in the "yourname" field. -->
<form action="pdo_search.php" method="post">
<label for="yourname">検索用語を入力:</label><input type="text" id="yourname" name="yourname">
<input type="submit" value="検索する">
</form>
</body>
</html>
mysqlのテーブルからfetch
<?php
/**
 * Search result page.
 *
 * Reads the search term from $_POST['yourname'], selects the rows of the
 * `rss` table whose title contains it, and renders them as an HTML table.
 * A request without POST data is redirected to pdo_search_form.html.
 */
header("Content-type: text/html; charset=utf-8");

// Everything the template below reads is initialised up front so that no
// code path leaves it undefined (count() on an undefined variable is a
// TypeError in PHP 8).
$errors    = array();
$rows      = array();
$row_count = 0;
$yourname  = '';

if (empty($_POST)) {
    header("Location: pdo_search_form.html");
    exit();
}

// Validate the search term (it must be a non-empty string).
if (!isset($_POST['yourname']) || !is_string($_POST['yourname']) || $_POST['yourname'] === "") {
    $errors['name'] = "名前が入力されていません。";
} else {
    $yourname = $_POST['yourname'];
}

if (count($errors) === 0) {
    $dsn = 'mysql:host=localhost;dbname=test;charset=utf8';
    $user = 'dbuser';
    $password = 'xxxx';

    try {
        $dbh = new PDO($dsn, $user, $password);
        $statement = $dbh->prepare("select * from rss where title LIKE (:title)");

        if ($statement) {
            $like_yourname = "%" . $yourname . "%";
            // Bind the actual value to the placeholder.
            $statement->bindValue(':title', $like_yourname, PDO::PARAM_STR);

            if ($statement->execute()) {
                // Count the fetched rows instead of calling rowCount(),
                // which is not guaranteed to be meaningful for SELECT.
                $rows = $statement->fetchAll();
                $row_count = count($rows);
            } else {
                $errors['error'] = "検索失敗しました。";
            }
        } else {
            // prepare() returned false: report it instead of rendering an
            // empty result as if the search had succeeded.
            $errors['error'] = "検索失敗しました。";
        }

        $dbh = null;
    } catch (PDOException $e) {
        // Log the driver message rather than printing it to the visitor.
        error_log('Error:' . $e->getMessage());
        $errors['error'] = "データベース接続失敗しました。";
    }
}
?>
<!doctype html>
<html>
<head>
<title>検索結果</title>
<meta charset="utf-8">
</head>
<body>
<?php if (count($errors) === 0): ?>
<p><?=htmlspecialchars($yourname, ENT_QUOTES, 'utf-8')."さんで検索しました。"?></p>
<p><?=$row_count?>件です。</p>
<table border='1'>
<tr><td>id</td><td>title</td></tr>
<?php foreach ($rows as $row): ?>
<tr>
<td><?=htmlspecialchars((string) $row['id'], ENT_QUOTES, 'utf-8')?></td>
<td><?=htmlspecialchars((string) $row['title'], ENT_QUOTES, 'utf-8')?></td>
</tr>
<?php endforeach; ?>
</table>
<?php else: ?>
<?php foreach ($errors as $value): ?>
<p><?=htmlspecialchars($value, ENT_QUOTES, 'utf-8')?></p>
<?php endforeach; ?>
<?php endif; ?>
</body>
</html>
mysql重複カラムを除外
SELECT DISTINCT col_name, … FROM tbl_name;
example:
mysql> select distinct address from personal;
mysqlにデータを挿入する場合は、uniqueキーを設定して、重複しないようにする。
crontab:設定した時間になったら定期的にコマンドを実行
crontab -e //クロン編集
MAILTO="" //空にすると実行結果のメール送信を無効化(メールで受け取りたい場合は宛先アドレスを指定)
0 9 * * * /home/dir/taisho.sh //時間指定と起動するシェルの指定
時間の設定例
分 時 日 月 曜日 説明
0 * * * * 毎時0分にソース実行
0,45 * * * * 毎時0分と45分にソースが実行
0 3 15 * * 毎月15日の3時にソースが実行
linux
ファイル名.sh
#! /bin/sh
command1
command2
command3
1. cronが起動しているかの確認
[vagrant@localhost rss6]$ /etc/rc.d/init.d/crond status crond (pid 2624) を実行中...
2.cronに既にバッチ処理が設定されているか確認
[vagrant@localhost rss6]$ crontab -l no crontab for vagrant
3. cronの設定ファイルを確認
[vagrant@localhost rss6]$ less /etc/crontab
4.バッチの設定
crontab -e で次の行を登録: 0 0 * * * /bin/bash /home/xxx/cron_all.sh > /dev/null 2>&1
$dbh = new PDO で接続、stmt->executeでsql文を実行
// Prepare the insert; the five "?" placeholders are filled positionally by
// the array passed to execute(). Plain ASCII double quotes are required:
// typographic quotes are not string delimiters in PHP.
$stmt = $dbh->prepare("insert into rss (title,link,site_title,site_link,date) values(?,?,?,?,?)");
$stmt->execute(array($title,$link,$site_title,$site_link,$date));
reference
FETCH_ASSOC:Fetch a result row as an associative array
-- Table of RSS entries: the article's title and link, the title and link of
-- the site it came from, and the entry's date.
CREATE TABLE rss (
    id         INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    title      VARCHAR(255),
    link       VARCHAR(255),
    site_title VARCHAR(64),
    site_link  VARCHAR(64),
    date       DATETIME
);
<?php
/**
 * Fetches the configured RSS feeds with MagpieRSS, drops future-dated and
 * advertisement entries, then inserts the newest $num entries into the
 * `rss` table, echoing each one for confirmation.
 */
try {
    $dbh = new PDO('mysql:host=localhost;dbname=test','dbuser','xxxx');
} catch (PDOException $e) {
    var_dump($e->getMessage());
    exit;
}

$num = 5; // number of RSS entries to keep
$rssUrl = array(
    'http://x6xo.hatenablog.com/rss', // site URL
);

// MagpieRSS
require_once('./magpierss-master/rss_fetch.inc');
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8'); // encode
define('MAGPIE_CACHE_AGE','30'); // cache

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// Fetch every feed and tag each item with the title/link of its site.
$rssItemsArray = array();
foreach ($rssUrl as $rss_url) {
    if ($rss_url == '') {
        continue;
    }
    // Fetch the RSS from the URL; a feed that cannot be fetched is skipped.
    $rss = @fetch_rss($rss_url);
    if ($rss == NULL) {
        continue;
    }
    $itemTotal = count($rss->items);
    for ($i = 0; $i < $itemTotal; $i++) {
        $rss->items[$i]["site_title"] = $rss->channel["title"];
        $rss->items[$i]["site_link"] = $rss->channel["link"];
    }
    // Keep the items of this feed.
    $rssItemsArray[] = $rss->items;
}

// Merge the per-feed item lists into one list.
$concatArray = array();
foreach ($rssItemsArray as $feedItems) {
    $concatArray = array_merge($concatArray, $feedItems);
}

// Extract the fields needed per article, applying the filters.
$rssArray = array();
$nowtime = date("Y-m-d H:i:s"); // current time
foreach ($concatArray as $values) {
    // The date field depends on the kind of RSS. $rawDate is reset for every
    // item so that an undated item never inherits the previous item's date.
    $rawDate = null;
    foreach (array('published', 'created', 'pubdate') as $dateKey) {
        if (!empty($values[$dateKey])) {
            $rawDate = $values[$dateKey];
            break;
        }
    }
    if ($rawDate === null && !empty($values['dc']['date'])) {
        $rawDate = $values['dc']['date'];
    }
    $timestamp = ($rawDate === null) ? false : strtotime($rawDate);
    if ($timestamp === false) {
        // No usable date: the entry cannot be ordered, so it is skipped.
        continue;
    }
    $date = date("Y-m-d H:i:s", $timestamp);

    $title = isset($values["title"]) ? $values["title"] : '';

    // Filter: drop future-dated articles and advertisement articles
    // (titles containing "AD" or "PR").
    if ($date > $nowtime || preg_match("/AD|PR/", $title)) {
        continue;
    }

    $rssArray[] = array(
        $date,
        $title,
        isset($values["link"]) ? $values["link"] : '',
        isset($values["site_title"]) ? $values["site_title"] : '',
        isset($values["site_link"]) ? $values["site_link"] : '',
    );
}

// Sort newest first. The dates are "Y-m-d H:i:s" strings, so string order is
// chronological order. An anonymous comparator avoids declaring a global
// function that another script in the same request could redeclare.
usort($rssArray, function ($a, $b) {
    return strcmp($b[0], $a[0]);
});

// Prepare once, execute per row.
$stmt = $dbh->prepare("insert into rss (title,link,site_title,site_link,date) values(?,?,?,?,?)");

foreach (array_slice($rssArray, 0, $num) as $entry) {
    $date = $entry[0];
    $title = $entry[1];
    $link = $entry[2];
    $site_title = $entry[3];
    $site_link = $entry[4];

    // Confirmation output (feed content is untrusted, so it is escaped).
    echo "<div>" . $esc($date)
        . "<a href='" . $esc($link) . "'>" . $esc($title) . "</a>"
        . "<a href='" . $esc($site_link) . "'>[" . $esc($site_title) . "]</a></div>";

    if ($stmt) {
        $stmt->execute(array($title,$link,$site_title,$site_link,$date));
    }
}
?>
$stmt->fetchAll(PDO::FETCH_ASSOC) as $dataで、mysqlのデータを表示
<?php
/**
 * Lists the ten most recent rows of the `rss` table as an HTML table.
 */
// Connect
try {
    $dbh = new PDO('mysql:host=localhost;dbname=test','dbuser','xxxx');
} catch (PDOException $e) {
    var_dump($e->getMessage());
    exit;
}
?>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>rss</title>
</head>
<body>
<?php
date_default_timezone_set('Asia/Tokyo');

// Escapes a value for use in HTML text or inside a quoted attribute. The
// stored titles and links were not written by this page, so they are never
// echoed raw.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

$sql = "select * from rss order by date desc limit 10";
echo '<table>';
$stmt = $dbh->query($sql);
// query() returns false on failure; in that case the table stays empty.
$records = $stmt ? $stmt->fetchAll(PDO::FETCH_ASSOC) : array();
foreach ($records as $data) {
    $date = date("m/d H:i", strtotime($data['date']));
    $title = $esc($data['title']);
    $link = $esc($data['link']);
    $site_link = $esc($data['site_link']);
    $site_title = $esc($data['site_title']);
    echo "<tr><td>$date</td><td><a href='$link'>$title</a></td><td><a href='$site_link'>[$site_title]</a></td></tr>";
}
echo "</table>";

// Disconnect
$dbh = null;
?>
</body>
</html>
<?php
/**
 * Fetches the configured RSS feeds with MagpieRSS, drops future-dated and
 * advertisement entries, and prints the newest $num entries.
 */
$num = 5; // number of RSS entries to show
$rssUrl = array(
    'http://x6xo.hatenablog.com/rss', // site URL
);

// MagpieRSS
require_once('./magpierss-master/rss_fetch.inc');
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8'); // encode
define('MAGPIE_CACHE_AGE','30'); // cache

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// Fetch every feed and tag each item with the title/link of its site.
$rssItemsArray = array();
foreach ($rssUrl as $rss_url) {
    if ($rss_url == '') {
        continue;
    }
    // Fetch the RSS from the URL; a feed that cannot be fetched is skipped.
    $rss = @fetch_rss($rss_url);
    if ($rss == NULL) {
        continue;
    }
    $itemTotal = count($rss->items);
    for ($i = 0; $i < $itemTotal; $i++) {
        $rss->items[$i]["site_title"] = $rss->channel["title"];
        $rss->items[$i]["site_link"] = $rss->channel["link"];
    }
    // Keep the items of this feed.
    $rssItemsArray[] = $rss->items;
}

// Merge the per-feed item lists into one list.
$concatArray = array();
foreach ($rssItemsArray as $feedItems) {
    $concatArray = array_merge($concatArray, $feedItems);
}

// Extract the fields needed per article, applying the filters.
$rssArray = array();
$nowtime = date("Y-m-d H:i:s"); // current time
foreach ($concatArray as $values) {
    // The date field depends on the kind of RSS. $rawDate is reset for every
    // item so that an undated item never inherits the previous item's date.
    $rawDate = null;
    foreach (array('published', 'created', 'pubdate') as $dateKey) {
        if (!empty($values[$dateKey])) {
            $rawDate = $values[$dateKey];
            break;
        }
    }
    if ($rawDate === null && !empty($values['dc']['date'])) {
        $rawDate = $values['dc']['date'];
    }
    $timestamp = ($rawDate === null) ? false : strtotime($rawDate);
    if ($timestamp === false) {
        // No usable date: the entry cannot be ordered, so it is skipped.
        continue;
    }
    $date = date("Y-m-d H:i:s", $timestamp);

    $title = isset($values["title"]) ? $values["title"] : '';

    // Filter: drop future-dated articles and advertisement articles
    // (titles containing "AD" or "PR").
    if ($date > $nowtime || preg_match("/AD|PR/", $title)) {
        continue;
    }

    $rssArray[] = array(
        $date,
        $title,
        isset($values["link"]) ? $values["link"] : '',
        isset($values["site_title"]) ? $values["site_title"] : '',
        isset($values["site_link"]) ? $values["site_link"] : '',
    );
}

// Sort newest first. The dates are "Y-m-d H:i:s" strings, so string order is
// chronological order. An anonymous comparator avoids declaring a global
// function that another script in the same request could redeclare.
usort($rssArray, function ($a, $b) {
    return strcmp($b[0], $a[0]);
});

foreach (array_slice($rssArray, 0, $num) as $entry) {
    // Confirmation output (feed content is untrusted, so it is escaped).
    echo "<div>" . $esc($entry[0])
        . "<a href='" . $esc($entry[2]) . "'>" . $esc($entry[1]) . "</a>"
        . "<a href='" . $esc($entry[4]) . "'>[" . $esc($entry[3]) . "]</a></div>";
}
?>
mysql
-- Table of RSS entries: the article's title and link, the title and link of
-- the site it came from, and the entry's date.
CREATE TABLE rss (
    id         INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    title      VARCHAR(255),
    link       VARCHAR(255),
    site_title VARCHAR(64),
    site_link  VARCHAR(64),
    date       DATETIME
);
<?php
/**
 * RSS antenna page: merges several feeds fetched with MagpieRSS, drops
 * future-dated and advertisement entries, picks a thumbnail for each entry,
 * and prints the newest $num entries as <article> elements.
 */
// RSS
$rssUrl = array(
    'http://blog.livedoor.jp/news4vip2/index.rdf', // ニュー速クオリティ
    'http://himasoku.com/index.rdf',
    'http://kanasoku.info/index.rdf',
    'http://workingnews.blog117.fc.com/?xml',
    'http://blog.livedoor.jp/dgnplus/index.rdf',
    'http://majikichi.com/index.rdf',
);

// MagpieRSS
require_once('./magpierss-master/rss_fetch.inc');
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
define('MAGPIE_CACHE_ON', false);

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// Fetch every feed and tag each item with the title/link of its site.
$rssItemsArray = array();
foreach ($rssUrl as $rss_url) {
    $rss = @fetch_rss($rss_url);
    if ($rss == NULL) {
        continue; // feed could not be fetched: skip it
    }
    $itemTotal = count($rss->items);
    for ($i = 0; $i < $itemTotal; $i++) {
        $rss->items[$i]["site_title"] = $rss->channel["title"];
        $rss->items[$i]["site_link"] = $rss->channel["link"];
    }
    $rssItemsArray[] = $rss->items;
}

// Merge the per-feed item lists into one list.
$contactArray = array();
foreach ($rssItemsArray as $feedItems) {
    $contactArray = array_merge($contactArray, $feedItems);
}

$rssArray = array();
// 24-hour clock ("H"), the same format as the entry dates it is compared with.
$nowtime = date("Y-m-d H:i:s");

foreach ($contactArray as $values) {
    // The date field depends on the kind of RSS. $rawDate is reset for every
    // item so that an undated item never inherits the previous item's date.
    $rawDate = null;
    if (!empty($values['published'])) {
        $rawDate = $values['published'];
    } elseif (!empty($values['dc']['date'])) {
        $rawDate = $values['dc']['date'];
    } elseif (!empty($values['pubdate'])) {
        $rawDate = $values['pubdate'];
    }
    $timestamp = ($rawDate === null) ? false : strtotime($rawDate);
    if ($timestamp === false) {
        // No usable date: the entry cannot be ordered, so it is skipped.
        continue;
    }
    $date = date("Y-m-d H:i:s", $timestamp);

    $title = isset($values["title"]) ? $values["title"] : '';

    // Filter: future-dated entries and titles containing "AD" or "PR".
    if ($date > $nowtime || preg_match("/AD|PR/", $title)) {
        continue;
    }

    $link = isset($values["link"]) ? $values["link"] : '';
    $site_title = isset($values["site_title"]) ? $values["site_title"] : '';
    $site_link = isset($values["site_link"]) ? $values["site_link"] : '';

    // Thumbnail: look for an <img> in content:encoded first, then in the
    // description.
    $img_all = array();
    $content = isset($values["content"]["encoded"]) ? $values["content"]["encoded"] : '';
    preg_match('/<img.*>/i', $content, $img_all);
    if (empty($img_all[0])) {
        $content = isset($values['description']) ? $values['description'] : '';
        preg_match('/<img.*>/i', $content, $img_all);
    }

    // Fall back to the placeholder image when no image URL is found. The
    // extension list has no "$" anchor: the matched markup ends with ">",
    // so an anchored ".jpeg" could never match.
    $gazo = "http://azaz.clouver.jp/antenna/noimage.png";
    if (!empty($img_all[0])
        && preg_match('/http.*?(\.gif|\.png|\.jpe?g|\.bmp)/i', $img_all[0], $found)) {
        $gazo = $found[0];
    }

    $rssArray[] = array($date, $title, $link, $site_title, $site_link, $gazo);
}

$num = 50;
// rsort() compares the rows element by element, so the date in column 0
// decides the order: newest first.
rsort($rssArray);

// Build the HTML
foreach (array_slice($rssArray, 0, $num) as $entry) {
    $date = $esc(date("m/d H:i", strtotime($entry[0])));
    $title = $esc($entry[1]);
    $link = $esc($entry[2]);
    $site_title = $esc($entry[3]);
    $site_link = $esc($entry[4]);
    $gazo = $esc($entry[5]);

    $gazolink = "<a target='_blank' href='$link'><div class='trim'><img src='$gazo' /></div></a>";
    $titlelink = "<div class='title'><a href='$link'>$title</a></div>";
    $site_titlelink = "<div class='site_title'><a href='$site_link'>$date - [$site_title]</a></div>";
    echo "<article>$gazolink$titlelink$site_titlelink</article>";
}
?>
<?php
/**
 * Prints the items of the FNN news feed as an unordered list of
 * "date + linked title" entries.
 */
$xml = simplexml_load_file('http://rss.fnn-news.com/fnn_news.xml');

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

echo "<ul>";
// simplexml_load_file() returns false when the feed cannot be loaded or
// parsed; the list is then left empty.
if ($xml !== false) {
    foreach ($xml->channel->item as $entry) {
        $entrydate = date("Y.m.d", strtotime((string) $entry->pubDate));
        echo "<li>$entrydate<a href='" . $esc($entry->link) . "'>" . $esc($entry->title) . "</a></li>";
    }
}
echo "</ul>";
?>
画像付き
reference:preg_match
Perform a regular expression match
<?php
/**
 * Prints every item of the FNN news feed as an <article>: site link, date,
 * linked title and, when the description contains one, the first image.
 */
$xml = simplexml_load_file('http://rss.fnn-news.com/fnn_news.xml');

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// simplexml_load_file() returns false when the feed cannot be loaded or
// parsed; nothing is printed in that case.
if ($xml !== false) {
    // Site link: the same for every item, so it is built once.
    $site_titlelink = "<a href='" . $esc($xml->channel->link) . "'>" . $esc($xml->channel->title) . "</a>";

    foreach ($xml->channel->item as $entry) {
        echo "<article>";

        // site link
        echo $site_titlelink;

        // date
        echo date("Y.m.d", strtotime((string) $entry->pubDate));

        // article link
        echo "<a href='" . $esc($entry->link) . "'>" . $esc($entry->title) . "</a>";

        // picture: only the src of the first <img> is taken and the tag is
        // rebuilt, so markup from the feed is never echoed verbatim. Items
        // without an image print nothing here.
        if (preg_match('/<img[^>]*\ssrc\s*=\s*["\']([^"\']+)["\']/i', (string) $entry->description, $entryimg)) {
            echo "<img src='" . $esc($entryimg[1]) . "'>";
        }

        echo "</article>";
    }
}
?>
<?php
/**
 * Emits an RSS 2.0 feed built from up to 15 rows of the `headlines` table
 * (columns: link, headline, description).
 */
$database = "nameofthedatabase";

// Rows to publish; stays empty when the database cannot be read.
$return = array();
try {
    // PDO replaces the mysql_* functions, which no longer exist in PHP 7+.
    // The persistent flag mirrors the original mysql_pconnect() call.
    $dbconnect = new PDO(
        "mysql:host=localhost;dbname=" . $database,
        'dbuser',
        'dbpassword',
        array(
            PDO::ATTR_PERSISTENT => true,
            PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
        )
    );
    // The table name is quoted with backticks: single quotes would make it a
    // string literal, which is a syntax error in the FROM clause.
    $result = $dbconnect->query("select link, headline, description from `headlines` limit 15");
    $return = $result->fetchAll(PDO::FETCH_ASSOC);
} catch (PDOException $e) {
    error_log('Error:' . $e->getMessage());
}

// Escapes text for an XML text node. htmlentities() is not used because it
// emits HTML-only named entities that are undefined in XML.
$xmlEscape = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1, 'UTF-8');
};

// RFC 822 date, the format RSS 2.0 requires for <pubDate>.
$now = date(DATE_RSS);

header("Content-type: text/xml;charset=utf-8");

$output = "<?xml version=\"1.0\"?>
<rss version=\"2.0\">
<channel>
<title>Our Demo RSS</title>
<link>http://wwww.tracypeterson.com/RSS/RSS.php</link>
<description>A TEST RSS</description>
<language>en-us</language>
<pubDate>$now</pubDate>
<docs>http://someurl.com</docs>
<managingEditor>yo@youremail.com</managingEditor>
<webMaster>you@youremail.com</webMaster>
";

foreach ($return as $line) {
    $output .= "<item><title>" . $xmlEscape($line['headline']) . "</title>
<link>" . $xmlEscape($line['link']) . "</link>
<description>" . $xmlEscape(strip_tags((string) $line['description'])) . "</description>
</item>";
}

$output .= "</channel></rss>";
echo $output;
?>
<?php
/**
 * Template for emitting a hand-written RSS 2.0 feed: header, channel
 * information, then one <item> per loop iteration.
 */
// Output the header
header("Content-type: text/xml;charset=utf-8");
echo '<?xml version="1.0" encoding="UTF-8"?>';

// Write the site information
echo '
<rss version="2.0">
<channel>
<title>hogehogeオレオレRSS</title>
<link>http://google.com</link>
<description>Facebookのgraph api, cake php, javascriptなどの記事を更新していきます。</description>
<category>php, graph api, facebook, cakephp</category>
<generator>@DAI199</generator>
<webMaster>@DAI199</webMaster>
';

// Write the articles. The original loop header was a prose placeholder and
// not valid PHP; a counted loop makes the template runnable.
// NOTE(review): $itemCount is a stand-in value; replace the loop with an
// iteration over the real article data.
$itemCount = 5;
for ($i = 1; $i <= $itemCount; $i++) {
    echo '
<item>
<title>Test'.$i.'</title>
<link>http://google.com</link>
<description>TestTest</description>
<author>@DAI199</author>
<category>php</category>
</item>
';
}

echo '
</channel>
</rss>
';
?>
<?php
/**
 * Loads one RSS feed through Feed.php and renders its entries as a
 * definition list: linked title with date in <dt>, site name in <dd>.
 */
// Load the library
require_once './Feed.php';

// URL of the feed to fetch
$url = "http://www.lesson5.info/?feed=rss2" ;

// Create the instance
$feed = new Feed;

// Load the RSS
$rss = $feed->loadRss( $url );

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// Buffer for the HTML output
$html = '';

$sitename = $rss->title ;

foreach ( $rss->item as $item ) {
    // Per-entry processing
    $title = $item->title; // title
    $link = $item->link ;  // link

    // Date as a unix timestamp. Reset for every entry so that an entry
    // without a date does not reuse the previous entry's value.
    $timestamp = null;
    foreach ( array( "pubDate", "date_timestamp", "dc:date", "published", "issued" ) as $time ) {
        if ( isset( $item->{ $time }) && !empty( $item->{ $time })) {
            $timestamp = ( is_int( $item->{ $time })) ? $item->{ $time } : strtotime( (string) $item->{ $time });
            break ;
        }
    }

    // Fall back to the current time when no date was found or it could not
    // be parsed.
    if ( $timestamp === null || $timestamp === false ) {
        $timestamp = time();
    }

    $html .= '<dt><a href="' . $esc( $link ) . '" target="_blank">' . $esc( $title ) . '</a>(' . date( "Y/m/d" , $timestamp ) . ')</dt><dd>' . $esc( $sitename ) . '</dd>';
}
?>
<dl>
<?php echo $html ?>
</dl>
<?php
/**
 * Loads several RSS feeds through Feed.php, takes the first entry of each,
 * sorts those entries newest first and renders at most $maxview of them as
 * a definition list.
 */
// Load the library
require_once './Feed.php';

// Cache settings
Feed::$cacheDir = './temp';
Feed::$cacheExpire = '1 hours';

// Buffer for the HTML output
$html = '';

// Maximum number of entries to show
$maxview = 5;

// URLs of the feeds to fetch
$urls = array(
    "http://www.lesson5.info/?feed=rss2",
    "http://www.lesson5.info/?feed=rss2",
    "http://www.lesson5.info/?feed=rss2",
    "http://www.lesson5.info/?feed=rss2",
    "http://www.lesson5.info/?feed=rss2",
    "http://www.lesson5.info/?feed=rss2"
);

// Escapes a value for use in HTML text or inside a quoted attribute.
$esc = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

// Combined list holding the first entry of every feed
$entrylist = array();

foreach ($urls as $url) {
    // Load the RSS. NOTE(review): Feed::loadRss() presumably throws when a
    // feed cannot be loaded; confirm against Feed.php. One broken feed
    // should not take the whole page down, so it is skipped.
    try {
        $rss = Feed::loadRss($url);
    } catch (Exception $e) {
        continue;
    }

    // A feed without entries has nothing to contribute.
    if (!isset($rss->item[0])) {
        continue;
    }
    $item = $rss->item[0];

    $entrylist[] = array(
        'sitename'  => (string) $rss->title,
        'sitelink'  => (string) $rss->link,
        'title'     => (string) $item->title,
        'link'      => (string) $item->link,
        'timestamp' => (int) strtotime((string) $item->pubDate),
    );
}

// Sort by date, newest first
usort($entrylist, function ($a, $b) {
    if ($a['timestamp'] == $b['timestamp']) {
        return 0;
    }
    return ($a['timestamp'] > $b['timestamp']) ? -1 : 1;
});

foreach (array_slice($entrylist, 0, $maxview) as $entry) {
    $html .= '<dt><a href="' . $esc($entry['link']) . '" target="_blank">' . $esc($entry['title']) . '</a> (' . date( "Y/m/d" , $entry['timestamp'] ) . ')</dt><dd><a href="' . $esc($entry['sitelink']) . '" target="_blank">' . $esc($entry['sitename']) . '</a></dd>' ;
}
?>
<dl>
<?php echo $html ?>
</dl>