Fixed checking if book info is already in the database

This commit is contained in:
krzysiej
2018-04-25 12:37:23 +02:00
parent b3a1ebafe7
commit b496f8a531
2 changed files with 2799 additions and 2593 deletions

4107
packt.html

File diff suppressed because one or more lines are too long

View File

@@ -34,8 +34,6 @@ function c($url, $post = [])
$server_output = curl_exec($ch); $server_output = curl_exec($ch);
strlen($server_output); strlen($server_output);
// $info = curl_getinfo($ch);
curl_close($ch); curl_close($ch);
@@ -50,6 +48,11 @@ function resolveBookUrl($bookUrl)
return 'https://www.packtpub.com/' . str_replace('https://www.packtpub.com/', '', $bookUrl); return 'https://www.packtpub.com/' . str_replace('https://www.packtpub.com/', '', $bookUrl);
} }
/**
 * Remove the Packt site origin from a URL, leaving the site-relative part.
 *
 * Every occurrence of 'https://www.packtpub.com' in the input is removed;
 * input that does not contain it is returned unchanged.
 *
 * @param string|null $bookUrl URL to shorten. Presumably null when the scraped
 *                             element has no matching link (verify against callers).
 * @return string The URL with the origin removed, or '' when $bookUrl is null.
 */
function truncateBookUrl($bookUrl)
{
    // Passing null as str_replace()'s subject is deprecated since PHP 8.1;
    // return the same empty string the implicit null-to-string cast produced.
    if ($bookUrl === null) {
        return '';
    }

    return str_replace('https://www.packtpub.com', '', $bookUrl);
}
function getBookInfo($bookUrl) function getBookInfo($bookUrl)
{ {
@@ -99,32 +102,30 @@ $booksData = [];
$books = $document->find('.product-line.unseen'); $books = $document->find('.product-line.unseen');
$db = new SQLite3('data.db'); $db = new SQLite3('data.db');
foreach ($books as $book) { foreach ($books as $book) {
$bookData = []; $bookData = [];
$bookData['title'] = str_replace(["\r\n"], '', trim($book->first('.title::text'))); $bookData['title'] = str_replace(["\r\n"], '', trim($book->first('.title::text')));
$bookData['nid'] = (int)$book->attr('nid'); $bookData['nid'] = (int)$book->attr('nid');
$bookData['pdf'] = $book->first('a[href$=pdf]::attr(href)'); $bookData['pdf'] = truncateBookUrl($book->first('a[href$=pdf]::attr(href)'));
$bookData['epub'] = $book->first('a[href$=epub]::attr(href)'); $bookData['epub'] = truncateBookUrl($book->first('a[href$=epub]::attr(href)'));
$bookData['mobi'] = $book->first('a[href$=mobi]::attr(href)'); $bookData['mobi'] = truncateBookUrl($book->first('a[href$=mobi]::attr(href)'));
$bookData['code'] = $book->first('a[href*=code_download]::attr(href)'); $bookData['code'] = truncateBookUrl($book->first('a[href*=code_download]::attr(href)'));
$bookData['isbn'] = $book->first('div[isbn]::attr(isbn)'); $bookData['isbn'] = $book->first('div[isbn]::attr(isbn)');
$bookData['img'] = $book->first('img[class*=imagecache]::attr(src)'); $bookData['img'] = $book->first('img[class*=imagecache]::attr(src)');
$bookData['url'] = $book->first('div[class*=product-thumbnail]')->first('a::attr(href)'); $bookData['url'] = truncateBookUrl($book->first('div[class*=product-thumbnail]')->first('a::attr(href)'));
$stmt = $db->prepare('SELECT id FROM book WHERE nid = :nid'); $stmt = $db->prepare('SELECT id FROM book WHERE nid = :nid');
$stmt->bindValue(':nid', $bookData['nid'], SQLITE3_INTEGER); $stmt->bindValue(':nid', $bookData['nid'], SQLITE3_INTEGER);
$result = $stmt->execute(); $result = $stmt->execute();
if ($result->numColumns() > 0) { $resultData = $result->fetchArray(SQLITE3_ASSOC);
if (!$resultData) {
} else {
$bookData['info'] = getBookInfo($bookData['url']); $bookData['info'] = getBookInfo($bookData['url']);
$stmt = $db->prepare('REPLACE INTO book (nid, title, isbn, img, url, datepublished, numberofpages, reviewCount, ratingValue, category, pdf, epub, mobi, code) $stmt = $db->prepare('REPLACE INTO book (nid, title, isbn, img, url, datepublished, numberofpages, reviewCount, ratingValue, category, pdf, epub, mobi, code)
VALUES (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :reviewCount, :ratingValue, :category, :pdf, :epub, :mobi, :code)'); VALUES (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :reviewCount, :ratingValue, :category, :pdf, :epub, :mobi, :code)');