Files
packtpub-browser/packt.php
2018-06-28 15:01:53 +02:00

252 lines
8.8 KiB
PHP

<?php
include 'vendor/autoload.php';
use DiDom\Document;
// Run without an execution time limit. 0 is the documented "no limit" value for
// set_time_limit(); the previously used -1 is not a documented argument.
set_time_limit(0);

// settings.php is expected to return an array; its 'email' and 'password' keys are read below.
$settings = include 'settings.php';

// Form fields posted to the site's login form.
// NOTE(review): form_build_id is a hard-coded token — presumably copied from a rendered
// login page; confirm the site still accepts it.
$loginData = [
    'email' => $settings['email'],
    'password' => $settings['password'],
    'op' => 'Login',
    'form_build_id' => 'form-fba4b62ee04aafbf045b1d9ae019d90b',
    'form_id' => 'packt_user_login_form',
];
/**
 * Performs an HTTP request with cURL, persisting cookies in "cookie.txt".
 *
 * A non-empty $post turns the request into a POST whose fields are URL-encoded.
 * Redirects are followed. When $localFilePath is given, the URL is treated as a
 * file to download and the response body is written to that local path instead
 * of being returned.
 *
 * @param string      $url           URL to request.
 * @param array       $post          Form fields to send; an empty array sends no POST body.
 * @param string|null $localFilePath Local destination for the response body, or null
 *                                   to get the body back as the return value.
 *
 * @return string|bool The value of curl_exec(): the response body when no
 *                     $localFilePath is given, true when the body was written to
 *                     $localFilePath, false on failure. Also false when
 *                     $localFilePath cannot be opened for writing.
 */
function c($url, $post = [], $localFilePath = null)
{
    $cookie = "cookie.txt";

    // Open the destination before touching cURL so an unwritable path fails fast
    // instead of handing an invalid handle to CURLOPT_FILE.
    $fp = null;
    if (!is_null($localFilePath)) {
        $fp = fopen($localFilePath, 'w+');
        if ($fp === false) {
            return false;
        }
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    if (!empty($post)) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post));
    }
    if ($fp !== null) {
        curl_setopt($ch, CURLOPT_FILE, $fp);
    } else {
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    }
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // The same file is used to store and to send cookies, so a session obtained by
    // one call is reused by the following ones.
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

    $server_output = curl_exec($ch);
    curl_close($ch);

    // Release the download target explicitly; it was previously left open.
    if ($fp !== null) {
        fclose($fp);
    }

    return $server_output;
}
/**
 * Turns a site-relative or absolute packtpub.com URL into an absolute one.
 *
 * Any occurrence of the site prefix is removed, leading and trailing slashes are
 * stripped from what remains, and the prefix is put back in front.
 *
 * @param string $bookUrl Relative path or absolute URL.
 *
 * @return string Absolute URL without a trailing slash after the path.
 */
function resolveBookUrl($bookUrl)
{
    $siteRoot = 'https://www.packtpub.com/';
    $relativePart = str_replace($siteRoot, '', $bookUrl);

    return $siteRoot . trim($relativePart, '/');
}
/**
 * Strips the packtpub.com scheme and host from a URL, leaving the site-relative path.
 *
 * @param string $bookUrl Absolute or already-relative URL.
 *
 * @return string The input with every occurrence of the site origin removed.
 */
function truncateBookUrl($bookUrl)
{
    $siteOrigin = 'https://www.packtpub.com';
    $relativeUrl = str_replace($siteOrigin, '', $bookUrl);

    return $relativeUrl;
}
/**
 * Loads a book's product page and extracts its metadata.
 *
 * The URL is normalised with resolveBookUrl() and handed to DiDom's Document
 * (the second constructor argument is true — presumably "load from this
 * location" rather than "parse this markup"; confirm against DiDom).
 *
 * @param string $bookUrl Relative or absolute book page URL.
 *
 * @return array Keys, in order: datepublished, numberofpages, isbn, reviewCount,
 *               ratingValue, toc (list of ['title', 'subchapters']), description,
 *               willLearn, category, authors (list of ['name', 'bio']).
 */
function getBookInfo($bookUrl)
{
    $page = new Document(resolveBookUrl($bookUrl), true);

    $info = [
        'datepublished' => $page->first('.book-top-block-info-authors time[itemprop="datePublished"]::attr(datetime)'),
        'numberofpages' => $page->first('span[itemprop="numberOfPages"]::text'),
        'isbn' => $page->first('span[itemprop="isbn"]::text'),
        'reviewCount' => $page->first('meta[itemprop="reviewCount"]::attr(content)'),
        'ratingValue' => $page->first('meta[itemprop="ratingValue"]::attr(content)'),
        'toc' => [],
    ];

    // Table of contents: only chapters that actually carry a title element are kept.
    foreach ($page->find('#book-info-toc.onlyDesktop .book-toc-chapter') as $chapterNode) {
        $titleNode = $chapterNode->first('div[class*="book-toc-chapter-title"]');
        if (is_null($titleNode)) {
            continue;
        }
        $info['toc'][] = [
            'title' => trim($titleNode->text()),
            'subchapters' => $chapterNode->find('div[class*="book-toc-section-text"]::text'),
        ];
    }

    $info['description'] = $page->find('div.book-info-bottom-indetail-text[itemprop="description"] p::text');
    $info['willLearn'] = $page->find('div.book-info-will-learn-text li::text');
    // The category is read from the element whose data-product-id equals the ISBN found above.
    $info['category'] = $page->first('div[data-product-id="' . $info['isbn'] . '"]::attr(data-product-category)');

    $info['authors'] = [];
    foreach ($page->find('[itemprop="author"]') as $authorNode) {
        $info['authors'][] = [
            'name' => $authorNode->first('h3::text'),
            'bio' => $authorNode->find('p::text'),
        ];
    }

    return $info;
}
// Author back-fill: for every row of the book table, scrape the book page and make
// sure each listed author has a row in author, linking newly created authors to the
// book through book_author.
$db = new SQLite3('data.db');
// To try the script on a single random book instead:
//$stmt = $db->prepare('SELECT * FROM book order by random() limit 1 ');
$stmt = $db->prepare('SELECT * FROM book');
$booksResult = $stmt->execute();
echo '<pre>';
while ($book = $booksResult->fetchArray(SQLITE3_ASSOC)) {
    $bookInfo = getBookInfo($book['url']);
    foreach ($bookInfo['authors'] as $author) {
        // Normalise once; the same values are used for the lookup and the insert.
        $authorName = trim($author['name']);
        $authorBio = trim(implode(' ', $author['bio']));

        // An author is identified by the exact (name, bio) pair.
        $findAuthor = $db->prepare('SELECT * FROM author WHERE name = :name and bio = :bio');
        $findAuthor->bindValue(':name', $authorName, SQLITE3_TEXT);
        $findAuthor->bindValue(':bio', $authorBio, SQLITE3_TEXT);
        $result = $findAuthor->execute();
        $authorData = $result->fetchArray(SQLITE3_ASSOC);

        // NOTE(review): an author that already exists is not linked to this book here;
        // confirm whether a book_author row should be created in that case as well.
        if (!$authorData) {
            $insertAuthor = $db->prepare('INSERT INTO author(name, bio) VALUES (:name, :bio)');
            $insertAuthor->bindValue(':name', $authorName, SQLITE3_TEXT);
            $insertAuthor->bindValue(':bio', $authorBio, SQLITE3_TEXT);
            $result = $insertAuthor->execute();

            // Only link when the insert succeeded, and take the new id straight from
            // the connection rather than from sqlite_sequence, which exists only for
            // AUTOINCREMENT tables.
            if ($result !== false) {
                $authorId = $db->lastInsertRowID();

                $linkAuthor = $db->prepare('INSERT INTO book_author(book_id, author_id) VALUES (:book_id, :author_id)');
                $linkAuthor->bindValue(':book_id', $book['id'], SQLITE3_INTEGER);
                $linkAuthor->bindValue(':author_id', $authorId, SQLITE3_INTEGER);
                $result = $linkAuthor->execute();
            }
        }
        // Debug output: the existing author row, or false when the author was new.
        var_dump($authorData);
    }
}
echo '</pre>';
// Stops the script here; top-level statements after this line do not run.
die();
// Library sync: log in, list the account's e-books, download each available format
// and store books that are not yet in the local database.

// Log in first; c() keeps cookies in a shared file, so the next request reuses the session.
$return = c('https://www.packtpub.com/', $loginData);
$return = c('https://www.packtpub.com/account/my-ebooks');
$document = new Document($return);
// A saved copy of the page can be parsed instead:
//$document = new Document('packt.html', true);
$booksData = [];
$books = $document->find('.product-line.unseen');
$dl = 0;
$db = new SQLite3('data.db');
//shuffle($books);
foreach ($books as $book) {
    // Everything that can be read from the library listing entry itself.
    $bookData = [
        'title' => str_replace(["\r\n"], '', trim($book->first('.title::text'))),
        'nid' => (int)$book->attr('nid'),
        'pdf' => truncateBookUrl($book->first('a[href$=pdf]::attr(href)')),
        'epub' => truncateBookUrl($book->first('a[href$=epub]::attr(href)')),
        'mobi' => truncateBookUrl($book->first('a[href$=mobi]::attr(href)')),
        'code' => truncateBookUrl($book->first('a[href*=code_download]::attr(href)')),
        'isbn' => $book->first('div[isbn]::attr(isbn)'),
        'img' => $book->first('img[class*=imagecache]::attr(src)'),
        'url' => truncateBookUrl($book->first('div[class*=product-thumbnail]')->first('a::attr(href)')),
    ];

    // Is this book (by site node id) already stored?
    $stmt = $db->prepare('SELECT id FROM book WHERE nid = :nid');
    $stmt->bindValue(':nid', $bookData['nid'], SQLITE3_INTEGER);
    $result = $stmt->execute();
    $resultData = $result->fetchArray(SQLITE3_ASSOC);

    // Downloads are attempted for every listed book, stored or not.
    foreach (['pdf', 'epub', 'mobi', 'code'] as $format) {
        $dl += downloadBook($bookData[$format]);
    }

    if (!$resultData) {
        // Unknown book: fetch the detail page and store the combined record.
        $bookData['info'] = getBookInfo($bookData['url']);
        $stmt = $db->prepare(
            'REPLACE INTO book (nid, title, isbn, img, url, datepublished, numberofpages, reviewCount, ratingValue, category, pdf, epub, mobi, code)'
            . "\n"
            . 'VALUES (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :reviewCount, :ratingValue, :category, :pdf, :epub, :mobi, :code)'
        );
        // placeholder => [value, SQLite type]
        $bindings = [
            ':nid' => [$bookData['nid'], SQLITE3_INTEGER],
            ':title' => [$bookData['title'], SQLITE3_TEXT],
            ':isbn' => [$bookData['isbn'], SQLITE3_TEXT],
            ':img' => [$bookData['img'], SQLITE3_TEXT],
            ':url' => [$bookData['url'], SQLITE3_TEXT],
            ':datepublished' => [$bookData['info']['datepublished'], SQLITE3_TEXT],
            ':numberofpages' => [$bookData['info']['numberofpages'], SQLITE3_INTEGER],
            ':reviewCount' => [$bookData['info']['reviewCount'], SQLITE3_INTEGER],
            ':ratingValue' => [(float)$bookData['info']['ratingValue'], SQLITE3_FLOAT],
            ':category' => [$bookData['info']['category'], SQLITE3_TEXT],
            ':pdf' => [$bookData['pdf'], SQLITE3_TEXT],
            ':epub' => [$bookData['epub'], SQLITE3_TEXT],
            ':mobi' => [$bookData['mobi'], SQLITE3_TEXT],
            ':code' => [$bookData['code'], SQLITE3_TEXT],
        ];
        foreach ($bindings as $placeholder => $binding) {
            $stmt->bindValue($placeholder, $binding[0], $binding[1]);
        }
        $result = $stmt->execute();
    }

    // Running total of files downloaded so far; stop once it exceeds 500.
    echo $dl . ' - ';
    if ($dl > 500) {
        die();
    }
}
/**
 * Downloads one book asset into the local "books" tree unless it is already there.
 *
 * The local file and directory are derived from the URL path by fileNameFromPath()
 * and directoryNameFromPath(); the directory is created when missing. The absolute
 * URL being fetched is echoed before the transfer starts.
 *
 * @param string $url Site-relative download path; an empty value is skipped.
 *
 * @return int 1 when the file was downloaded, 0 when it was skipped (empty URL or
 *             file already present) or when the transfer failed.
 */
function downloadBook($url)
{
    if (!strlen($url)) {
        return 0;
    }

    $localPath = fileNameFromPath($url);
    if (fileExists($localPath)) {
        return 0;
    }

    $directory = directoryNameFromPath($url);
    $fullUrl = resolveBookUrl($url);
    echo ($fullUrl) . "\n";

    if (!is_dir($directory)) {
        mkdir($directory, 0777, true);
    }

    if (c($fullUrl, [], $localPath) === false) {
        // A failed transfer can leave an empty or partial file behind. Remove it so
        // the existence check above does not treat the book as downloaded next time.
        if (fileExists($localPath)) {
            unlink($localPath);
        }
        return 0;
    }

    return 1;
}
/**
 * Builds the local file path for a download URL path.
 *
 * The path is split on "/" after trimming slashes. The second segment is used as
 * both the directory name and the file base name; the third segment, when present,
 * becomes the extension, otherwise "zip" is used.
 *
 * @param string $filePath Site-relative download path.
 *
 * @return string "books/<segment>/<segment>.<extension>" using DIRECTORY_SEPARATOR.
 */
function fileNameFromPath($filePath)
{
    $segments = explode('/', trim($filePath, '/'));
    $extension = isset($segments[2]) ? $segments[2] : 'zip';

    return implode(
        DIRECTORY_SEPARATOR,
        ['books', $segments[1], $segments[1] . '.' . $extension]
    );
}
/**
 * Builds the local directory for a download URL path.
 *
 * The path is split on "/" after trimming slashes and its second segment names the
 * directory under "books".
 *
 * @param string $filePath Site-relative download path.
 *
 * @return string "books/<segment>/" using DIRECTORY_SEPARATOR, with a trailing separator.
 */
function directoryNameFromPath($filePath)
{
    $segments = explode('/', trim($filePath, '/'));

    // The trailing empty element yields the closing separator.
    return implode(DIRECTORY_SEPARATOR, ['books', $segments[1], '']);
}
/**
 * Reports whether a file or directory exists at the given path.
 *
 * @param string $filePath Path to check.
 *
 * @return bool Result of file_exists() for the path.
 */
function fileExists($filePath)
{
    $isPresent = file_exists($filePath);

    return $isPresent;
}