728 lines
30 KiB
PHP
728 lines
30 KiB
PHP
<?php
|
|
include 'vendor/autoload.php';
|
|
|
|
use DiDom\Document;
|
|
|
|
// Scraping and downloading can run for a long time, so lift the execution
// time limit. Zero is the documented "no limit" value for set_time_limit().
set_time_limit(0);

// settings.php is expected to `return` the credentials that Packt posts to
// the login endpoint (NOTE(review): exact shape is not visible here).
$credentials = include 'settings.php';
|
|
|
|
|
|
/**
 * Client for a Packt account.
 *
 * Logs in through the token endpoint, lists the account's books, scrapes
 * book detail pages and stores the results in the local SQLite file data.db.
 */
class Packt
{
    const BASE_URL = 'https://www.packtpub.com/';
    const MY_BOOKS_PAGE = self::BASE_URL . 'account/my-ebooks';
    const LOGIN_PAGE = 'https://www.packtpub.com/mapt-rest/users/tokens';

    /** @var mixed Credentials given to the constructor; JSON-encoded and posted to LOGIN_PAGE. */
    private $credentials = [];

    /**
     * @var string|null Bearer token obtained by login(); null until then.
     *
     * Must never be hard-coded: a token embedded in source leaks account
     * data and, being non-null, prevents login() from ever refreshing it.
     */
    private $accessToken = null;

    /** @var SQLite3|null Lazily opened database handle. */
    private $database = null;

    /**
     * @param mixed $credentials Login payload, sent as JSON to LOGIN_PAGE.
     */
    public function __construct($credentials)
    {
        $this->credentials = $credentials;
    }

    /**
     * Returns the shared handle to data.db, opening it on first use.
     *
     * @return null|SQLite3
     */
    public function getDatabase()
    {
        if (!$this->database) {
            $this->database = new SQLite3('data.db');
        }

        return $this->database;
    }

    /**
     * Turns a site-relative or absolute book URL into an absolute URL
     * without leading/trailing slashes on the path part.
     *
     * @param string|null $bookUrl
     * @return string
     */
    public function resolveBookUrl($bookUrl)
    {
        return self::BASE_URL . trim(str_replace(self::BASE_URL, '', $bookUrl ?? ''), '/');
    }

    /**
     * Strips the site origin from a URL, leaving the path.
     *
     * @param string|null $bookUrl Null (no matching link found) yields ''.
     * @return string
     */
    public function truncateBookUrl($bookUrl)
    {
        return str_replace('https://www.packtpub.com', '', $bookUrl ?? '');
    }

    /**
     * Stores a book's table of contents: one row per chapter and one row per
     * subchapter pointing at its chapter through parent_id.
     *
     * @param array $data   Book data with a 'toc' list of ['title' => ..., 'subchapters' => [...]].
     * @param int   $bookId Value for chapter.book_id.
     * @throws RuntimeException When a row cannot be written.
     */
    public function saveBookChapters($data, $bookId)
    {
        foreach ($data['toc'] as $chapterIndex => $chapter) {
            $chapterId = $this->insertChapter($bookId, $chapterIndex + 1, null, $chapter['title']);

            if (!isset($chapter['subchapters'])) {
                continue;
            }

            foreach ($chapter['subchapters'] as $subChapterIndex => $subChapterName) {
                $this->insertChapter($bookId, $subChapterIndex + 1, $chapterId, $subChapterName);
            }
        }
    }

    /**
     * Writes one chapter row and returns its row id.
     *
     * The id comes from lastInsertRowID() on the same connection, so it is
     * the row just written even when the table has no sqlite_sequence entry.
     *
     * @return int
     * @throws RuntimeException When the statement cannot be prepared or executed.
     */
    private function insertChapter($bookId, $chapterNumber, $parentId, $name)
    {
        $db = $this->getDatabase();
        $stmt = $db->prepare('REPLACE INTO chapter (book_id, chapter_number, parent_id, name)
            VALUES (:book_id, :chapter_number, :parent_id, :name)');
        if ($stmt === false) {
            throw new RuntimeException('Cannot prepare chapter insert: ' . $db->lastErrorMsg());
        }

        $stmt->bindValue(':book_id', $bookId, SQLITE3_INTEGER);
        $stmt->bindValue(':chapter_number', $chapterNumber, SQLITE3_INTEGER);
        $stmt->bindValue(':parent_id', $parentId, SQLITE3_INTEGER);
        $stmt->bindValue(':name', $name, SQLITE3_TEXT);

        if ($stmt->execute() === false) {
            throw new RuntimeException('Cannot insert chapter: ' . $db->lastErrorMsg());
        }

        return $db->lastInsertRowID();
    }

    /**
     * Local directory for a download path: books/<second path segment>/.
     *
     * @param string $filePath Site-relative download path, e.g. /ebook_download/123/pdf.
     * @return string
     */
    public function directoryNameFromPath($filePath)
    {
        $filePathParts = explode('/', trim($filePath, '/'));

        return 'books' . DIRECTORY_SEPARATOR . $filePathParts[1] . DIRECTORY_SEPARATOR;
    }

    /**
     * Downloads one file unless it is already present locally.
     *
     * @param string|null $url Site-relative download path; empty/null means "nothing to download".
     * @return int 1 when a download was started, 0 when skipped.
     */
    public function downloadBook($url)
    {
        if (!strlen((string)$url)) {
            return 0;
        }

        $localPath = $this->fileNameFromPath($url);
        if (file_exists($localPath)) {
            return 0;
        }

        $directory = $this->directoryNameFromPath($url);
        if (!is_dir($directory)) {
            mkdir($directory, 0777, true);
        }

        // Use the class's own transport so the bearer token is sent.
        $this->c($this->resolveBookUrl($url), [], $localPath);

        return 1;
    }

    /**
     * Local file name for a download path:
     * books/<segment 1>/<segment 1>.<segment 2, or "zip" when absent>.
     *
     * @param string $filePath
     * @return string
     */
    public function fileNameFromPath($filePath)
    {
        $filePathParts = explode('/', trim($filePath, '/'));
        $extension = isset($filePathParts[2]) ? $filePathParts[2] : 'zip';

        return $this->directoryNameFromPath($filePath) . $filePathParts[1] . '.' . $extension;
    }

    /**
     * Logs the user in by posting the credentials as JSON to LOGIN_PAGE and
     * remembering the returned access token. Does nothing when a token is
     * already held.
     *
     * @throws Exception When the endpoint does not report success.
     */
    private function login()
    {
        if ($this->accessToken !== null) {
            return;
        }

        $loginJson = json_decode((string)$this->c(self::LOGIN_PAGE, json_encode($this->credentials)));

        $succeeded = is_object($loginJson)
            && isset($loginJson->httpStatus, $loginJson->status, $loginJson->data->access)
            && $loginJson->httpStatus == 200
            && $loginJson->status == 'success';

        if (!$succeeded) {
            // The response may be missing or malformed, so read it defensively.
            $message = is_object($loginJson) && isset($loginJson->message) ? $loginJson->message : 'Login failed';
            $code = is_object($loginJson) && isset($loginJson->httpStatus) ? (int)$loginJson->httpStatus : 0;
            throw new Exception($message, $code);
        }

        $this->accessToken = $loginJson->data->access;
    }

    /**
     * Yields the books listed on one page of the "my e-books" page.
     *
     * @param int $page
     * @return Generator
     * @throws Exception
     */
    public function getBooks(int $page = 0)
    {
        // log in
        $this->login();

        // fetch the list of books
        $html = $this->c(self::MY_BOOKS_PAGE . '?page=' . $page);
        $document = new Document($html);

        foreach ($document->find('.product-line.unseen') as $book) {
            // A listing without a thumbnail link yields an empty url.
            $thumbnail = $book->first('div[class*=product-thumbnail]');

            yield [
                'nid' => (int)$book->attr('nid'),
                'title' => str_replace(["\r\n"], '', trim((string)$book->first('.title::text'))),
                'ebook_isbn' => $book->first('div[isbn]::attr(isbn)'),
                'img' => $book->first('img[class*=imagecache]::attr(src)'),
                'url' => $this->truncateBookUrl($thumbnail === null ? null : $thumbnail->first('a::attr(href)')),
                'pdf' => $this->truncateBookUrl($book->first('a[href$=pdf]::attr(href)')),
                'epub' => $this->truncateBookUrl($book->first('a[href$=epub]::attr(href)')),
                'mobi' => $this->truncateBookUrl($book->first('a[href$=mobi]::attr(href)')),
                'code' => $this->truncateBookUrl($book->first('a[href*=code_download]::attr(href)')),
            ];
        }
    }

    /**
     * Fetches the first 100 purchases from the REST endpoint.
     *
     * @return mixed|null The `data` member of the decoded response, or null
     *                    when the response does not report success.
     * @throws Exception When login fails.
     */
    public function getAllBooks()
    {
        $this->login();

        $data = $this->c(self::BASE_URL . 'mapt-rest/users/me/purchases?limit=100&offset=0&order=DESC&sort=purchase_date');
        $jsonData = json_decode((string)$data);

        if (is_object($jsonData)
            && isset($jsonData->httpStatus, $jsonData->status)
            && $jsonData->httpStatus == 200
            && $jsonData->status == 'success'
        ) {
            // json_decode() produced an object, so use property access.
            return isset($jsonData->data) ? $jsonData->data : null;
        }

        return null;
    }

    /**
     * Returns the number of pages of books, read from the last link of the
     * pager on the "my e-books" page.
     *
     * @return int 0 when the page has no pager link.
     * @throws Exception When login fails.
     */
    public function getNumberOfPages()
    {
        $this->login();
        $document = new Document($this->c(self::MY_BOOKS_PAGE));
        $lastPageLink = $document->first('.solr-pager-page-selector a:last-child');

        return $lastPageLink === null ? 0 : (int)$lastPageLink->text();
    }

    /**
     * Tells whether a book with the given nid is stored.
     *
     * @param int $nid
     * @return bool
     */
    public function isBookByNid(int $nid)
    {
        $stmt = $this->getDatabase()->prepare('SELECT nid FROM book WHERE nid = :nid');
        $stmt->bindValue(':nid', $nid, SQLITE3_INTEGER);

        return $stmt->execute()->fetchArray(SQLITE3_ASSOC) !== false;
    }

    /**
     * Loads the stored row of a book.
     *
     * @param int $nid
     * @return array|false The row, or false when there is none.
     */
    public function getBookByNid(int $nid)
    {
        $stmt = $this->getDatabase()->prepare('SELECT * FROM book WHERE nid = :nid');
        $stmt->bindValue(':nid', $nid, SQLITE3_INTEGER);

        return $stmt->execute()->fetchArray(SQLITE3_ASSOC);
    }

    /**
     * Performs an HTTP request with cURL, sharing cookies through cookie.txt
     * and sending the bearer token once one is held.
     *
     * @param string       $url
     * @param array|string $post          Non-empty value makes the request a POST;
     *                                    arrays are form-encoded, strings are sent as-is.
     * @param string|false $localFilePath When set, the body is written to this file.
     * @return string|bool Response body, or cURL's boolean result when writing to a file or on failure.
     * @throws RuntimeException When the target file cannot be opened.
     */
    public function c($url, $post = [], $localFilePath = false)
    {
        $cookie = "cookie.txt";
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);

        if (!is_null($this->accessToken)) {
            $authorization = "Authorization: Bearer " . $this->accessToken;
            curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json', $authorization]);
        }

        if (!empty($post)) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, is_array($post) ? http_build_query($post) : $post);
        }

        $fp = null;
        if ($localFilePath) {
            $fp = fopen($localFilePath, 'w+');
            if ($fp === false) {
                curl_close($ch);
                throw new RuntimeException('Cannot open ' . $localFilePath . ' for writing');
            }
            curl_setopt($ch, CURLOPT_FILE, $fp);
        } else {
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        }

        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

        $server_output = curl_exec($ch);
        curl_close($ch);

        // Release the download target so the data is flushed and the handle freed.
        if ($fp !== null) {
            fclose($fp);
        }

        return $server_output;
    }

    /**
     * Inserts or updates the basic data of every book on the first listing page.
     *
     * @throws Exception When login fails.
     */
    public function saveBooks()
    {
        $textFields = ['title', 'ebook_isbn', 'img', 'url', 'pdf', 'epub', 'mobi', 'code'];

        foreach ($this->getBooks() as $basicBookInfo) {
            if ($this->isBookByNid($basicBookInfo['nid'])) {
                $sql = 'UPDATE book SET nid = :nid, title = :title, ebook_isbn = :ebook_isbn, img = :img, url = :url, pdf = :pdf, epub = :epub, mobi = :mobi, code = :code, upd_dt = (DATETIME(\'now\')) WHERE nid = :nid';
            } else {
                $sql = 'INSERT INTO book (nid, title, ebook_isbn, img, url, pdf, epub, mobi, code)
                    VALUES (:nid, :title, :ebook_isbn, :img, :url, :pdf, :epub, :mobi, :code)';
            }

            $stmt = $this->getDatabase()->prepare($sql);
            $stmt->bindValue(':nid', $basicBookInfo['nid'], SQLITE3_INTEGER);
            foreach ($textFields as $field) {
                $stmt->bindValue(':' . $field, $basicBookInfo[$field], SQLITE3_TEXT);
            }
            $stmt->execute();
        }
    }

    /**
     * Updates the detail columns of an already stored book.
     *
     * @param array $bookDetails Needs nid, datepublished, reviewCount, ratingValue, numberofpages.
     * @return bool True when the update statement ran; false when the book is
     *              not stored or the statement failed.
     */
    public function saveBookDetails($bookDetails)
    {
        if (!$this->isBookByNid((int)$bookDetails['nid'])) {
            return false;
        }

        $stmt = $this->getDatabase()->prepare('UPDATE book SET datepublished = :datepublished, reviewCount = :reviewCount, ratingValue = :ratingValue, numberofpages = :numberofpages, upd_dt = (DATETIME(\'now\')) WHERE nid = :nid');
        $stmt->bindValue(':datepublished', $bookDetails['datepublished'], SQLITE3_TEXT);
        $stmt->bindValue(':reviewCount', $bookDetails['reviewCount'], SQLITE3_INTEGER);
        $stmt->bindValue(':ratingValue', $bookDetails['ratingValue'], SQLITE3_FLOAT);
        $stmt->bindValue(':numberofpages', $bookDetails['numberofpages'], SQLITE3_INTEGER);
        $stmt->bindValue(':nid', $bookDetails['nid'], SQLITE3_INTEGER);

        return $stmt->execute() !== false;
    }

    /**
     * Scrapes a book's public detail page.
     *
     * @param string $bookUrl Site-relative or absolute book URL.
     * @return array Keys: datepublished, numberofpages, paper_isbn, ebook_isbn,
     *               ebook_price, paper_price, reviewCount, ratingValue, toc,
     *               title, description, willLearn, category, nid, authors.
     */
    public function getBookDetails($bookUrl)
    {
        $bookPage = new Document($this->resolveBookUrl($bookUrl), true);

        $bookData = [];
        $bookData['datepublished'] = $bookPage->first('.book-top-block-info-authors time[itemprop="datePublished"]::attr(datetime)');
        $bookData['numberofpages'] = $bookPage->first('span[itemprop="numberOfPages"]::text');
        $bookData['paper_isbn'] = $bookPage->first('.book-info-details span[itemprop="isbn"]::text');
        $bookData['ebook_isbn'] = $bookPage->first('div.digital-product[data-product-type="ebooks"]::attr(data-product-id)');
        $bookData['ebook_price'] = $bookPage->first('div.digital-product[data-product-type="ebooks"]::attr(data-product-price)');
        $bookData['paper_price'] = $bookPage->first('div.physical-product[data-product-type="books"]::attr(data-product-price)');
        $bookData['reviewCount'] = $bookPage->first('meta[itemprop="reviewCount"]::attr(content)');
        $bookData['ratingValue'] = $bookPage->first('meta[itemprop="ratingValue"]::attr(content)');

        $bookData['toc'] = [];
        foreach ($bookPage->find('#book-info-toc.onlyDesktop .book-toc-chapter') as $chapter) {
            $titleElement = $chapter->first('div[class*="book-toc-chapter-title"]');
            if ($titleElement === null) {
                continue; // entries without a title element are not chapters
            }

            $bookData['toc'][] = [
                'title' => trim($titleElement->text()),
                'subchapters' => $chapter->find('div[class*="book-toc-section-text"]::text'),
            ];
        }

        $bookData['title'] = $bookPage->first('h1[itemprop="name"]::text');
        $bookData['description'] = implode(' ', $bookPage->find('div.book-info-bottom-indetail-text[itemprop="description"] p::text'));
        $bookData['willLearn'] = $bookPage->find('div.book-info-will-learn-text li::text');
        $bookData['category'] = $bookPage->first('div[data-product-id="' . $bookData['paper_isbn'] . '"]::attr(data-product-category)');
        $bookData['nid'] = $bookPage->first('div.digital-product[data-product-type="ebooks"]::attr(data-product-nid)');

        $bookData['authors'] = [];
        foreach ($bookPage->find('[itemprop="author"]') as $author) {
            $bookData['authors'][] = [
                'name' => trim($this->elementText($author->first('.book-info-bottom-author-title'))),
                'bio' => preg_replace('#\s{2,}#', ' ', trim($this->elementText($author->first('.book-info-bottom-author-body')))),
            ];
        }

        return $bookData;
    }

    /**
     * Text of a looked-up element, or '' when the lookup found nothing.
     *
     * @param object|null $element
     * @return string
     */
    private function elementText($element)
    {
        return $element === null ? '' : (string)$element->text();
    }

    /**
     * Picks one stored book at random.
     *
     * @return array|false The row, or false when the table is empty.
     */
    public function getRandomDatabaseBook()
    {
        $stmt = $this->getDatabase()->prepare('SELECT * FROM book ORDER BY RANDOM() LIMIT 1');

        return $stmt->execute()->fetchArray(SQLITE3_ASSOC);
    }
}
|
|
|
|
// Ad-hoc driver for the Packt class. Execution stops at the first exit();
// the later sections are scratch experiments that only run when the exits
// above them are removed.
echo '<pre>';

$packt = new Packt($credentials);

//var_dump($packt->getNumberOfPages());

$packt->getAllBooks();
exit();

// --- scratch: dump the first listing page ---
$books = $packt->getBooks();
print_r(iterator_to_array($books));
exit();

// --- scratch: scrape and store one book's details ---
echo '<pre>';

//$getRandomDatabaseBook = $packt->getRandomDatabaseBook();
//print_r($getRandomDatabaseBook);
//if(!empty($getRandomDatabaseBook['url'])){
$bookDetails = $packt->getBookDetails("/networking-and-servers/windows-server-2016-automation-powershell-cookbook-second-edition");
print_r($bookDetails);
$packt->saveBookDetails($bookDetails);
//}

//print_r($packt->getBookByNid($bookDetails['nid']));

exit();

// --- scratch: same as above, with a URL that has no leading slash ---
$bookDetails = $packt->getBookDetails('networking-and-servers/windows-server-2016-automation-powershell-cookbook-second-edition');
print_r($bookDetails);
$packt->saveBookDetails($bookDetails);

exit();
|
|
|
|
/**
 * Performs an HTTP request with cURL, following redirects and sharing
 * cookies through cookie.txt in the working directory.
 *
 * @param string       $url
 * @param string|array $post          When non-empty the request becomes a POST and
 *                                    this value is passed to CURLOPT_POSTFIELDS as-is.
 * @param string|null  $localFilePath When not null, the URL is treated as a file
 *                                    and its body is saved under this local path.
 * @return string|bool Response body, or cURL's boolean result when saving to a
 *                     file or when the transfer fails.
 * @throws RuntimeException When the local file cannot be opened for writing.
 */
function c($url, $post = '', $localFilePath = null)
{
    $cookie = "cookie.txt";
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);

    if (!empty($post)) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    }

    $fp = null;
    if (!is_null($localFilePath)) {
        $fp = fopen($localFilePath, 'w+');
        if ($fp === false) {
            curl_close($ch);
            throw new RuntimeException('Cannot open ' . $localFilePath . ' for writing');
        }
        curl_setopt($ch, CURLOPT_FILE, $fp);
    } else {
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    }

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

    $server_output = curl_exec($ch);
    curl_close($ch);

    // Close the download target so buffered data is flushed and the handle
    // is not leaked across the many downloads of one run.
    if ($fp !== null) {
        fclose($fp);
    }

    return $server_output;
}
|
|
|
|
/**
 * Builds the absolute URL of a book page.
 *
 * Any occurrence of the site origin is removed from the input, surrounding
 * slashes are trimmed from what is left, and the origin is put in front.
 *
 * @param string $bookUrl Site-relative or absolute book URL.
 * @return string
 */
function resolveBookUrl($bookUrl)
{
    $origin = 'https://www.packtpub.com/';
    $relativePath = str_replace($origin, '', $bookUrl);

    return $origin . trim($relativePath, '/');
}
|
|
|
|
/**
 * Strips the site origin from a URL, leaving the site-relative path.
 *
 * Callers pass the result of link lookups, which is null when no link was
 * found; null is treated as an empty string so no null reaches str_replace().
 *
 * @param string|null $bookUrl
 * @return string
 */
function truncateBookUrl($bookUrl)
{
    return str_replace('https://www.packtpub.com', '', $bookUrl ?? '');
}
|
|
|
|
/**
 * Scrapes a book's public detail page.
 *
 * @param string $bookUrl Site-relative or absolute book URL.
 * @return array Keys: datepublished, numberofpages, isbn, reviewCount,
 *               ratingValue, toc (list of ['title', 'subchapters']),
 *               description, willLearn, category, authors (list of ['name', 'bio']).
 */
function getBookInfo($bookUrl)
{
    $bookPage = new Document(resolveBookUrl($bookUrl), true);

    // Text of a looked-up element, or '' when the lookup found nothing.
    $textOf = function ($element) {
        return $element === null ? '' : (string)$element->text();
    };

    $bookData = [];
    $bookData['datepublished'] = $bookPage->first('.book-top-block-info-authors time[itemprop="datePublished"]::attr(datetime)');
    $bookData['numberofpages'] = $bookPage->first('span[itemprop="numberOfPages"]::text');
    $bookData['isbn'] = $bookPage->first('span[itemprop="isbn"]::text');
    $bookData['reviewCount'] = $bookPage->first('meta[itemprop="reviewCount"]::attr(content)');
    $bookData['ratingValue'] = $bookPage->first('meta[itemprop="ratingValue"]::attr(content)');

    $bookData['toc'] = [];
    foreach ($bookPage->find('#book-info-toc.onlyDesktop .book-toc-chapter') as $chapter) {
        // Look the title up once; entries without one are skipped.
        $titleElement = $chapter->first('div[class*="book-toc-chapter-title"]');
        if ($titleElement === null) {
            continue;
        }

        $bookData['toc'][] = [
            'title' => trim($titleElement->text()),
            'subchapters' => $chapter->find('div[class*="book-toc-section-text"]::text'),
        ];
    }

    $bookData['description'] = implode(' ', $bookPage->find('div.book-info-bottom-indetail-text[itemprop="description"] p::text'));
    $bookData['willLearn'] = $bookPage->find('div.book-info-will-learn-text li::text');
    $bookData['category'] = $bookPage->first('div[data-product-id="' . $bookData['isbn'] . '"]::attr(data-product-category)');

    $bookData['authors'] = [];
    foreach ($bookPage->find('[itemprop="author"]') as $author) {
        // An author block may lack the title or body element.
        $bookData['authors'][] = [
            'name' => trim($textOf($author->first('.book-info-bottom-author-title'))),
            'bio' => preg_replace('#\s{2,}#', ' ', trim($textOf($author->first('.book-info-bottom-author-body')))),
        ];
    }

    return $bookData;
}
|
|
|
|
|
|
/**
 * Stores a book's table of contents in data.db: one row per chapter and one
 * row per subchapter, linked to its chapter through parent_id.
 *
 * The parent id is taken from lastInsertRowID() on the same connection, so
 * it is the row just written even when the chapter table has no
 * sqlite_sequence entry.
 *
 * @param array $data   Book data with a 'toc' list of ['title' => ..., 'subchapters' => [...]].
 * @param int   $bookId Value for chapter.book_id.
 * @throws RuntimeException When a row cannot be written.
 */
function saveBookChapters($data, $bookId)
{
    $db = new SQLite3('data.db');
    print_r($data);

    // Writes one chapter row and returns its row id.
    $insertChapter = function ($chapterNumber, $parentId, $name) use ($db, $bookId) {
        $stmt = $db->prepare('REPLACE INTO chapter (book_id, chapter_number, parent_id, name)
            VALUES (:book_id, :chapter_number, :parent_id, :name)');
        if ($stmt === false) {
            throw new RuntimeException('Cannot prepare chapter insert: ' . $db->lastErrorMsg());
        }

        $stmt->bindValue(':book_id', $bookId, SQLITE3_INTEGER);
        $stmt->bindValue(':chapter_number', $chapterNumber, SQLITE3_INTEGER);
        $stmt->bindValue(':parent_id', $parentId, SQLITE3_INTEGER);
        $stmt->bindValue(':name', $name, SQLITE3_TEXT);

        if ($stmt->execute() === false) {
            throw new RuntimeException('Cannot insert chapter: ' . $db->lastErrorMsg());
        }

        return $db->lastInsertRowID();
    };

    foreach ($data['toc'] as $chapterNumber => $chapter) {
        $chapterId = $insertChapter($chapterNumber + 1, null, $chapter['title']);

        if (!isset($chapter['subchapters'])) {
            continue;
        }

        foreach ($chapter['subchapters'] as $subChapterNumber => $subChapterName) {
            $insertChapter($subChapterNumber + 1, $chapterId, $subChapterName);
        }
    }
}
|
|
|
|
|
|
// Legacy import: for every stored book, scrape its detail page and store the
// table of contents in the chapter table.
echo '<pre>';

$db = new SQLite3('data.db');

// Writes one chapter row and returns its row id. The id is read with
// lastInsertRowID() on the same connection, so subchapters are linked to the
// row just written rather than to whatever sqlite_sequence reports.
$insertChapter = function ($bookId, $chapterNumber, $parentId, $name) use ($db) {
    $stmt = $db->prepare('REPLACE INTO chapter (book_id, chapter_number, parent_id, name)
        VALUES (:book_id, :chapter_number, :parent_id, :name)');
    if ($stmt === false) {
        throw new RuntimeException('Cannot prepare chapter insert: ' . $db->lastErrorMsg());
    }

    $stmt->bindValue(':book_id', $bookId, SQLITE3_INTEGER);
    $stmt->bindValue(':chapter_number', $chapterNumber, SQLITE3_INTEGER);
    $stmt->bindValue(':parent_id', $parentId, SQLITE3_INTEGER);
    $stmt->bindValue(':name', $name, SQLITE3_TEXT);

    if ($stmt->execute() === false) {
        throw new RuntimeException('Cannot insert chapter: ' . $db->lastErrorMsg());
    }

    return $db->lastInsertRowID();
};

$booksStmt = $db->prepare('select * from book');
$booksResult = $booksStmt->execute();

while ($book = $booksResult->fetchArray(SQLITE3_ASSOC)) {
    $data = getBookInfo($book['url']);

    foreach ($data['toc'] as $chapterNumber => $chapter) {
        $chapterId = $insertChapter($book['id'], $chapterNumber + 1, null, $chapter['title']);

        if (!isset($chapter['subchapters'])) {
            continue;
        }

        foreach ($chapter['subchapters'] as $subChapterNumber => $subChapterName) {
            $insertChapter($book['id'], $subChapterNumber + 1, $chapterId, $subChapterName);
        }
    }
}
|
|
|
|
//$data = json_decode(file_get_contents('data.txt'), 1);
|
|
//$bookId = 1;
|
|
|
|
|
|
//print_r($data);
|
|
|
|
|
|
/**
 * For every stored book, scrapes its authors, stores authors that are not
 * yet known (matched on name and bio) and links each author to the book.
 *
 * A new author's id is read with lastInsertRowID() on the same connection,
 * so the link always points at the row just inserted.
 */
function fetchAuthorsByBooks()
{
    $db = new SQLite3('data.db');

    $booksStmt = $db->prepare('select * from book');
    $booksResult = $booksStmt->execute();

    while ($book = $booksResult->fetchArray(SQLITE3_ASSOC)) {
        $bookInfo = getBookInfo($book['url']);

        foreach ($bookInfo['authors'] as $author) {
            $authorName = trim($author['name']);

            $lookup = $db->prepare('SELECT * FROM author WHERE name = :name and bio = :bio');
            $lookup->bindValue(':name', $authorName, SQLITE3_TEXT);
            $lookup->bindValue(':bio', $author['bio'], SQLITE3_TEXT);
            $authorData = $lookup->execute()->fetchArray(SQLITE3_ASSOC);

            if ($authorData) {
                $authorId = $authorData['id'];
            } else {
                $insert = $db->prepare('INSERT INTO author(name, bio) VALUES (:name, :bio)');
                $insert->bindValue(':name', $authorName, SQLITE3_TEXT);
                $insert->bindValue(':bio', $author['bio'], SQLITE3_TEXT);
                $insert->execute();
                $authorId = $db->lastInsertRowID();
            }

            // The link is written the same way for new and known authors.
            $link = $db->prepare('INSERT INTO author_book(author_id, book_id) VALUES (:author_id, :book_id)');
            $link->bindValue(':author_id', $authorId, SQLITE3_INTEGER);
            $link->bindValue(':book_id', $book['id'], SQLITE3_INTEGER);
            $link->execute();
        }
    }
}
|
|
|
|
/**
 * Walks the "my e-books" page, downloads every available file of each book
 * and stores books that are not yet in the database.
 *
 * Reads the global $loginData and posts it to the site root before fetching
 * the listing. NOTE(review): $loginData is not assigned anywhere in the
 * visible file; confirm it is defined (e.g. by settings.php).
 *
 * The run is terminated with die() once more than 500 files were downloaded.
 */
function downloadBooks()
{
    global $loginData;

    c('https://www.packtpub.com/', $loginData);
    $document = new Document(c('https://www.packtpub.com/account/my-ebooks'));

    // Download link selectors, in download order.
    $formats = [
        'pdf' => 'a[href$=pdf]::attr(href)',
        'epub' => 'a[href$=epub]::attr(href)',
        'mobi' => 'a[href$=mobi]::attr(href)',
        'code' => 'a[href*=code_download]::attr(href)',
    ];

    $dl = 0;
    $db = new SQLite3('data.db');

    foreach ($document->find('.product-line.unseen') as $book) {
        $bookData = [];
        $bookData['title'] = str_replace(["\r\n"], '', trim((string)$book->first('.title::text')));
        $bookData['nid'] = (int)$book->attr('nid');

        // A missing link comes back as null; store it as an empty path.
        foreach ($formats as $format => $selector) {
            $bookData[$format] = truncateBookUrl((string)$book->first($selector));
        }

        $bookData['isbn'] = $book->first('div[isbn]::attr(isbn)');
        $bookData['img'] = $book->first('img[class*=imagecache]::attr(src)');

        $thumbnail = $book->first('div[class*=product-thumbnail]');
        $bookData['url'] = truncateBookUrl($thumbnail === null ? '' : (string)$thumbnail->first('a::attr(href)'));

        $stmt = $db->prepare('SELECT id FROM book WHERE nid = :nid');
        $stmt->bindValue(':nid', $bookData['nid'], SQLITE3_INTEGER);
        $resultData = $stmt->execute()->fetchArray(SQLITE3_ASSOC);

        foreach (array_keys($formats) as $format) {
            $dl += downloadBook($bookData[$format]);
        }

        if (!$resultData) {
            $bookData['info'] = getBookInfo($bookData['url']);

            $stmt = $db->prepare('REPLACE INTO book (nid, title, isbn, img, url, datepublished, numberofpages, reviewCount, ratingValue, category, pdf, epub, mobi, code)
                VALUES (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :reviewCount, :ratingValue, :category, :pdf, :epub, :mobi, :code)');

            $stmt->bindValue(':nid', $bookData['nid'], SQLITE3_INTEGER);
            $stmt->bindValue(':title', $bookData['title'], SQLITE3_TEXT);
            $stmt->bindValue(':isbn', $bookData['isbn'], SQLITE3_TEXT);
            $stmt->bindValue(':img', $bookData['img'], SQLITE3_TEXT);
            $stmt->bindValue(':url', $bookData['url'], SQLITE3_TEXT);
            $stmt->bindValue(':datepublished', $bookData['info']['datepublished'], SQLITE3_TEXT);
            $stmt->bindValue(':numberofpages', $bookData['info']['numberofpages'], SQLITE3_INTEGER);
            $stmt->bindValue(':reviewCount', $bookData['info']['reviewCount'], SQLITE3_INTEGER);
            $stmt->bindValue(':ratingValue', (float)$bookData['info']['ratingValue'], SQLITE3_FLOAT);
            $stmt->bindValue(':category', $bookData['info']['category'], SQLITE3_TEXT);
            foreach (array_keys($formats) as $format) {
                $stmt->bindValue(':' . $format, $bookData[$format], SQLITE3_TEXT);
            }
            $stmt->execute();
        }

        echo $dl . ' - ';

        if ($dl > 500) {
            die();
        }
    }
}
|
|
|
|
/**
 * Downloads one file unless it is already present locally.
 *
 * @param string|null $url Site-relative download path; empty or null means
 *                         the book has no such file and nothing is done.
 * @return int 1 when a download was started, 0 when skipped.
 * @throws RuntimeException When the target directory cannot be created.
 */
function downloadBook($url)
{
    // Missing links arrive as '' or null.
    if (!strlen((string)$url)) {
        return 0;
    }

    $localPath = fileNameFromPath($url);
    if (file_exists($localPath)) {
        return 0;
    }

    $directory = directoryNameFromPath($url);
    // Re-check is_dir() after a failed mkdir(): another process may have
    // created the directory in the meantime.
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        throw new RuntimeException('Cannot create directory ' . $directory);
    }

    c(resolveBookUrl($url), [], $localPath);

    return 1;
}
|
|
|
|
/**
 * Maps a site-relative download path to the local file name
 * books/<segment 1>/<segment 1>.<extension>, where the extension is the
 * third path segment, or "zip" when the path has none.
 *
 * @param string $filePath e.g. /ebook_download/123/pdf
 * @return string
 */
function fileNameFromPath($filePath)
{
    $segments = explode('/', trim($filePath, '/'));
    $bookKey = $segments[1];
    $extension = $segments[2] ?? 'zip';

    return implode(DIRECTORY_SEPARATOR, ['books', $bookKey, $bookKey . '.' . $extension]);
}
|
|
|
|
/**
 * Maps a site-relative download path to the local directory
 * books/<segment 1>/ (with a trailing separator).
 *
 * @param string $filePath e.g. /ebook_download/123/pdf
 * @return string
 */
function directoryNameFromPath($filePath)
{
    $segments = explode('/', trim($filePath, '/'));

    return implode(DIRECTORY_SEPARATOR, ['books', $segments[1], '']);
}
|
|
|
|
|