From f4c4d029e6597b4e667c0ce23f2555156b71a651 Mon Sep 17 00:00:00 2001 From: krzysiej Date: Tue, 8 May 2018 15:39:49 +0200 Subject: [PATCH] =?UTF-8?q?Dodanie=20sposobu=20na=20pobieranie=20ksi=C4=85?= =?UTF-8?q?=C5=BCek,=20dodanie=20wy=C5=9Bwietlania=20link=C3=B3w=20do=20po?= =?UTF-8?q?branych=20plik=C3=B3w,=20dodanie=20katalogu=20books=20do=20igno?= =?UTF-8?q?re.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + index.php | 19 ++++++++++-- packt.php | 87 +++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 219b110..ccb7796 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ cookie.txt vendor/ data.db books.txt +/books diff --git a/index.php b/index.php index 0a8d87b..4e34002 100644 --- a/index.php +++ b/index.php @@ -13,12 +13,25 @@ $result = $stmt->execute(); echo '
';
 
 echo '';
-print('');
+print('');
 while ($resultArray = $result->fetchArray(SQLITE3_ASSOC)) {
 //    print_r($resultArray);
 
-    printf('',
-        $resultArray['id'], $resultArray['nid'], $resultArray['title'], $resultArray['category'], $resultArray['datepublished'], $resultArray['numberofpages'], $resultArray['reviewCount'], $resultArray['ratingValue']);
+    $pdfUrl = 'books/' . $resultArray['nid'] . '/' . $resultArray['nid'] . '.pdf';
+    $epubUrl = 'books/' . $resultArray['nid'] . '/' . $resultArray['nid'] . '.epub';
+    $mobiUrl = 'books/' . $resultArray['nid'] . '/' . $resultArray['nid'] . '.mobi';
+    $codeUrl = '';
+    if (preg_match('#\d+#', $resultArray['code'], $found)) {
+        $codeId = $found[0];
+        $codeUrl = 'books/' . $codeId . '/' . $codeId . '.zip';
+    }
+    printf('',
+        $resultArray['id'], $resultArray['nid'], $resultArray['title'], $resultArray['category'], $resultArray['datepublished'], $resultArray['numberofpages'], $resultArray['reviewCount'], $resultArray['ratingValue'],
+        is_file($pdfUrl) ? "pdf" : (strlen($resultArray['pdf']) ? 'nie pobrano' : ''),
+        is_file($epubUrl) ? "epub" : (strlen($resultArray['epub']) ? 'nie pobrano' : ''),
+        is_file($mobiUrl) ? "mobi" : (strlen($resultArray['mobi']) ? 'nie pobrano' : ''),
+        is_file($codeUrl) ? "zip" : (strlen($resultArray['code']) ? 'nie pobrano' : '')
+    );
     if ($resultArray) {
         $x = $resultArray;
     }
diff --git a/packt.php b/packt.php
index 1218ad8..24ec5ff 100644
--- a/packt.php
+++ b/packt.php
@@ -15,37 +15,41 @@ $loginData = [
 ];
 
 
-function c($url, $post = [])
+/**
+ * Run a cURL request that shares the cookie.txt session; $post is sent as a POST body only when non-empty.
+ * @param string|null $localFilePath when given, the response body is streamed into this file
+ * @return string|bool body string, or curl_exec()'s true/false when streaming; false if the file cannot be opened
+ */
+function c($url, $post = [], $localFilePath = null)
 {
     $cookie = "cookie.txt";
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
-    curl_setopt($ch, CURLOPT_POST, 1);
 
-    curl_setopt($ch, CURLOPT_POSTFIELDS,
-        http_build_query($post));
+    if (!empty($post)) {
+        curl_setopt($ch, CURLOPT_POST, 1);
+        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post));
+    }
+    $fp = is_null($localFilePath) ? null : fopen($localFilePath, 'w+');
+    if ($fp === false) {
+        return false;
+    }
+    // Stream into the opened file when one was requested, otherwise have curl_exec() return the body.
+    curl_setopt($ch, $fp ? CURLOPT_FILE : CURLOPT_RETURNTRANSFER, $fp ?: 1);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
     curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
     curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
 
-    // receive server response ...
-    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
-
     $server_output = curl_exec($ch);
     strlen($server_output);
-
-
     curl_close($ch);
-
     return $server_output;
-
-
 }
 
 
 function resolveBookUrl($bookUrl)
 {
-    return 'https://www.packtpub.com/' . str_replace('https://www.packtpub.com/', '', $bookUrl);
+    return 'https://www.packtpub.com/' . trim(str_replace('https://www.packtpub.com/', '', $bookUrl), '/');
 }
 
 function truncateBookUrl($bookUrl)
@@ -95,14 +99,15 @@ function getBookInfo($bookUrl)
 $return = c('https://www.packtpub.com/', $loginData);
 $return = c('https://www.packtpub.com/account/my-ebooks');
 $document = new Document($return);
-
+//
 //$document = new Document('packt.html', true);
 
 $booksData = [];
 $books = $document->find('.product-line.unseen');
 
-
+$dl = 0;
 $db = new SQLite3('data.db');
+shuffle($books);
 foreach ($books as $book) {
 
     $bookData = [];
@@ -123,9 +128,14 @@ foreach ($books as $book) {
     $result = $stmt->execute();
 
     $resultData = $result->fetchArray(SQLITE3_ASSOC);
+
+    $dl += downloadBook($bookData['pdf']);
+    $dl += downloadBook($bookData['epub']);
+    $dl += downloadBook($bookData['mobi']);
+    $dl += downloadBook($bookData['code']);
+
+
     if (!$resultData) {
-
-
         $bookData['info'] = getBookInfo($bookData['url']);
         $stmt = $db->prepare('REPLACE INTO book (nid, title, isbn, img, url, datepublished, numberofpages, reviewCount, ratingValue, category, pdf, epub, mobi, code)
 VALUES  (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :reviewCount, :ratingValue, :category, :pdf, :epub, :mobi, :code)');
@@ -145,4 +155,47 @@ VALUES  (:nid, :title, :isbn, :img, :url, :datepublished, :numberofpages, :revie
         $stmt->bindValue(':code', $bookData['code'], SQLITE3_TEXT);
         $result = $stmt->execute();
     }
+    echo $dl.' - ';
+    if ($dl > 500) {
+//        var_dump($dl);
+        die();
+    }
+
 }
+/** Download one asset to its books/ path unless already there; returns 1 if a file was fetched, otherwise 0. */
+function downloadBook($url)
+{
+    if (!strlen($url) || fileExists($localPath = fileNameFromPath($url))) {
+        return 0;
+    }
+    $directory = directoryNameFromPath($url);
+    $fullUrl = resolveBookUrl($url);
+    echo ($fullUrl)."\n";
+    if (!is_dir($directory)) {
+        mkdir($directory, 0777, true);
+    }
+    // c() hands back curl_exec()'s false on a failed transfer; remove the partial file so the next run retries it.
+    if (c($fullUrl, [], $localPath) === false) {
+        is_file($localPath) && unlink($localPath);
+        return 0;
+    }
+    return 1;
+}
+/** Local file for a download path: books/<segment 1>/<segment 1>.<segment 2>; extension is "zip" without segment 2. */
+function fileNameFromPath($filePath)
+{
+    $segments = explode('/', trim($filePath, '/'));
+    return implode(DIRECTORY_SEPARATOR, ['books', $segments[1], $segments[1] . '.' . (isset($segments[2]) ? $segments[2] : 'zip')]);
+}
+/** Local directory for a download path: books/<segment 1>/ (trailing separator included). */
+function directoryNameFromPath($filePath)
+{
+    $segments = explode('/', trim($filePath, '/'));
+    return implode(DIRECTORY_SEPARATOR, ['books', $segments[1], '']);
+}
+/** Thin wrapper around file_exists(): reports whether $filePath exists (file or directory). */
+function fileExists($filePath)
+{
+    return file_exists($filePath);
+}
+
idnidTitleCategoryPublish datePagesVotesRating
idnidTitleCategoryPublish datePagesVotesRating Pdf Epub Mobi Code
%d%d%s%s%s%s%d%.1f
%d%d%s%s%s%s%d%.1f %s%s%s%s