$settings['email'],
    'password' => $settings['password'],
    'op' => 'Login',
    'form_build_id' => 'form-fba4b62ee04aafbf045b1d9ae019d90b',
    'form_id' => 'packt_user_login_form'
];

/**
 * Performs an HTTP request with cURL and returns the response body.
 *
 * Redirects are followed, and cookies are both read from and saved to
 * "cookie.txt" (resolved against the current working directory).
 *
 * @param string $url  URL to request.
 * @param array  $post Form fields. When non-empty they are URL-encoded and sent
 *                     as a POST body; when empty a plain GET is issued instead
 *                     of a POST with an empty body.
 *
 * @return string|bool The response body, or false when curl_exec() fails.
 */
function c($url, $post = [])
{
    $cookie = "cookie.txt";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);

    // Only switch to POST when there is something to send; page fetches such
    // as the e-book listing carry no payload and should stay GET requests.
    if (!empty($post)) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post));
    }

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

    // Return the response from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $server_output = curl_exec($ch);
    curl_close($ch);

    return $server_output;
}

/**
 * Scrapes the details of a single book from its product page.
 *
 * The URL is echoed to standard output before the page is loaded, so progress
 * is visible while the script runs.
 *
 * @param string $bookUrl Location of the book page; passed to
 *                        `new Document($bookUrl, true)`.
 *
 * @return array Map with the keys: datepublished, numberofpages, isbn,
 *               reviewCount, ratingValue, toc (list of ['title', 'subchapters']),
 *               description, willLearn, category and authors
 *               (list of ['name', 'bio']).
 */
function getBookInfo($bookUrl)
{
    print_r($bookUrl . "\n");

    $bookData = [];
    $bookPage = new Document($bookUrl, true);

    $bookData['datepublished'] = $bookPage->first('.book-top-block-info-authors time[itemprop="datePublished"]::attr(datetime)');
    $bookData['numberofpages'] = $bookPage->first('span[itemprop="numberOfPages"]::text');
    $bookData['isbn'] = $bookPage->first('span[itemprop="isbn"]::text');
    $bookData['reviewCount'] = $bookPage->first('meta[itemprop="reviewCount"]::attr(content)');
    $bookData['ratingValue'] = $bookPage->first('meta[itemprop="ratingValue"]::attr(content)');

    // Table of contents: chapters without a title element are skipped.
    $bookData['toc'] = [];
    foreach ($bookPage->find('#book-info-toc.onlyDesktop .book-toc-chapter') as $chapter) {
        // Look the title element up once and reuse it for the check and the text.
        $chapterTitle = $chapter->first('div[class*="book-toc-chapter-title"]');
        if ($chapterTitle === null) {
            continue;
        }

        $bookData['toc'][] = [
            'title' => trim($chapterTitle->text()),
            'subchapters' => $chapter->find('div[class*="book-toc-section-text"]::text'),
        ];
    }

    $bookData['description'] = $bookPage->find('div.book-info-bottom-indetail-text[itemprop="description"] p::text');
    $bookData['willLearn'] = $bookPage->find('div.book-info-will-learn-text li::text');

    // The category is stored on the element whose data-product-id equals the ISBN.
    $bookData['category'] = $bookPage->first('div[data-product-id="' . $bookData['isbn'] . '"]::attr(data-product-category)');

    $bookData['authors'] = [];
    foreach ($bookPage->find('[itemprop="author"]') as $author) {
        $bookData['authors'][] = [
            'name' => $author->first('h3::text'),
            'bio' => $author->find('p::text'),
        ];
    }

    return $bookData;
}

// Live mode (disabled): log in, then download the "my e-books" page instead of
// reading the saved copy below.
//$return = c('https://www.packtpub.com/', $loginData);
//$return = c('https://www.packtpub.com/account/my-ebooks');
//$document = new Document($return);

// Offline mode: parse a previously saved copy of the e-book listing.
$document = new Document('packt.html', true);

$booksData = [];
$books = $document->find('.product-line.unseen');
//$books = array_slice($books, 2, 1);

foreach ($books as $book) {
    $bookData = [];
    $bookData['title'] = str_replace(["\r\n"], '', trim($book->first('.title::text')));
    $bookData['nid'] = (int)$book->attr('nid');
    $bookData['pdf'] = $book->first('a[href$=pdf]::attr(href)');
    $bookData['epub'] = $book->first('a[href$=epub]::attr(href)');
    $bookData['mobi'] = $book->first('a[href$=mobi]::attr(href)');
    $bookData['code'] = $book->first('a[href*=code_download]::attr(href)');
    $bookData['isbn'] = $book->first('div[isbn]::attr(isbn)');
    $bookData['img'] = $book->first('img[class*=imagecache]::attr(src)');

    // An entry without a thumbnail block has no product link; record null
    // rather than calling first() on null.
    $thumbnail = $book->first('div[class*=product-thumbnail]');
    $bookData['url'] = $thumbnail === null ? null : $thumbnail->first('a::attr(href)');

    // Without a product URL there is no detail page to scrape.
    $hasUrl = $bookData['url'] !== null && $bookData['url'] !== '';
    $bookData['info'] = $hasUrl ? getBookInfo($bookData['url']) : null;

    // Results are keyed by the listing's nid attribute.
    $booksData[$bookData['nid']] = $bookData;
}

print_r($booksData);

// Encode first and verify, so a failed encode cannot overwrite books.txt with
// an empty file.
$json = json_encode($booksData);
if ($json === false) {
    error_log('Could not encode book data as JSON (json_last_error = ' . json_last_error() . ')');
    exit(1);
}

if (file_put_contents('books.txt', $json) === false) {
    error_log('Could not write books.txt');
    exit(1);
}