From 7d70ad40cb2b4c6b86f60bc14bc9e0ac900a65bc Mon Sep 17 00:00:00 2001 From: kplaczek Date: Wed, 25 May 2022 19:58:15 +0200 Subject: [PATCH] Excluded data parsing to a data parser class that handles document and returns info class or array of those. --- src/Api/AbstractBookInfo.php | 5 ++-- src/BookFinder.php | 19 ++++++++++-- src/BookInfo.php | 36 ----------------------- src/DataParser.php | 57 ++++++++++++++++++++++++++++++++++++ src/Info.php | 10 +++++++ 5 files changed, 85 insertions(+), 42 deletions(-) delete mode 100644 src/BookInfo.php create mode 100644 src/DataParser.php create mode 100644 src/Info.php diff --git a/src/Api/AbstractBookInfo.php b/src/Api/AbstractBookInfo.php index 1c89093..378e190 100644 --- a/src/Api/AbstractBookInfo.php +++ b/src/Api/AbstractBookInfo.php @@ -4,6 +4,7 @@ namespace Techtube\Bookinfo\Api; abstract class AbstractBookInfo { + public string $url; public string $author; public string $isbn; public string $description; @@ -11,10 +12,8 @@ abstract class AbstractBookInfo public string $category; public string $cover_url; public int $pages; - public string $series; + public string $cycle; public int $volume; public string $language; public string $datePublished; - - abstract public function parse(); } \ No newline at end of file diff --git a/src/BookFinder.php b/src/BookFinder.php index a72bb61..5529256 100644 --- a/src/BookFinder.php +++ b/src/BookFinder.php @@ -7,15 +7,28 @@ use Techtube\Bookinfo\Api\AbstractBookInfo; class BookFinder { + private static $searchUrl = 'https://lubimyczytac.pl/szukaj/ksiazki?phrase='; - public function search(string $searchText): \Exception + private DataParser $parser; + + public function __construct() { - return new \Exception('need to implement this'); + $this->parser = new DataParser(); + } + + public function search(string $phrase): array + { + return $this->parser->searchPage(new Document($this->getSearchUrl($phrase), true)); } public function byUrl($url): AbstractBookInfo { - return (new BookInfo(new Document($url, true)))->parse(); + return $this->parser->singlePage(new Document($url, true)); + } + + public function getSearchUrl(string $phrase): string + { + return self::$searchUrl . $phrase; } } diff --git a/src/BookInfo.php b/src/BookInfo.php deleted file mode 100644 index c56b35b..0000000 --- a/src/BookInfo.php +++ /dev/null @@ -1,36 +0,0 @@ -document = $document; - } - - public function parse(): self - { - $jsonInfo = json_decode($this->document->first('script[type="application/ld+json"]')->text()); - $this->author = $this->document->first('meta[property="books:author"]')->getAttribute('content'); - $this->isbn = $this->document->first('meta[property="books:isbn"]')->getAttribute('content'); - $this->description = $this->document->first('meta[property="og:description"]')->getAttribute('content'); - $this->title = trim($this->document->first('h1.book__title')->text()); - $this->category = trim($this->document->first('.book__category')->text()); - $this->cover_url = $this->document->first('meta[property="og:image"]')->getAttribute('content'); - $this->pages = (int)$this->document->first('span.book__pages')?->text(); - if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($this->document->first('a[href*="/cykl/"]')?->text()), $series)) { - $this->series = $series[1]; - $this->volume = $series[2]; - } - $this->language = $jsonInfo->inLanguage ?? null; - $this->datePublished = $jsonInfo->datePublished ?? null; - return $this; - } -} - diff --git a/src/DataParser.php b/src/DataParser.php new file mode 100644 index 0000000..d5dfba3 --- /dev/null +++ b/src/DataParser.php @@ -0,0 +1,57 @@ +first('script[type="application/ld+json"]')->text()); + $info->author = $document->first('meta[property="books:author"]')->getAttribute('content'); + $info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content'); + $info->description = $document->first('meta[property="og:description"]')->getAttribute('content'); + $info->title = trim($document->first('h1.book__title')->text()); + $info->category = trim($document->first('.book__category')->text()); + $info->cover_url = $document->first('meta[property="og:image"]')->getAttribute('content'); + $info->pages = (int)$document->first('span.book__pages')?->text(); + if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($document->first('a[href*="/cykl/"]')?->text()), $series)) { + $info->cycle = $series[1]; + $info->volume = $series[2]; + } + $info->language = $jsonInfo->inLanguage ?? null; + $info->datePublished = $jsonInfo->datePublished ?? null; + + return $info; + } + + /** + * @param Document $document + * @return AbstractBookInfo[] + * @throws InvalidSelectorException + */ + public function searchPage(Document $document): array + { + $books = $document->find('#search .authorAllBooks__single'); + + $booksInfo = []; + foreach ($books as $book) { + $bookInfo = new Info(); + $bookInfo->title = trim($book->first('.authorAllBooks__singleTextTitle')->text()); + $bookInfo->author = trim($book->first('.authorAllBooks__singleTextAuthor')->text()); + $bookInfo->url = $book->first('button[data-book-url]')->getAttribute('data-book-url'); + $bookInfo->cover_url = $book->first('.img-fluid')->getAttribute('src'); + $booksInfo[] = $bookInfo; + } + return $booksInfo; + } +} \ No newline at end of file diff --git a/src/Info.php b/src/Info.php new file mode 100644 index 0000000..4d941d3 --- /dev/null +++ b/src/Info.php @@ -0,0 +1,10 @@ +