From 293a776d16070a46dd1637296d8d195049cf489e Mon Sep 17 00:00:00 2001 From: krzysiej Date: Fri, 27 May 2022 10:54:39 +0200 Subject: [PATCH] Updated the way a single book page is parsed to find the language, publisher and book description. Also, the URL of the page is now attached to the book info object. --- src/Api/AbstractBookInfo.php | 3 ++- src/BookFinder.php | 6 +++--- src/DataParser.php | 10 ++++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Api/AbstractBookInfo.php b/src/Api/AbstractBookInfo.php index cc286d7..99cecb4 100644 --- a/src/Api/AbstractBookInfo.php +++ b/src/Api/AbstractBookInfo.php @@ -14,6 +14,7 @@ abstract class AbstractBookInfo public int $pages; public string $cycle; public int $volume; - public string $language; + public ?string $language; public string $datePublished; + public ?string $publisher; } \ No newline at end of file diff --git a/src/BookFinder.php b/src/BookFinder.php index 5529256..ee7ec90 100644 --- a/src/BookFinder.php +++ b/src/BookFinder.php @@ -7,7 +7,7 @@ use Techtube\Bookinfo\Api\AbstractBookInfo; class BookFinder { - private static $searchUrl = 'https://lubimyczytac.pl/szukaj/ksiazki?phrase='; + private static string $searchUrl = 'https://lubimyczytac.pl/szukaj/ksiazki?phrase='; private DataParser $parser; @@ -21,12 +21,12 @@ class BookFinder return $this->parser->searchPage(new Document($this->getSearchUrl($phrase), true)); } - public function byUrl($url): AbstractBookInfo + public function byUrl(string $url): AbstractBookInfo { return $this->parser->singlePage(new Document($url, true)); } - public function getSearchUrl(string $phrase): string + private function getSearchUrl(string $phrase): string { return self::$searchUrl . 
$phrase; } diff --git a/src/DataParser.php b/src/DataParser.php index 7cb81c3..53d92a9 100644 --- a/src/DataParser.php +++ b/src/DataParser.php @@ -18,21 +18,23 @@ class DataParser { $info = new Info(); $jsonInfo = json_decode($document->first('script[type="application/ld+json"]')->text()); + $info->url = $document->getDocument()->baseURI; + $info->publisher = $document->first('a[href*="wydawnictwo"]')?->text(); $info->author = $document->first('meta[property="books:author"]')->getAttribute('content'); $info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content'); - $info->description = $document->first('meta[property="og:description"]')->getAttribute('content'); + $info->description = trim($document->first('#book-description p')->text()); $info->title = trim($document->first('h1.book__title')->text()); $info->category = trim($document->first('.book__category')->text()); $info->cover_url = $this->generateCoverUrls( $document->first('meta[property="og:image"]')->getAttribute('content') ); $info->pages = (int)$document->first('span.book__pages')?->text(); - if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($document->first('a[href*="/cykl/"]')?->text()), $series)) { + if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($document->first('a[href*="/cykl/"]')?->text() ?? ''), $series)) { $info->cycle = $series[1]; $info->volume = $series[2]; } - $info->language = $jsonInfo->inLanguage ?? null; - $info->datePublished = $jsonInfo->datePublished ?? null; + $info->language = $jsonInfo?->inLanguage ?? trim($document->xpath("//*[contains(text(), 'Język:')]")[0]->nextSibling('dd')->text()); + $info->datePublished = $jsonInfo?->datePublished ?? null; return $info; }