Updated the way a single book page is parsed to find the language, publisher, and book description. Also, the URL of the page is now attached to the book info object.
This commit is contained in:
@@ -14,6 +14,7 @@ abstract class AbstractBookInfo
|
|||||||
public int $pages;
|
public int $pages;
|
||||||
public string $cycle;
|
public string $cycle;
|
||||||
public int $volume;
|
public int $volume;
|
||||||
public string $language;
|
public ?string $language;
|
||||||
public string $datePublished;
|
public string $datePublished;
|
||||||
|
public ?string $publisher;
|
||||||
}
|
}
|
||||||
@@ -7,7 +7,7 @@ use Techtube\Bookinfo\Api\AbstractBookInfo;
|
|||||||
|
|
||||||
class BookFinder
|
class BookFinder
|
||||||
{
|
{
|
||||||
private static $searchUrl = 'https://lubimyczytac.pl/szukaj/ksiazki?phrase=';
|
private static string $searchUrl = 'https://lubimyczytac.pl/szukaj/ksiazki?phrase=';
|
||||||
|
|
||||||
private DataParser $parser;
|
private DataParser $parser;
|
||||||
|
|
||||||
@@ -21,12 +21,12 @@ class BookFinder
|
|||||||
return $this->parser->searchPage(new Document($this->getSearchUrl($phrase), true));
|
return $this->parser->searchPage(new Document($this->getSearchUrl($phrase), true));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function byUrl($url): AbstractBookInfo
|
public function byUrl(string $url): AbstractBookInfo
|
||||||
{
|
{
|
||||||
return $this->parser->singlePage(new Document($url, true));
|
return $this->parser->singlePage(new Document($url, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getSearchUrl(string $phrase): string
|
private function getSearchUrl(string $phrase): string
|
||||||
{
|
{
|
||||||
return self::$searchUrl . $phrase;
|
return self::$searchUrl . $phrase;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,21 +18,23 @@ class DataParser
|
|||||||
{
|
{
|
||||||
$info = new Info();
|
$info = new Info();
|
||||||
$jsonInfo = json_decode($document->first('script[type="application/ld+json"]')->text());
|
$jsonInfo = json_decode($document->first('script[type="application/ld+json"]')->text());
|
||||||
|
$info->url = $document->getDocument()->baseURI;
|
||||||
|
$info->publisher = $document->first('a[href*="wydawnictwo"]')?->text();
|
||||||
$info->author = $document->first('meta[property="books:author"]')->getAttribute('content');
|
$info->author = $document->first('meta[property="books:author"]')->getAttribute('content');
|
||||||
$info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content');
|
$info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content');
|
||||||
$info->description = $document->first('meta[property="og:description"]')->getAttribute('content');
|
$info->description = trim($document->first('#book-description p')->text());
|
||||||
$info->title = trim($document->first('h1.book__title')->text());
|
$info->title = trim($document->first('h1.book__title')->text());
|
||||||
$info->category = trim($document->first('.book__category')->text());
|
$info->category = trim($document->first('.book__category')->text());
|
||||||
$info->cover_url = $this->generateCoverUrls(
|
$info->cover_url = $this->generateCoverUrls(
|
||||||
$document->first('meta[property="og:image"]')->getAttribute('content')
|
$document->first('meta[property="og:image"]')->getAttribute('content')
|
||||||
);
|
);
|
||||||
$info->pages = (int)$document->first('span.book__pages')?->text();
|
$info->pages = (int)$document->first('span.book__pages')?->text();
|
||||||
if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($document->first('a[href*="/cykl/"]')?->text()), $series)) {
|
if (preg_match('#(.*) \(tom (\d*)\)#ism', trim($document->first('a[href*="/cykl/"]')?->text() ?? ''), $series)) {
|
||||||
$info->cycle = $series[1];
|
$info->cycle = $series[1];
|
||||||
$info->volume = $series[2];
|
$info->volume = $series[2];
|
||||||
}
|
}
|
||||||
$info->language = $jsonInfo->inLanguage ?? null;
|
$info->language = $jsonInfo?->inLanguage ?? trim($document->xpath("//*[contains(text(), 'Język:')]")[0]->nextSibling('dd')->text());
|
||||||
$info->datePublished = $jsonInfo->datePublished ?? null;
|
$info->datePublished = $jsonInfo?->datePublished ?? null;
|
||||||
|
|
||||||
return $info;
|
return $info;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user