85 lines
3.6 KiB
PHP
85 lines
3.6 KiB
PHP
<?php
|
|
|
|
namespace Techtube\Bookinfo;
|
|
|
|
use DiDom\Document;
|
|
use DiDom\Exceptions\InvalidSelectorException;
|
|
use DiDom\Query;
|
|
use JetBrains\PhpStorm\ArrayShape;
|
|
use Techtube\Bookinfo\Api\AbstractBookInfo;
|
|
|
|
class DataParser
{
    /**
     * Builds a book record from the DOM of a single book page.
     *
     * Values are read from meta tags, visible markup and, when present, the
     * page's JSON-LD script. Optional pieces (publisher, page count, cycle,
     * language, publication date, translator) tolerate missing markup; the
     * remaining fields still require their element to exist and will raise a
     * PHP Error on a page that lacks it.
     *
     * @param Document $document Parsed HTML of the book page.
     * @return AbstractBookInfo Populated Info instance.
     * @throws InvalidSelectorException
     */
    public function singlePage(Document $document): AbstractBookInfo
    {
        $info = new Info();

        // The JSON-LD script is treated as optional: every later read of $jsonInfo is
        // null-safe, so a missing script must yield null here rather than a fatal
        // "call to a member function on null". json_decode('') returns null.
        $jsonInfo = json_decode($document->first('script[type="application/ld+json"]')?->text() ?? '');

        $info->url = $document->getDocument()->baseURI;
        $info->publisher = $document->first('a[href*="wydawnictwo"]')?->text();
        $info->author = $document->first('meta[property="books:author"]')->getAttribute('content');
        $info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content');
        $info->description = trim($document->first('#book-description p')->text());
        $info->title = trim($document->first('h1.book__title')->text());
        $info->category = trim($document->first('.book__category')->text());
        $info->cover_url = $this->generateCoverUrls(
            $document->first('meta[property="og:image"]')->getAttribute('content')
        );

        // A missing element yields null, which the cast turns into 0.
        $info->pages = (int)$document->first('span.book__pages')?->text();

        // Link text is expected in the form "<cycle name> (tom <number>)".
        // NOTE(review): searchPage() accepts any text as the volume, this method only digits — confirm which is intended.
        $cycleText = trim($document->first('a[href*="/cykl/"]')?->text() ?? '');
        if (preg_match('#(.*) \(tom (\d*)\)#ism', $cycleText, $series)) {
            $info->cycle = $series[1];
            $info->volume = $series[2];
        }

        // Prefer the JSON-LD value; otherwise read the <dd> following the "Język:" label.
        // The fallback is null-safe (same shape as the translator lookup below), so a page
        // with neither source yields an empty string instead of crashing.
        $info->language = $jsonInfo?->inLanguage
            ?? trim(
                $document->first("//*[contains(text(), 'Język:')]", Query::TYPE_XPATH)
                    ?->nextSibling('dd')
                    ?->text() ?? ''
            );
        $info->datePublished = $jsonInfo?->datePublished ?? null;
        $info->translator = trim(
            $document->first("//*[contains(text(), 'Tłumacz:')]", Query::TYPE_XPATH)
                ?->nextSibling('dd')
                ?->text() ?? ''
        );

        return $info;
    }

    /**
     * Extracts one book record per result entry of a search page.
     *
     * Returns an empty array when the document has no "#searchksiazki" element.
     *
     * @param Document $document Parsed HTML of the search results page.
     * @return AbstractBookInfo[]
     * @throws InvalidSelectorException
     */
    public function searchPage(Document $document): array
    {
        // Check the guard first so the result query is not run on pages that will be ignored anyway.
        if (!$document->has('#searchksiazki')) {
            return [];
        }

        $booksInfo = [];
        foreach ($document->find('#ksiazki .authorAllBooks__single') as $book) {
            $bookInfo = new Info();
            $bookInfo->title = trim($book->first('.authorAllBooks__singleTextTitle')->text());
            $bookInfo->author = trim($book->first('.authorAllBooks__singleTextAuthor')->text());
            $bookInfo->url = $book->first('button[data-book-url]')->getAttribute('data-book-url');
            $bookInfo->cover_url = $this->generateCoverUrls($book->first('.img-fluid')->getAttribute('data-src'));

            // Look the cycle link up once and reuse it for both its text and its href.
            $cycleLink = $book->first('a[href*="/cykl/"]');
            if (preg_match('#(.*) \(tom (.*)\)#ism', trim($cycleLink?->text() ?? ''), $series)) {
                $bookInfo->cycle = $series[1];
                $bookInfo->volume = $series[2];
                // The pattern cannot match an empty string, so $cycleLink is non-null here.
                $bookInfo->cycleUrl = $cycleLink->getAttribute('href');
            }

            $booksInfo[] = $bookInfo;
        }

        return $booksInfo;
    }

    /**
     * Derives the small / medium / large cover URLs from one cover URL.
     *
     * Every "<digits>x<digits>.jpg" fragment is removed from the given URL and
     * the three fixed size suffixes are appended to what remains. A URL that
     * contains no such fragment is used unchanged as the base.
     *
     * @param string $coverUrl Cover image URL in any size variant.
     * @return array Map with the keys 'small', 'medium' and 'large'.
     */
    #[ArrayShape(['small' => "string", 'medium' => "string", 'large' => "string"])]
    private function generateCoverUrls(string $coverUrl): array
    {
        // The outer parentheses are the PCRE delimiters here, not a capture group.
        $coverUrlBase = preg_replace('(\d*?x\d*?\.jpg)', '', $coverUrl);

        return [
            'small' => $coverUrlBase . '70x100.jpg',
            'medium' => $coverUrlBase . '170x243.jpg',
            'large' => $coverUrlBase . '352x500.jpg',
        ];
    }
}