Extracted data parsing into a DataParser class that takes a document and returns an Info object or an array of them.
This commit is contained in:
57
src/DataParser.php
Normal file
57
src/DataParser.php
Normal file
@@ -0,0 +1,57 @@
|
||||
<?php
|
||||
|
||||
namespace Techtube\Bookinfo;
|
||||
|
||||
use DiDom\Document;
|
||||
use DiDom\Exceptions\InvalidSelectorException;
|
||||
use Techtube\Bookinfo\Api\AbstractBookInfo;
|
||||
|
||||
/**
 * Extracts book data from already-loaded DiDom documents.
 *
 * Provides one parser per page type: a single book page and a search results page.
 */
class DataParser
{
    /**
     * Builds an Info object from a single book page.
     *
     * Author, ISBN, description and cover URL are read from <meta> tags; title and
     * category come from the page markup; language and publication date come from
     * the JSON-LD script block. Page count, series data and the JSON-LD block are
     * treated as optional. The remaining elements are required: if one of them is
     * missing, PHP raises an Error for the method call on null.
     *
     * @param Document $document
     * @return AbstractBookInfo
     * @throws InvalidSelectorException
     */
    public function singlePage(Document $document): AbstractBookInfo
    {
        $info = new Info();

        // The JSON-LD block is optional: when the script tag is absent (or holds
        // invalid JSON) $jsonInfo is null and the fields read from it fall back to null.
        $jsonScript = $document->first('script[type="application/ld+json"]');
        $jsonInfo = $jsonScript !== null ? json_decode($jsonScript->text()) : null;

        $info->author = $document->first('meta[property="books:author"]')->getAttribute('content');
        $info->isbn = $document->first('meta[property="books:isbn"]')->getAttribute('content');
        $info->description = $document->first('meta[property="og:description"]')->getAttribute('content');
        $info->title = trim($document->first('h1.book__title')->text());
        $info->category = trim($document->first('.book__category')->text());
        $info->cover_url = $document->first('meta[property="og:image"]')->getAttribute('content');

        // A missing page-count element casts to 0.
        $info->pages = (int)$document->first('span.book__pages')?->text();

        // Coalesce to '' before trimming: passing null to trim() is deprecated since PHP 8.1.
        $seriesText = trim($document->first('a[href*="/cykl/"]')?->text() ?? '');
        // Link text is expected to look like "<series name> (tom <number>)".
        if (preg_match('#(.*) \(tom (\d*)\)#ism', $seriesText, $series)) {
            $info->cycle = $series[1];
            $info->volume = $series[2];
        }

        $info->language = $jsonInfo->inLanguage ?? null;
        $info->datePublished = $jsonInfo->datePublished ?? null;

        return $info;
    }

    /**
     * Builds one Info object per result found on a search results page.
     *
     * Each result provides title, author, book URL and cover URL. An empty array
     * is returned when the page contains no matching result elements.
     *
     * @param Document $document
     * @return AbstractBookInfo[]
     * @throws InvalidSelectorException
     */
    public function searchPage(Document $document): array
    {
        $books = $document->find('#search .authorAllBooks__single');

        $booksInfo = [];
        foreach ($books as $book) {
            $bookInfo = new Info();
            $bookInfo->title = trim($book->first('.authorAllBooks__singleTextTitle')->text());
            $bookInfo->author = trim($book->first('.authorAllBooks__singleTextAuthor')->text());
            // The book URL is carried in a data attribute on the result's button.
            $bookInfo->url = $book->first('button[data-book-url]')->getAttribute('data-book-url');
            $bookInfo->cover_url = $book->first('.img-fluid')->getAttribute('src');
            $booksInfo[] = $bookInfo;
        }

        return $booksInfo;
    }
}
|
||||
Reference in New Issue
Block a user