Update search to match website changes

This commit is contained in:
Krzysztof Płaczek
2025-10-02 12:15:17 +02:00
parent 5f3cbc8b8c
commit 406d6a8f8d
3 changed files with 12 additions and 4 deletions

View File

@@ -13,7 +13,8 @@ abstract class AbstractBookInfo
public array $cover_url; public array $cover_url;
public int $pages; public int $pages;
public string $cycle; public string $cycle;
public int $volume; public string $cycleUrl;
public string $volume;
public ?string $language; public ?string $language;
public string $datePublished; public string $datePublished;
public ?string $publisher; public ?string $publisher;

View File

@@ -28,7 +28,7 @@ class BookFinder
private function getSearchUrl(string $phrase): string private function getSearchUrl(string $phrase): string
{ {
return self::$searchUrl . $phrase; return sprintf('%s%s', self::$searchUrl, $phrase);
} }
} }

View File

@@ -46,8 +46,10 @@ class DataParser
*/ */
public function searchPage(Document $document): array public function searchPage(Document $document): array
{ {
$books = $document->find('#search .authorAllBooks__single'); $books = $document->find('#ksiazki .authorAllBooks__single');
//https://lubimyczytac.pl/searcher/getNextResults/ksiazki
//echo $document->html();
print_r($books);
$booksInfo = []; $booksInfo = [];
if ($document->has('#searchksiazki')) { if ($document->has('#searchksiazki')) {
foreach ($books as $book) { foreach ($books as $book) {
@@ -56,6 +58,11 @@ class DataParser
$bookInfo->author = trim($book->first('.authorAllBooks__singleTextAuthor')->text()); $bookInfo->author = trim($book->first('.authorAllBooks__singleTextAuthor')->text());
$bookInfo->url = $book->first('button[data-book-url]')->getAttribute('data-book-url'); $bookInfo->url = $book->first('button[data-book-url]')->getAttribute('data-book-url');
$bookInfo->cover_url = $this->generateCoverUrls($book->first('.img-fluid')->getAttribute('data-src')); $bookInfo->cover_url = $this->generateCoverUrls($book->first('.img-fluid')->getAttribute('data-src'));
if (preg_match('#(.*) \(tom (.*)\)#ism', trim($book->first('a[href*="/cykl/"]')?->text() ?? ''), $series)) {
$bookInfo->cycle = $series[1];
$bookInfo->volume = $series[2];
$bookInfo->cycleUrl = $book->first('a[href*="/cykl/"]')?->href;
}
$booksInfo[] = $bookInfo; $booksInfo[] = $bookInfo;
} }
} }