Started working on parsing symfony cast pages.

This commit is contained in:
Krzysztof Płaczek
2022-08-05 16:44:51 +02:00
parent 3a6a2252a3
commit 119b94470f
6 changed files with 152 additions and 4 deletions

View File

@@ -0,0 +1,48 @@
<?php
namespace App\Http\SymfonyCastDl;
use DiDom\Document;
use GuzzleHttp\Psr7\Response;
/**
 * Extracts structured data (CSRF token, course list, chapter list) from HTML
 * pages delivered as Guzzle PSR-7 responses, using DiDom CSS-selector queries.
 */
class HtmlParser
{
    /**
     * Reads the value of the `_csrf_token` input found in the response body.
     *
     * @throws \RuntimeException When the page contains no `_csrf_token` input
     *                           or the input carries no `value` attribute.
     */
    public function getCsrfToken(Response $response): string
    {
        $input = $this->createDocument($response)->first('input[name="_csrf_token"]');
        $token = $this->attrOrNull($input, 'value');

        if ($token === null) {
            // Fail with a clear message instead of a fatal "member function on null".
            throw new \RuntimeException('Unable to find the "_csrf_token" input value in the response body.');
        }

        return $token;
    }

    /**
     * Collects one entry per `div.js-course-item` element of the page.
     *
     * A field is null when the element or attribute it is read from is
     * missing, so a single malformed item does not abort the whole listing.
     *
     * @return array<int, array<string, string|null>> Entries keyed by `name`, `link`,
     *                                                `status`, `chapter-count` and `times-watched`.
     */
    public function getCourses(Response $response): array
    {
        $courses = [];

        foreach ($this->createDocument($response)->find('div.js-course-item') as $courseItem) {
            $courses[] = [
                'name' => $this->textOrNull($courseItem->first('h3')),
                'link' => $this->attrOrNull($courseItem->first('a'), 'href'),
                'status' => $courseItem->attr('data-status'),
                'chapter-count' => $courseItem->attr('data-chapter-count'),
                'times-watched' => $courseItem->attr('data-times-watched'),
            ];
        }

        return $courses;
    }

    /**
     * Collects the chapters listed under `ul.chapter-list li`.
     *
     * A field is null when the element it is read from is missing.
     *
     * @return array{chapters: array<int, array<string, string|null>>} Each chapter has the keys
     *                                                                 `link`, `title` and `duration`.
     */
    public function getCourseDetails(Response $response): array
    {
        $info = ['chapters' => []];

        foreach ($this->createDocument($response)->find('ul.chapter-list li') as $chapter) {
            $title = $this->textOrNull($chapter->first('.col'));

            $info['chapters'][] = [
                'link' => $this->attrOrNull($chapter->first('a'), 'href'),
                // Drop every vertical-whitespace character together with the
                // whitespace run that follows it (line breaks plus indentation).
                'title' => $title === null ? null : preg_replace('/\v(?:[\v\h]+)/', '', $title),
                'duration' => $this->textOrNull($chapter->first('.length-styling')),
            ];
        }

        return $info;
    }

    /**
     * Builds a DiDom document from the complete response body.
     */
    private function createDocument(Response $response): Document
    {
        // Casting the stream to string rewinds it first when it is seekable,
        // whereas getContents() only returns what has not been read yet and
        // would yield an empty document for an already-consumed body.
        return new Document((string) $response->getBody());
    }

    /**
     * Returns the text of the given element, or null when no element matched.
     *
     * @param \DiDom\Element|null $element Result of a `first()` lookup.
     */
    private function textOrNull($element): ?string
    {
        return $element === null ? null : $element->text();
    }

    /**
     * Returns the named attribute of the given element, or null when no
     * element matched or the attribute is absent.
     *
     * @param \DiDom\Element|null $element Result of a `first()` lookup.
     */
    private function attrOrNull($element, string $name): ?string
    {
        return $element === null ? null : $element->attr($name);
    }
}