From 119b94470f8ea0154ebc4bb67f1d101d1d8de906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20P=C5=82aczek?= Date: Fri, 5 Aug 2022 16:44:51 +0200 Subject: [PATCH] Started working on parsing symfony cast pages. --- app/Http/Controllers/Controller.php | 7 +++ app/Http/SymfonyCastDl/HtmlParser.php | 48 +++++++++++++++++ .../SymfonyCastDl/SymfonyCastDlService.php | 40 ++++++++++++++ composer.json | 1 + composer.lock | 54 ++++++++++++++++++- routes/web.php | 6 +-- 6 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 app/Http/SymfonyCastDl/HtmlParser.php create mode 100644 app/Http/SymfonyCastDl/SymfonyCastDlService.php diff --git a/app/Http/Controllers/Controller.php b/app/Http/Controllers/Controller.php index a0a2a8a..c1d762c 100644 --- a/app/Http/Controllers/Controller.php +++ b/app/Http/Controllers/Controller.php @@ -2,6 +2,8 @@ namespace App\Http\Controllers; +use App\Http\SymfonyCastDl\HtmlParser; +use App\Http\SymfonyCastDl\SymfonyCastDlService; use Illuminate\Foundation\Auth\Access\AuthorizesRequests; use Illuminate\Foundation\Bus\DispatchesJobs; use Illuminate\Foundation\Validation\ValidatesRequests; @@ -10,4 +12,9 @@ use Illuminate\Routing\Controller as BaseController; class Controller extends BaseController { use AuthorizesRequests, DispatchesJobs, ValidatesRequests; + + public function index(HtmlParser $htmlParser) + { + $service = new SymfonyCastDlService($htmlParser); + } } diff --git a/app/Http/SymfonyCastDl/HtmlParser.php b/app/Http/SymfonyCastDl/HtmlParser.php new file mode 100644 index 0000000..fe48d9d --- /dev/null +++ b/app/Http/SymfonyCastDl/HtmlParser.php @@ -0,0 +1,48 @@ +getBody()->getContents()); + return $document->first('input[name="_csrf_token"]')->attr('value'); + } + + public function getCourses(Response $response): array + { + $courses = []; + $document = new Document($response->getBody()->getContents()); + foreach ($document->find('div.js-course-item') as $courseItem) { + $course = []; + $course['name'] = $courseItem->first('h3')->text(); + $course['link'] = $courseItem->first('a')->attr('href'); + $course['status'] = $courseItem->attr('data-status'); + $course['chapter-count'] = $courseItem->attr('data-chapter-count'); + $course['times-watched'] = $courseItem->attr('data-times-watched'); + + $courses[] = $course; + } + return $courses; + } + + public function getCourseDetails(Response $response): array + { + $document = new Document($response->getBody()->getContents()); + $info = ['chapters' => []]; + foreach ($document->find('ul.chapter-list li') as $chapter) { + $info['chapters'][] = [ + 'link' => $chapter->first('a')->attr('href'), + 'title' => preg_replace('/\v(?:[\v\h]+)/', '', $chapter->first('.col')->text()), + 'duration' => $chapter->first('.length-styling')->text(), + ]; + } + + return $info; + } + +} diff --git a/app/Http/SymfonyCastDl/SymfonyCastDlService.php b/app/Http/SymfonyCastDl/SymfonyCastDlService.php new file mode 100644 index 0000000..ad21569 --- /dev/null +++ b/app/Http/SymfonyCastDl/SymfonyCastDlService.php @@ -0,0 +1,40 @@ + "https://symfonycasts.com", + 'cookies' => true + ]); + + $response = $client->get('login'); + + + $token = $htmlParser->getCsrfToken($response); + + $response = $client->post('login', [ + 'form_params' => [ +// 'email' => 'krzysiej@gmail.com', +// 'password' => '', + '_csrf_token' => $token + ], + 'on_stats' => function (TransferStats $stats) use (&$currentUrl) { + $currentUrl = $stats->getEffectiveUri(); + } + ]); + + $coursePage = $client->get('courses/filtering'); +// dump($htmlParser->getCourses($coursePage)); + + $singleCoursePage = $client->get('screencast/api-platform'); + dd($htmlParser->getCourseDetails($singleCoursePage)); + } +} diff --git a/composer.json b/composer.json index 299b7e8..61a932b 100644 --- a/composer.json +++ b/composer.json @@ -7,6 +7,7 @@ "require": { "php": "^8.0.2", "guzzlehttp/guzzle": "^7.2", + "imangazaliev/didom": "^2.0", "laravel/framework": "^9.19", "laravel/sanctum": "^3.0", "laravel/tinker": "^2.7" diff --git a/composer.lock b/composer.lock index 5cb6a44..abd3a20 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "ccbd816a07b206f971042295b899d1ba", + "content-hash": "6969489719f565e06eb44575a4396934", "packages": [ { "name": "brick/math", @@ -893,6 +893,58 @@ ], "time": "2022-06-20T21:43:11+00:00" }, + { + "name": "imangazaliev/didom", + "version": "2.0", + "source": { + "type": "git", + "url": "https://github.com/Imangazaliev/DiDOM.git", + "reference": "87f7089d95aef7fd09dc68826cfa245b90f3040b" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Imangazaliev/DiDOM/zipball/87f7089d95aef7fd09dc68826cfa245b90f3040b", + "reference": "87f7089d95aef7fd09dc68826cfa245b90f3040b", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-iconv": "*", + "php": ">=7.2" + }, + "require-dev": { + "phpunit/phpunit": "^8.5" + }, + "type": "library", + "autoload": { + "psr-4": { + "DiDom\\": "src/DiDom/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Imangazaliev Muhammad", + "email": "imangazalievm@gmail.com" + } + ], + "description": "Simple and fast HTML parser", + "homepage": "https://github.com/Imangazaliev/DiDOM", + "keywords": [ + "didom", + "html", + "parser", + "xml" + ], + "support": { + "issues": "https://github.com/Imangazaliev/DiDOM/issues", + "source": "https://github.com/Imangazaliev/DiDOM/tree/2.0" + }, + "time": "2022-05-08T01:48:13+00:00" + }, { "name": "laravel/framework", "version": "v9.23.0", diff --git a/routes/web.php b/routes/web.php index b130397..d0c41d6 100644 --- a/routes/web.php +++ b/routes/web.php @@ -13,6 +13,6 @@ use Illuminate\Support\Facades\Route; | */ -Route::get('/', function () { - return view('welcome'); -}); +Route::get('/', [\App\Http\Controllers\Controller::class, 'index']); + +