Merge branch 'issue-30' of krzysiej/paper-pi into master
This commit was merged in pull request #33.
This commit is contained in:
@@ -12,16 +12,19 @@ class HtmlToPos
|
||||
return str_replace(['<br>', '<br/>', '<br />'], "\n", $text);
|
||||
}
|
||||
|
||||
public function convert($html = '')
|
||||
{
|
||||
$posText = $this->handleNewLine($html);
|
||||
|
||||
/**
|
||||
* usuwa linijki które na początku mają ! albo #
|
||||
* @param $posText
|
||||
* @return string
|
||||
*/
|
||||
$posText = preg_replace('/^\s*[!#].*?$[\r\n]?/m', '', $posText);
|
||||
private function hideLines($posText): string
{
    // Multiline pattern: optional leading whitespace, then "!" or "#", then the
    // rest of that line plus at most one trailing line-break character.
    $hiddenLinePattern = '/^\s*[!#].*?$[\r\n]?/m';

    // Every matching line is dropped; everything else is left untouched.
    $visibleText = preg_replace($hiddenLinePattern, '', $posText);

    return $visibleText;
}
|
||||
|
||||
|
||||
private function pickRandomElement($posText)
|
||||
{
|
||||
/**
|
||||
* poniższy kod wybiera z tekstu fragmenty w podwójnych nawiasach kwadratowych, rozdziela je po przecinkach
|
||||
* następnie losuje element i podmienia go w miejsce oryginalnego fragmentu
|
||||
@@ -37,6 +40,11 @@ class HtmlToPos
|
||||
$pos = strpos($posText, $elements[0]);
|
||||
$posText = substr_replace($posText, $data[mt_rand(0, count($data) - 1)], $pos, strlen($elements[0]));
|
||||
}
|
||||
return $posText;
|
||||
}
|
||||
|
||||
private function randomNumber($posText)
|
||||
{
|
||||
|
||||
/**
|
||||
* przeszukiwanie tekstu pod kątem zawartości [ liczba, liczba2 ] i zamienianie tego na losową liczbę z przedziału
|
||||
@@ -44,7 +52,7 @@ class HtmlToPos
|
||||
* obojętnie czy ujemną czy nie
|
||||
*/
|
||||
$re = '/\[\s*(-?\d+)\s*\,\s*(-?\d+)\s*\]/';
|
||||
preg_match_all($re, $html, $matches, PREG_SET_ORDER, 0);
|
||||
preg_match_all($re, $posText, $matches, PREG_SET_ORDER, 0);
|
||||
foreach ($matches as $match) {
|
||||
$pos = strpos($posText, $match[0]);
|
||||
$posText = substr_replace($posText, mt_rand($match[1], $match[2]), $pos, strlen($match[0]));
|
||||
@@ -52,4 +60,27 @@ class HtmlToPos
|
||||
return $posText;
|
||||
}
|
||||
|
||||
/**
 * Replace the vocabulary placeholders in the text with data of a random word.
 *
 * Supported placeholders: [vocabulary_word], [vocabulary_short] and
 * [vocabulary_long]. They are filled from the `word`, `short` and `long`
 * properties of the object returned by Vocabulary::getWord().
 *
 * @param string $posText text that may contain vocabulary placeholders
 * @return string text with every placeholder substituted
 */
public function vocabularyWord($posText)
{
    $text = (string) $posText;
    $placeholders = ['[vocabulary_word]', '[vocabulary_short]', '[vocabulary_long]'];

    // Looking up a word means remote requests; skip them entirely when the
    // text has nothing to substitute, so plain texts never depend on the network.
    $needsWord = false;
    foreach ($placeholders as $placeholder) {
        if (strpos($text, $placeholder) !== false) {
            $needsWord = true;
            break;
        }
    }

    if (!$needsWord) {
        return $text;
    }

    $voc = new Vocabulary();
    $word = $voc->getWord($voc->getRandomWord());

    // Array form applies the replacements one after another, in the listed order.
    return str_replace($placeholders, [$word->word, $word->short, $word->long], $text);
}
|
||||
|
||||
/**
 * Run the input through every conversion step and return the final text.
 *
 * Steps, in order: handleNewLine, hideLines, pickRandomElement,
 * randomNumber, vocabularyWord. Each step receives the previous step's output.
 *
 * @param string $html input markup, defaults to an empty string
 * @return string converted text
 */
public function convert($html = '')
{
    $withNewLines = $this->handleNewLine($html);
    $withoutHiddenLines = $this->hideLines($withNewLines);
    $withPickedElements = $this->pickRandomElement($withoutHiddenLines);
    $withRandomNumbers = $this->randomNumber($withPickedElements);

    return $this->vocabularyWord($withRandomNumbers);
}
|
||||
|
||||
}
|
||||
88
app/Paper/Vocabulary.php
Normal file
88
app/Paper/Vocabulary.php
Normal file
@@ -0,0 +1,88 @@
|
||||
<?php
|
||||
|
||||
namespace App\Paper;
|
||||
|
||||
use DiDom\Document;
|
||||
use DiDom\Query;
|
||||
|
||||
|
||||
/**
 * Small client for vocabulary.com: fetches a random word, scrapes a word's
 * definition page and downloads usage examples.
 *
 * Every public method performs HTTP requests.
 */
class Vocabulary
{
    const URL = 'https://www.vocabulary.com/';
    const EXAMPLES_URL = 'https://corpus.vocabulary.com/api/1.0/';

    /**
     * Load the definition page of a word.
     *
     * @param string $word word to look up
     * @return Document document loaded from the definition URL
     */
    public function getWordData($word)
    {
        // The second argument makes DiDom treat the first one as a location to load.
        return new Document($this->getWordUrl($word), true);
    }

    /**
     * Build the definition URL for a word.
     *
     * @param string $word word to look up; surrounding whitespace is ignored
     * @return string
     */
    private function getWordUrl($word)
    {
        // Encode the word so spaces, "&", "#" etc. cannot break the query string.
        $encodedWord = rawurlencode(trim($word));

        return self::URL . 'dictionary/definition.ajax?search=' . $encodedWord . '&lang=en';
    }

    /**
     * Build the usage-examples URL for a word.
     *
     * @param string $word      word to look up; surrounding whitespace is ignored
     * @param int    $maxResult value of the maxResults query parameter
     * @param int    $offset    value of the startOffset query parameter
     * @return string
     */
    private function getWordExamplesUrl($word, $maxResult = 10, $offset = 0)
    {
        return sprintf(
            '%sexamples.json?query=%s&maxResults=%d&startOffset=%d',
            self::EXAMPLES_URL,
            rawurlencode(trim($word)),
            $maxResult,
            $offset
        );
    }

    /**
     * Scrape the definition page of a word into a plain object.
     *
     * The result has the properties lang, word, next, prev, short, long,
     * instances (one array per ".ordinal" element) and examples (the result
     * of getExamples() limited to 3 entries).
     *
     * @param string $word word to look up
     * @return \stdClass
     * @throws \RuntimeException when the page has no ".wordPage" element
     */
    public function getWord($word)
    {
        $document = $this->getWordData($word);

        $wordPageSelector = $document->first('.wordPage');
        if (!$wordPageSelector) {
            // Without this guard the attr() calls below would be a fatal
            // "member function on null" error.
            throw new \RuntimeException(sprintf('No definition page found for word "%s"', $word));
        }

        $wordData = new \stdClass();
        $wordData->lang = $wordPageSelector->attr('data-lang');
        $wordData->word = $wordPageSelector->attr('data-word');
        $wordData->next = $wordPageSelector->attr('data-next');
        $wordData->prev = $wordPageSelector->attr('data-prev');

        // A missing description paragraph yields an empty string instead of a fatal error.
        $shortParagraph = $document->first('p.short');
        $longParagraph = $document->first('p.long');
        $wordData->short = $shortParagraph ? trim($shortParagraph->text()) : '';
        $wordData->long = $longParagraph ? trim($longParagraph->text()) : '';

        $wordData->instances = [];
        foreach ($document->find('.ordinal') as $ordinal) {
            $heading = $ordinal->first('h3');
            $typeLink = $heading ? $heading->first('a') : null;
            $example = $ordinal->first('//div[@class="example"]', Query::TYPE_XPATH);

            $definition = [];
            $definition['type'] = $typeLink ? $typeLink->attr('title') : null;
            $definition['definition'] = trim((string) $ordinal->first('//h3[@class="definition"][1]/text()[2]', Query::TYPE_XPATH));
            $definition['example'] = $example ? $example->text() : '';

            foreach ($ordinal->find('.instances') as $instance) {
                $in = [];
                // Turn the <dt> label into a snake_case key: lower-case,
                // strip surrounding ":" and spaces, then replace inner spaces.
                $in['type'] = str_replace(' ', '_', trim(strtolower((string) $instance->first('dt::text()')), ': '));
                $in['words'] = $instance->find('dd a::text()');
                $in['definition'] = $instance->first('div.definition::text()');
                $definition[$in['type']] = $in;
            }

            $wordData->instances[] = $definition;
        }

        $wordData->examples = $this->getExamples($word, 3);

        return $wordData;
    }

    /**
     * @return string URL of the random-word endpoint
     */
    private function getRandomWordUrl()
    {
        return self::URL . 'randomword.json';
    }

    /**
     * Ask the random-word endpoint for a word.
     *
     * @return string
     * @throws \RuntimeException when the request fails, the response is not
     *                           the expected JSON, or it reports a non-zero status
     */
    public function getRandomWord()
    {
        $response = file_get_contents($this->getRandomWordUrl());
        if ($response === false) {
            throw new \RuntimeException('Unable to fetch a random word');
        }

        $data = json_decode($response);
        if (!is_object($data)) {
            throw new \RuntimeException('Random word response is not valid JSON');
        }

        // Fully qualified exception class: this file lives in a namespace, so
        // an unqualified "Exception" would resolve to a class that does not exist.
        if (isset($data->status) && $data->status != 0) {
            throw new \RuntimeException('Random word request returned status ' . $data->status);
        }

        if (!isset($data->result->word)) {
            throw new \RuntimeException('Random word response does not contain a word');
        }

        return $data->result->word;
    }

    /**
     * Download usage examples for a word.
     *
     * @param string $word      word to look up
     * @param int    $maxResult value of the maxResults query parameter
     * @param int    $offset    value of the startOffset query parameter
     * @return array|null decoded JSON as an associative array, or null when
     *                    the request fails or the body cannot be decoded
     */
    public function getExamples($word, $maxResult = 10, $offset = 0)
    {
        $response = file_get_contents($this->getWordExamplesUrl($word, $maxResult, $offset));
        if ($response === false) {
            return null;
        }

        return json_decode($response, true);
    }
}
|
||||
@@ -5,7 +5,7 @@
|
||||
"license": "MIT",
|
||||
"type": "project",
|
||||
"require": {
|
||||
"php": ">=5.6.4",
|
||||
"php": ">=7.0.0",
|
||||
"imangazaliev/didom": "^1.9",
|
||||
"intervention/image": "^2.3",
|
||||
"laravel/framework": "5.4.*",
|
||||
|
||||
71
tests/Unit/VocabularyTest.php
Normal file
71
tests/Unit/VocabularyTest.php
Normal file
@@ -0,0 +1,71 @@
|
||||
<?php
|
||||
|
||||
namespace Tests\Unit;
|
||||
|
||||
use App\Paper\Vocabulary;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Tests for App\Paper\Vocabulary.
 *
 * NOTE(review): every test here calls the real Vocabulary class, which issues
 * HTTP requests, so the suite needs network access and depends on the remote
 * site's current content.
 */
class VocabularyTest extends TestCase
{
    /**
     * The page of an existing word contains exactly one matching .wordPage element.
     */
    public function testGetExistingWordData()
    {
        $voc = new Vocabulary();
        $data = $voc->getWordData('random');
        $this->assertNotEmpty($data);
        $element = $data->find('.wordPage[data-word="random"]');
        $this->assertCount(1, $element);
    }

    /**
     * An unknown word yields no word page for that word and one .noresults element.
     */
    public function testGetNonExistingWordData()
    {
        // A timestamp is used as a string that is not a dictionary word.
        $nonExistingWord = time();
        $voc = new Vocabulary();
        $data = $voc->getWordData($nonExistingWord);
        $this->assertNotEmpty($data);

        // Check the word that was actually requested, not an unrelated one.
        $element = $data->find('.wordPage[data-word="' . $nonExistingWord . '"]');
        $this->assertCount(0, $element);

        $elementNotFound = $data->find('.noresults[data-search="' . $nonExistingWord . '"]');
        $this->assertCount(1, $elementNotFound);
    }

    /**
     * getWord() exposes the attributes and descriptions of a known word.
     */
    public function testGetExistingWord()
    {
        $dummyWord = 'dummy';
        $voc = new Vocabulary();
        $word = $voc->getWord($dummyWord);

        $this->assertEquals('en', $word->lang);
        $this->assertEquals('dummy', $word->word);
        $this->assertEquals('dump', $word->next);
        $this->assertEquals('dumbstruck', $word->prev);
        $this->assertEquals('Have you ever seen an entertainer make a doll look like it’s talking? The entertainer is using a dummy — a doll made to look like a person.', $word->short);
        $this->assertEquals("A dummy is a type of doll that looks like a person. Entertainers called ventriloquists can make dummies appear to talk. The automobile industry uses dummies in cars to study how safe cars are during a crash. A dummy can also be anything that looks real but doesn’t work: a fake. Actors in a play might use certain props that are dummies, such as a dummy laptop. Dummy is also an insult used to mean “an ignorant person.”", $word->long);
    }

    /**
     * getRandomWord() returns some value.
     */
    public function testGetRandomWord()
    {
        $voc = new Vocabulary();
        $word = $voc->getRandomWord();
        $this->assertNotNull($word);
    }

    /**
     * A random word can be resolved into word data with all basic fields filled.
     */
    public function testGetRandomWordData()
    {
        $voc = new Vocabulary();
        $word = $voc->getRandomWord();
        $this->assertNotNull($word);

        $wordData = $voc->getWord($word);

        $this->assertEquals('en', $wordData->lang);
        $this->assertNotEmpty($wordData->word);
        $this->assertNotEmpty($wordData->next);
        $this->assertNotEmpty($wordData->prev);
        $this->assertNotEmpty($wordData->short);
        $this->assertNotEmpty($wordData->long);
    }
}
|
||||
Reference in New Issue
Block a user