Skip to content

Commit

Permalink
Merge pull request #20 from nojimage/develop
Browse files (browse the repository at this point in the history)
Fixes #19
  • Loading branch information
nojimage authored Jan 18, 2018
2 parents e19d55e + ccab29e commit a963299
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 3 deletions.
6 changes: 3 additions & 3 deletions lib/Twitter/Text/Extractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ public function extract($tweet)
return array(
'hashtags' => $this->extractHashtags($tweet),
'urls' => $this->extractURLs($tweet),
'mentions' => $this->extractMentionedUsernames($tweet),
'replyto' => $this->extractRepliedUsernames($tweet),
'mentions' => $this->extractMentionedScreennames($tweet),
'replyto' => $this->extractReplyScreenname($tweet),
'hashtags_with_indices' => $this->extractHashtagsWithIndices($tweet),
'urls_with_indices' => $this->extractURLsWithIndices($tweet),
'mentions_with_indices' => $this->extractMentionedUsernamesWithIndices($tweet),
'mentions_with_indices' => $this->extractMentionedScreennamesWithIndices($tweet),
);
}

Expand Down
120 changes: 120 additions & 0 deletions tests/Twitter/Text/ExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ public function testExtractURLsWithIndicesWithoutProtocol()
$this->assertSame(array(), $extracted, 'Unextract url without protocol');
}

/**
* @group Extractor
*/
public function testUrlWithSpecialCCTLDWithoutProtocol()
{
$text = 'MLB.tv vine.co';
Expand All @@ -88,6 +91,9 @@ public function testUrlWithSpecialCCTLDWithoutProtocol()
$this->assertSame(array(), $extracted, 'Unextract url without protocol');
}

/**
* @group Extractor
*/
public function testExtractURLsWithEmoji()
{
$text = "@ummjackson 🤡 https://i.imgur.com/I32CQ81.jpg";
Expand All @@ -99,12 +105,18 @@ public function testExtractURLsWithEmoji()
);
}

/**
 * A URL that directly follows an ellipsis ("...") must still be returned by extractURLs().
 *
 * @group Extractor
 */
public function testExtractURLsPrecededByEllipsis()
{
    $extracted = $this->extractor->extractURLs('text: ...http://www.example.com');

    // The failure message states the expected behaviour: here the URL IS extracted.
    $this->assertSame(array('http://www.example.com'), $extracted, 'Extract url preceded by ellipsis');
}

/**
* @group Extractor
*/
public function testExtractURLsWith64CharDomainWithoutProtocol()
{
$text = 'randomurlrandomurlrandomurlrandomurlrandomurlrandomurlrandomurls.com';
Expand All @@ -113,6 +125,9 @@ public function testExtractURLsWith64CharDomainWithoutProtocol()
$this->assertSame(array(), $extracted, 'Handle a 64 character domain without protocol');
}

/**
* @group Extractor
*/
public function testExtractURLsHandleLongUrlWithInvalidDomainLabelsAndShortUrl()
{
// @codingStandardsIgnoreStart
Expand All @@ -127,4 +142,109 @@ public function testExtractURLsHandleLongUrlWithInvalidDomainLabelsAndShortUrl()
),
), $extracted, 'Handle long url with invalid domain labels and short url');
}

/**
 * Checks the combined result of extract() for a tweet that contains mentions,
 * a list reference, a hashtag, a URL and cashtags.
 *
 * The comparison uses assertSame(), so key order and value types of the
 * expected array must match the result exactly. The expected array holds no
 * cashtag entry even though the tweet text contains cashtags.
 *
 * @group Extractor
 */
public function testExtract()
{
    // @codingStandardsIgnoreStart
    $tweet = '@someone Hey check out out @otheruser/list_name-01! This is #hashtag1 http://example.com Example cashtags: $TEST $Stock $symbol via @username';
    // @codingStandardsIgnoreEnd

    // Entities that carry their [start, end] positions within the tweet text.
    $hashtagsWithIndices = array(
        array('hashtag' => 'hashtag1', 'indices' => array(60, 69)),
    );
    $urlsWithIndices = array(
        array('url' => 'http://example.com', 'indices' => array(70, 88)),
    );
    $mentionsWithIndices = array(
        array('screen_name' => 'someone', 'indices' => array(0, 8)),
        array('screen_name' => 'otheruser', 'indices' => array(27, 50)),
        array('screen_name' => 'username', 'indices' => array(132, 141)),
    );

    // Keys are listed in the exact order the assertion expects.
    $expected = array(
        'hashtags' => array('hashtag1'),
        'urls' => array('http://example.com'),
        'mentions' => array('someone', 'otheruser', 'username'),
        'replyto' => 'someone',
        'hashtags_with_indices' => $hashtagsWithIndices,
        'urls_with_indices' => $urlsWithIndices,
        'mentions_with_indices' => $mentionsWithIndices,
    );

    $this->assertSame($expected, $this->extractor->extract($tweet));
}

/**
 * Checks that extractEntitiesWithIndices() returns mentions (with their list
 * slug), the hashtag, the URL and the cashtags of a tweet as one flat list.
 *
 * The expected entries are listed by ascending start index and compared with
 * assertSame(), so order, keys and value types must match exactly.
 *
 * @group Extractor
 */
public function testExtractEntitiesWithIndices()
{
    // @codingStandardsIgnoreStart
    $tweet = '@someone Hey check out out @otheruser/list_name-01! This is #hashtag1 http://example.com Example cashtags: $TEST $Stock $symbol via @username';
    // @codingStandardsIgnoreEnd

    $expected = array(
        array('screen_name' => 'someone', 'list_slug' => '', 'indices' => array(0, 8)),
        array('screen_name' => 'otheruser', 'list_slug' => '/list_name-01', 'indices' => array(27, 50)),
        array('hashtag' => 'hashtag1', 'indices' => array(60, 69)),
        array('url' => 'http://example.com', 'indices' => array(70, 88)),
        array('cashtag' => 'TEST', 'indices' => array(107, 112)),
        array('cashtag' => 'Stock', 'indices' => array(113, 119)),
        array('cashtag' => 'symbol', 'indices' => array(120, 127)),
        array('screen_name' => 'username', 'list_slug' => '', 'indices' => array(132, 141)),
    );

    $actual = $this->extractor->extractEntitiesWithIndices($tweet);

    $this->assertSame($expected, $actual);
}
}

0 comments on commit a963299

Please sign in to comment.