diff --git a/tests/config.py b/tests/config.py
index 607bdd8..9e0aa15 100644
--- a/tests/config.py
+++ b/tests/config.py
@@ -34,3 +34,6 @@ test_tweet_symbols_object = {u'text': u'Some symbols: $AAPL and $PEP and $ANOTHE
test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'Twitter Web Client', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None}
test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'Twitter Web Client', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', 
u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'video_info': {u'aspect_ratio': [1, 1], u'variants': 
[{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}}
test_tweet_extended_html = 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters.'
+
+test_tweet_identical_urls = {u'entities': {u'hashtags': [], u'user_mentions': [], u'symbols': [], u'urls': [{u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [42, 65], u'url': u'https://t.co/W0uArTMk9N'}, {u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [101, 124], u'url': u'https://t.co/W0uArTMk9N'}]}, u'full_text': u'Use Cases, Trials and Making 5G a Reality https://t.co/W0uArTMk9N #5G #innovation via @5GWorldSeries https://t.co/W0uArTMk9N'}
+
diff --git a/tests/test_core.py b/tests/test_core.py
index c7cf2a2..45d856e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -4,6 +4,7 @@ from twython import Twython, TwythonError, TwythonAuthError, TwythonRateLimitErr
from .config import (
test_tweet_object, test_tweet_html, test_tweet_symbols_object,
test_tweet_compat_object, test_tweet_extended_object, test_tweet_extended_html,
+ test_tweet_identical_urls,
unittest
)
@@ -321,6 +322,12 @@ class TwythonAPITestCase(unittest.TestCase):
self.assertTrue('http://google.com' not in tweet_text)
self.assertTrue('google.com' not in tweet_text)
+ def test_html_for_tweet_identical_urls(self):
+ """When several URL entities share an identical 'url', every occurrence should be linked correctly."""
+ tweet_text = self.api.html_for_tweet(test_tweet_identical_urls)
+ self.assertEqual(tweet_text,
+ u'Use Cases, Trials and Making 5G a Reality buff.ly/2sEhrgO #5G #innovation via @5GWorldSeries buff.ly/2sEhrgO')
+
def test_html_for_tweet_symbols(self):
tweet_text = self.api.html_for_tweet(test_tweet_symbols_object)
# Should only link symbols listed in entities:
diff --git a/twython/api.py b/twython/api.py
index f806cea..954033f 100644
--- a/twython/api.py
+++ b/twython/api.py
@@ -556,62 +556,78 @@ class Twython(EndpointsMixin, object):
suffix_text = orig_tweet_text[display_text_end:len(orig_tweet_text)]
if 'entities' in tweet:
- entities = tweet['entities']
+ # We'll put all the bits of replacement HTML and their starts/ends
+ # in this list:
+ entities = []
# Mentions
- for entity in sorted(entities['user_mentions'],
- key=lambda mention: len(mention['screen_name']), reverse=True):
- start, end = entity['indices'][0], entity['indices'][1]
+ if 'user_mentions' in tweet['entities']:
+ for entity in tweet['entities']['user_mentions']:
+ temp = {}
+ temp['start'] = entity['indices'][0]
+ temp['end'] = entity['indices'][1]
- mention_html = '@%(screen_name)s' % {'screen_name': entity['screen_name']}
- sub_expr = r'(?)' + orig_tweet_text[start:end] + '(?!)'
- if display_text_start <= start <= display_text_end:
- display_text = re.sub(sub_expr, mention_html, display_text)
- else:
- prefix_text = re.sub(sub_expr, mention_html, prefix_text)
+ mention_html = '@%(screen_name)s' % {'screen_name': entity['screen_name']}
+
+ if display_text_start <= temp['start'] <= display_text_end:
+ temp['replacement'] = mention_html
+ entities.append(temp)
+ else:
+ prefix_text = re.sub(sub_expr, mention_html, prefix_text)
# Hashtags
- for entity in sorted(entities['hashtags'],
- key=lambda hashtag: len(hashtag['text']), reverse=True):
- start, end = entity['indices'][0], entity['indices'][1]
+ if 'hashtags' in tweet['entities']:
+ for entity in tweet['entities']['hashtags']:
+ temp = {}
+ temp['start'] = entity['indices'][0]
+ temp['end'] = entity['indices'][1]
- hashtag_html = '#%(hashtag)s'
- display_text = re.sub(r'(?)' + orig_tweet_text[start:end] + '(?!)',
- hashtag_html % {'hashtag': entity['text']}, display_text)
+ url_html = '#%(hashtag)s' % {'hashtag': entity['text']}
+
+ temp['replacement'] = url_html
+ entities.append(temp)
# Symbols
- for entity in sorted(entities['symbols'],
- key=lambda symbol: len(symbol['text']), reverse=True):
- start, end = entity['indices'][0], entity['indices'][1]
+ if 'symbols' in tweet['entities']:
+ for entity in tweet['entities']['symbols']:
+ temp = {}
+ temp['start'] = entity['indices'][0]
+ temp['end'] = entity['indices'][1]
- symbol_html = '$%(symbol)s'
- display_text = re.sub(r'(?)' + re.escape(orig_tweet_text[start:end]) + r'\b(?!)',
- symbol_html % {'symbol': entity['text']}, display_text)
+ url_html = '$%(symbol)s' % {'symbol': entity['text']}
- # Urls
- for entity in entities['urls']:
- start, end = entity['indices'][0], entity['indices'][1]
- if use_display_url and entity.get('display_url') \
- and not use_expanded_url:
- shown_url = entity['display_url']
- elif use_expanded_url and entity.get('expanded_url'):
- shown_url = entity['expanded_url']
- else:
- shown_url = entity['url']
+ temp['replacement'] = url_html
+ entities.append(temp)
- url_html = '%s' % (entity['url'], shown_url)
- if display_text_start <= start <= display_text_end:
- display_text = display_text.replace(orig_tweet_text[start:end], url_html)
- else:
- suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html)
+ # URLs
+ if 'urls' in tweet['entities']:
+ for entity in tweet['entities']['urls']:
+ temp = {}
+ temp['start'] = entity['indices'][0]
+ temp['end'] = entity['indices'][1]
- # Media
- if 'media' in entities:
- for entity in entities['media']:
- start, end = entity['indices'][0], entity['indices'][1]
- if use_display_url and entity.get('display_url') \
- and not use_expanded_url:
+ if use_display_url and entity.get('display_url') and not use_expanded_url:
+ shown_url = entity['display_url']
+ elif use_expanded_url and entity.get('expanded_url'):
+ shown_url = entity['expanded_url']
+ else:
+ shown_url = entity['url']
+
+ url_html = '%s' % (entity['url'], shown_url)
+
+ if display_text_start <= temp['start'] <= display_text_end:
+ temp['replacement'] = url_html
+ entities.append(temp)
+ else:
+ suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
+
+ if 'media' in tweet['entities']:
+ for entity in tweet['entities']['media']:
+ temp = {}
+ temp['start'] = entity['indices'][0]
+ temp['end'] = entity['indices'][1]
+
+ if use_display_url and entity.get('display_url') and not use_expanded_url:
shown_url = entity['display_url']
elif use_expanded_url and entity.get('expanded_url'):
shown_url = entity['expanded_url']
@@ -619,11 +635,17 @@ class Twython(EndpointsMixin, object):
shown_url = entity['url']
url_html = '%s' % (entity['url'], shown_url)
- if display_text_start <= start <= display_text_end:
- # for compatibility with pre-extended tweets
- display_text = display_text.replace(orig_tweet_text[start:end], url_html)
+
+ if display_text_start <= temp['start'] <= display_text_end:
+ temp['replacement'] = url_html
+ entities.append(temp)
else:
- suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html)
+ suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
+
+ # Now do all the replacements, starting from the end, so that the
+ # start/end indices still work:
+ for entity in sorted(entities, key=lambda e: e['start'], reverse=True):
+ display_text = display_text[0:entity['start']] + entity['replacement'] + display_text[entity['end']:]
quote_text = ''
if expand_quoted_status and tweet.get('is_quote_status') and tweet.get('quoted_status'):