diff --git a/tests/config.py b/tests/config.py index 607bdd8..9e0aa15 100644 --- a/tests/config.py +++ b/tests/config.py @@ -34,3 +34,6 @@ test_tweet_symbols_object = {u'text': u'Some symbols: $AAPL and $PEP and $ANOTHE test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'Twitter Web Client', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None} test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'Twitter Web Client', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'video_info': {u'aspect_ratio': [1, 1], u'variants': [{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}} test_tweet_extended_html = 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. pic.twitter.com/I9pUC0NdZC' + +test_tweet_identical_urls = {u'entities': {u'hashtags': [], u'user_mentions': [], u'symbols': [], u'urls': [{u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [42, 65], u'url': u'https://t.co/W0uArTMk9N'}, {u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [101, 124], u'url': u'https://t.co/W0uArTMk9N'}]}, u'full_text': u'Use Cases, Trials and Making 5G a Reality https://t.co/W0uArTMk9N #5G #innovation via @5GWorldSeries https://t.co/W0uArTMk9N'} + diff --git a/tests/test_core.py b/tests/test_core.py index c7cf2a2..45d856e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,6 +4,7 @@ from twython import Twython, TwythonError, TwythonAuthError, TwythonRateLimitErr from .config import ( test_tweet_object, test_tweet_html, test_tweet_symbols_object, test_tweet_compat_object, test_tweet_extended_object, test_tweet_extended_html, + test_tweet_identical_urls, unittest ) @@ -321,6 +322,12 @@ class TwythonAPITestCase(unittest.TestCase): self.assertTrue('http://google.com' not in tweet_text) self.assertTrue('google.com' not in tweet_text) + def test_html_for_tweet_identical_urls(self): + """If the 'url's for different url entities are identical, they should link correctly.""" + tweet_text = self.api.html_for_tweet(test_tweet_identical_urls) + self.assertEqual(tweet_text, + u'Use Cases, Trials and Making 5G a Reality buff.ly/2sEhrgO #5G #innovation via @5GWorldSeries buff.ly/2sEhrgO') + def test_html_for_tweet_symbols(self): tweet_text = self.api.html_for_tweet(test_tweet_symbols_object) # Should only link symbols listed in entities: diff --git a/twython/api.py b/twython/api.py index f806cea..954033f 100644 --- a/twython/api.py +++ b/twython/api.py @@ -556,62 +556,78 @@ class Twython(EndpointsMixin, object): suffix_text = orig_tweet_text[display_text_end:len(orig_tweet_text)] if 'entities' in tweet: - entities = tweet['entities'] + # We'll put all the bits of replacement HTML and their starts/ends + # in this list: + entities = [] # Mentions - for entity in sorted(entities['user_mentions'], - key=lambda mention: len(mention['screen_name']), reverse=True): - start, end = entity['indices'][0], entity['indices'][1] + if 'user_mentions' in tweet['entities']: + for entity in tweet['entities']['user_mentions']: + temp = {} + temp['start'] = entity['indices'][0] + temp['end'] = entity['indices'][1] - mention_html = '@%(screen_name)s' % {'screen_name': entity['screen_name']} - sub_expr = r'(?)' + orig_tweet_text[start:end] + '(?!)' - if display_text_start <= start <= display_text_end: - display_text = re.sub(sub_expr, mention_html, display_text) - else: - prefix_text = re.sub(sub_expr, mention_html, prefix_text) + mention_html = '@%(screen_name)s' % {'screen_name': entity['screen_name']} + + if display_text_start <= temp['start'] <= display_text_end: + temp['replacement'] = mention_html + entities.append(temp) + else: + prefix_text = re.sub(sub_expr, mention_html, prefix_text) # Hashtags - for entity in sorted(entities['hashtags'], - key=lambda hashtag: len(hashtag['text']), reverse=True): - start, end = entity['indices'][0], entity['indices'][1] + if 'hashtags' in tweet['entities']: + for entity in tweet['entities']['hashtags']: + temp = {} + temp['start'] = entity['indices'][0] + temp['end'] = entity['indices'][1] - hashtag_html = '#%(hashtag)s' - display_text = re.sub(r'(?)' + orig_tweet_text[start:end] + '(?!)', - hashtag_html % {'hashtag': entity['text']}, display_text) + url_html = '#%(hashtag)s' % {'hashtag': entity['text']} + + temp['replacement'] = url_html + entities.append(temp) # Symbols - for entity in sorted(entities['symbols'], - key=lambda symbol: len(symbol['text']), reverse=True): - start, end = entity['indices'][0], entity['indices'][1] + if 'symbols' in tweet['entities']: + for entity in tweet['entities']['symbols']: + temp = {} + temp['start'] = entity['indices'][0] + temp['end'] = entity['indices'][1] - symbol_html = '$%(symbol)s' - display_text = re.sub(r'(?)' + re.escape(orig_tweet_text[start:end]) + r'\b(?!)', - symbol_html % {'symbol': entity['text']}, display_text) + url_html = '$%(symbol)s' % {'symbol': entity['text']} - # Urls - for entity in entities['urls']: - start, end = entity['indices'][0], entity['indices'][1] - if use_display_url and entity.get('display_url') \ - and not use_expanded_url: - shown_url = entity['display_url'] - elif use_expanded_url and entity.get('expanded_url'): - shown_url = entity['expanded_url'] - else: - shown_url = entity['url'] + temp['replacement'] = url_html + entities.append(temp) - url_html = '%s' % (entity['url'], shown_url) - if display_text_start <= start <= display_text_end: - display_text = display_text.replace(orig_tweet_text[start:end], url_html) - else: - suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html) + # URLs + if 'urls' in tweet['entities']: + for entity in tweet['entities']['urls']: + temp = {} + temp['start'] = entity['indices'][0] + temp['end'] = entity['indices'][1] - # Media - if 'media' in entities: - for entity in entities['media']: - start, end = entity['indices'][0], entity['indices'][1] - if use_display_url and entity.get('display_url') \ - and not use_expanded_url: + if use_display_url and entity.get('display_url') and not use_expanded_url: + shown_url = entity['display_url'] + elif use_expanded_url and entity.get('expanded_url'): + shown_url = entity['expanded_url'] + else: + shown_url = entity['url'] + + url_html = '%s' % (entity['url'], shown_url) + + if display_text_start <= temp['start'] <= display_text_end: + temp['replacement'] = url_html + entities.append(temp) + else: + suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html) + + if 'media' in tweet['entities']: + for entity in tweet['entities']['media']: + temp = {} + temp['start'] = entity['indices'][0] + temp['end'] = entity['indices'][1] + + if use_display_url and entity.get('display_url') and not use_expanded_url: shown_url = entity['display_url'] elif use_expanded_url and entity.get('expanded_url'): shown_url = entity['expanded_url'] @@ -619,11 +635,17 @@ class Twython(EndpointsMixin, object): shown_url = entity['url'] url_html = '%s' % (entity['url'], shown_url) - if display_text_start <= start <= display_text_end: - # for compatibility with pre-extended tweets - display_text = display_text.replace(orig_tweet_text[start:end], url_html) + + if display_text_start <= temp['start'] <= display_text_end: + temp['replacement'] = url_html + entities.append(temp) else: - suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html) + suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html) + + # Now do all the replacements, starting from the end, so that the + # start/end indices still work: + for entity in sorted(entities, key=lambda e: e['start'], reverse=True): + display_text = display_text[0:entity['start']] + entity['replacement'] + display_text[entity['end']:] quote_text = '' if expand_quoted_status and tweet.get('is_quote_status') and tweet.get('quoted_status'):