Improve replacing of entities with links in html_for_tweet()

I've rewritten the parts of `html_for_tweet()` that replace URLs, mentions,
symbols and hashtags with links, so that replacements in the display text are
made at each entity's indices rather than by searching for the entity's text.

This is mainly to fix #447, but it should also be a little more robust generally.

Shamelessly cribbed from https://stackoverflow.com/a/25514650/250962

Passes all tests, but I haven't checked it beyond that.

Fixes #447
This commit is contained in:
Phil Gyford 2017-08-22 13:49:40 +01:00
parent 5a87fc7d84
commit b366ab55c3
3 changed files with 80 additions and 48 deletions

View file

@ -34,3 +34,6 @@ test_tweet_symbols_object = {u'text': u'Some symbols: $AAPL and $PEP and $ANOTHE
test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None} test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! 
Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None}
test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': 
{u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', 
u'video_info': {u'aspect_ratio': [1, 1], u'variants': [{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}} test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, 
u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'video_info': {u'aspect_ratio': [1, 1], u'variants': [{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': 
u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}}
test_tweet_extended_html = 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters.<span class="twython-tweet-suffix"> <a href="https://t.co/I9pUC0NdZC" class="twython-media">pic.twitter.com/I9pUC0NdZC</a></span>' test_tweet_extended_html = 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters.<span class="twython-tweet-suffix"> <a href="https://t.co/I9pUC0NdZC" class="twython-media">pic.twitter.com/I9pUC0NdZC</a></span>'
test_tweet_identical_urls = {u'entities': {u'hashtags': [], u'user_mentions': [], u'symbols': [], u'urls': [{u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [42, 65], u'url': u'https://t.co/W0uArTMk9N'}, {u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [101, 124], u'url': u'https://t.co/W0uArTMk9N'}]}, u'full_text': u'Use Cases, Trials and Making 5G a Reality https://t.co/W0uArTMk9N #5G #innovation via @5GWorldSeries https://t.co/W0uArTMk9N'}

View file

@ -4,6 +4,7 @@ from twython import Twython, TwythonError, TwythonAuthError, TwythonRateLimitErr
from .config import ( from .config import (
test_tweet_object, test_tweet_html, test_tweet_symbols_object, test_tweet_object, test_tweet_html, test_tweet_symbols_object,
test_tweet_compat_object, test_tweet_extended_object, test_tweet_extended_html, test_tweet_compat_object, test_tweet_extended_object, test_tweet_extended_html,
test_tweet_identical_urls,
unittest unittest
) )
@ -321,6 +322,12 @@ class TwythonAPITestCase(unittest.TestCase):
self.assertTrue('http://google.com' not in tweet_text) self.assertTrue('http://google.com' not in tweet_text)
self.assertTrue('google.com' not in tweet_text) self.assertTrue('google.com' not in tweet_text)
def test_html_for_tweet_identical_urls(self):
"""If the 'url's for different url entities are identical, they should link correctly."""
tweet_text = self.api.html_for_tweet(test_tweet_identical_urls)
self.assertEqual(tweet_text,
u'Use Cases, Trials and Making 5G a Reality <a href="https://t.co/W0uArTMk9N" class="twython-url">buff.ly/2sEhrgO</a> #5G #innovation via @5GWorldSeries <a href="https://t.co/W0uArTMk9N" class="twython-url">buff.ly/2sEhrgO</a>')
def test_html_for_tweet_symbols(self): def test_html_for_tweet_symbols(self):
tweet_text = self.api.html_for_tweet(test_tweet_symbols_object) tweet_text = self.api.html_for_tweet(test_tweet_symbols_object)
# Should only link symbols listed in entities: # Should only link symbols listed in entities:

View file

@ -556,62 +556,78 @@ class Twython(EndpointsMixin, object):
suffix_text = orig_tweet_text[display_text_end:len(orig_tweet_text)] suffix_text = orig_tweet_text[display_text_end:len(orig_tweet_text)]
if 'entities' in tweet: if 'entities' in tweet:
entities = tweet['entities'] # We'll put all the bits of replacement HTML and their starts/ends
# in this list:
entities = []
# Mentions # Mentions
for entity in sorted(entities['user_mentions'], if 'user_mentions' in tweet['entities']:
key=lambda mention: len(mention['screen_name']), reverse=True): for entity in tweet['entities']['user_mentions']:
start, end = entity['indices'][0], entity['indices'][1] temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
mention_html = '<a href="https://twitter.com/%(screen_name)s" ' \ mention_html = '<a href="https://twitter.com/%(screen_name)s" class="twython-mention">@%(screen_name)s</a>' % {'screen_name': entity['screen_name']}
'class="twython-mention">@%(screen_name)s</a>' % {'screen_name': entity['screen_name']}
sub_expr = r'(?<!>)' + orig_tweet_text[start:end] + '(?!</a>)' if display_text_start <= temp['start'] <= display_text_end:
if display_text_start <= start <= display_text_end: temp['replacement'] = mention_html
display_text = re.sub(sub_expr, mention_html, display_text) entities.append(temp)
else: else:
prefix_text = re.sub(sub_expr, mention_html, prefix_text) prefix_text = re.sub(sub_expr, mention_html, prefix_text)
# Hashtags # Hashtags
for entity in sorted(entities['hashtags'], if 'hashtags' in tweet['entities']:
key=lambda hashtag: len(hashtag['text']), reverse=True): for entity in tweet['entities']['hashtags']:
start, end = entity['indices'][0], entity['indices'][1] temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
hashtag_html = '<a href="https://twitter.com/search?q=%%23%(hashtag)s" class="twython-hashtag">#%(hashtag)s</a>' url_html = '<a href="https://twitter.com/search?q=%%23%(hashtag)s" class="twython-hashtag">#%(hashtag)s</a>' % {'hashtag': entity['text']}
display_text = re.sub(r'(?<!>)' + orig_tweet_text[start:end] + '(?!</a>)',
hashtag_html % {'hashtag': entity['text']}, display_text) temp['replacement'] = url_html
entities.append(temp)
# Symbols # Symbols
for entity in sorted(entities['symbols'], if 'symbols' in tweet['entities']:
key=lambda symbol: len(symbol['text']), reverse=True): for entity in tweet['entities']['symbols']:
start, end = entity['indices'][0], entity['indices'][1] temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
symbol_html = '<a href="https://twitter.com/search?q=%%24%(symbol)s" class="twython-symbol">$%(symbol)s</a>' url_html = '<a href="https://twitter.com/search?q=%%24%(symbol)s" class="twython-symbol">$%(symbol)s</a>' % {'symbol': entity['text']}
display_text = re.sub(r'(?<!>)' + re.escape(orig_tweet_text[start:end]) + r'\b(?!</a>)',
symbol_html % {'symbol': entity['text']}, display_text)
# Urls temp['replacement'] = url_html
for entity in entities['urls']: entities.append(temp)
start, end = entity['indices'][0], entity['indices'][1]
if use_display_url and entity.get('display_url') \
and not use_expanded_url:
shown_url = entity['display_url']
elif use_expanded_url and entity.get('expanded_url'):
shown_url = entity['expanded_url']
else:
shown_url = entity['url']
url_html = '<a href="%s" class="twython-url">%s</a>' % (entity['url'], shown_url) # URLs
if display_text_start <= start <= display_text_end: if 'urls' in tweet['entities']:
display_text = display_text.replace(orig_tweet_text[start:end], url_html) for entity in tweet['entities']['urls']:
else: temp = {}
suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html) temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
# Media if use_display_url and entity.get('display_url') and not use_expanded_url:
if 'media' in entities: shown_url = entity['display_url']
for entity in entities['media']: elif use_expanded_url and entity.get('expanded_url'):
start, end = entity['indices'][0], entity['indices'][1] shown_url = entity['expanded_url']
if use_display_url and entity.get('display_url') \ else:
and not use_expanded_url: shown_url = entity['url']
url_html = '<a href="%s" class="twython-url">%s</a>' % (entity['url'], shown_url)
if display_text_start <= temp['start'] <= display_text_end:
temp['replacement'] = url_html
entities.append(temp)
else:
suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
if 'media' in tweet['entities']:
for entity in tweet['entities']['media']:
temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
if use_display_url and entity.get('display_url') and not use_expanded_url:
shown_url = entity['display_url'] shown_url = entity['display_url']
elif use_expanded_url and entity.get('expanded_url'): elif use_expanded_url and entity.get('expanded_url'):
shown_url = entity['expanded_url'] shown_url = entity['expanded_url']
@ -619,11 +635,17 @@ class Twython(EndpointsMixin, object):
shown_url = entity['url'] shown_url = entity['url']
url_html = '<a href="%s" class="twython-media">%s</a>' % (entity['url'], shown_url) url_html = '<a href="%s" class="twython-media">%s</a>' % (entity['url'], shown_url)
if display_text_start <= start <= display_text_end:
# for compatibility with pre-extended tweets if display_text_start <= temp['start'] <= display_text_end:
display_text = display_text.replace(orig_tweet_text[start:end], url_html) temp['replacement'] = url_html
entities.append(temp)
else: else:
suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html) suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
# Now do all the replacements, starting from the end, so that the
# start/end indices still work:
for entity in sorted(entities, key=lambda e: e['start'], reverse=True):
display_text = display_text[0:entity['start']] + entity['replacement'] + display_text[entity['end']:]
quote_text = '' quote_text = ''
if expand_quoted_status and tweet.get('is_quote_status') and tweet.get('quoted_status'): if expand_quoted_status and tweet.get('is_quote_status') and tweet.get('quoted_status'):