Merge pull request #451 from philgyford/fix-447
Improve replacing of entities with links in `html_for_tweet()`
This commit is contained in:
commit
8368956f86
3 changed files with 92 additions and 48 deletions
|
|
@ -34,3 +34,6 @@ test_tweet_symbols_object = {u'text': u'Some symbols: $AAPL and $PEP and $ANOTHE
|
|||
test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None}
|
||||
test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': 
{u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', 
u'video_info': {u'aspect_ratio': [1, 1], u'variants': [{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}}
|
||||
# HTML fixture for the extended-tweet case: the visible tweet text followed by
# the trailing media link wrapped in the "twython-tweet-suffix" span.
# NOTE(review): presumably compared against html_for_tweet() output - confirm in the tests.
test_tweet_extended_html = (
    'Say more about what\'s happening! Rolling out now: photos, videos, '
    'GIFs, polls, and Quote Tweets no longer count toward your 140 '
    'characters.'
    '<span class="twython-tweet-suffix"> '
    '<a href="https://t.co/I9pUC0NdZC" class="twython-media">'
    'pic.twitter.com/I9pUC0NdZC</a>'
    '</span>'
)
|
||||
|
||||
# Tweet fixture whose two url entities carry the same t.co 'url' but sit at
# different index ranges of 'full_text'.
test_tweet_identical_urls = {
    u'entities': {
        u'hashtags': [],
        u'user_mentions': [],
        u'symbols': [],
        u'urls': [
            {
                u'display_url': u'buff.ly/2sEhrgO',
                u'expanded_url': u'http://buff.ly/2sEhrgO',
                u'indices': [42, 65],
                u'url': u'https://t.co/W0uArTMk9N',
            },
            {
                u'display_url': u'buff.ly/2sEhrgO',
                u'expanded_url': u'http://buff.ly/2sEhrgO',
                u'indices': [101, 124],
                u'url': u'https://t.co/W0uArTMk9N',
            },
        ],
    },
    u'full_text': (
        u'Use Cases, Trials and Making 5G a Reality https://t.co/W0uArTMk9N '
        u'#5G #innovation via @5GWorldSeries https://t.co/W0uArTMk9N'
    ),
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from twython import Twython, TwythonError, TwythonAuthError, TwythonRateLimitErr
|
|||
from .config import (
|
||||
test_tweet_object, test_tweet_html, test_tweet_symbols_object,
|
||||
test_tweet_compat_object, test_tweet_extended_object, test_tweet_extended_html,
|
||||
test_tweet_identical_urls,
|
||||
unittest
|
||||
)
|
||||
|
||||
|
|
@ -321,12 +322,30 @@ class TwythonAPITestCase(unittest.TestCase):
|
|||
self.assertTrue('http://google.com' not in tweet_text)
|
||||
self.assertTrue('google.com' not in tweet_text)
|
||||
|
||||
def test_html_for_tweet_identical_urls(self):
    """Url entities that share the same 'url' value must each be linked correctly."""
    rendered = self.api.html_for_tweet(test_tweet_identical_urls)
    # Both occurrences of the shared t.co link should become anchors, with
    # the surrounding plain text left untouched.
    expected_html = (
        u'Use Cases, Trials and Making 5G a Reality '
        u'<a href="https://t.co/W0uArTMk9N" class="twython-url">buff.ly/2sEhrgO</a>'
        u' #5G #innovation via @5GWorldSeries '
        u'<a href="https://t.co/W0uArTMk9N" class="twython-url">buff.ly/2sEhrgO</a>'
    )
    self.assertEqual(rendered, expected_html)
|
||||
|
||||
def test_html_for_tweet_symbols(self):
    """Only the symbols listed in the tweet's entities should be linked."""
    html = self.api.html_for_tweet(test_tweet_symbols_object)
    aapl_link = '<a href="https://twitter.com/search?q=%24AAPL" class="twython-symbol">$AAPL</a>'
    another_link = '<a href="https://twitter.com/search?q=%24ANOTHER" class="twython-symbol">$ANOTHER</a>'
    # Should only link symbols listed in entities:
    self.assertTrue(aapl_link in html)
    self.assertTrue(another_link not in html)
|
||||
|
||||
def test_html_for_tweet_no_symbols(self):
    """Should still work if tweet object has no symbols list"""
    tweet = test_tweet_symbols_object
    # Temporarily remove the symbols list from the shared fixture, keeping
    # a reference so it can be restored afterwards.
    symbols = tweet['entities']['symbols']
    del tweet['entities']['symbols']
    try:
        tweet_text = self.api.html_for_tweet(tweet)
        self.assertTrue('symbols: $AAPL and' in tweet_text)
        self.assertTrue('and $ANOTHER and $A.' in tweet_text)
    finally:
        # Put the symbols back even when an assertion fails or
        # html_for_tweet() raises; otherwise the module-level fixture stays
        # mutated and leaks into every test that runs after this one.
        tweet['entities']['symbols'] = symbols
|
||||
|
||||
def test_html_for_tweet_compatmode(self):
|
||||
tweet_text = self.api.html_for_tweet(test_tweet_compat_object)
|
||||
# link to compat web status link
|
||||
|
|
|
|||
118
twython/api.py
118
twython/api.py
|
|
@ -556,62 +556,78 @@ class Twython(EndpointsMixin, object):
|
|||
suffix_text = orig_tweet_text[display_text_end:len(orig_tweet_text)]
|
||||
|
||||
if 'entities' in tweet:
|
||||
entities = tweet['entities']
|
||||
# We'll put all the bits of replacement HTML and their starts/ends
|
||||
# in this list:
|
||||
entities = []
|
||||
|
||||
# Mentions
|
||||
for entity in sorted(entities['user_mentions'],
|
||||
key=lambda mention: len(mention['screen_name']), reverse=True):
|
||||
start, end = entity['indices'][0], entity['indices'][1]
|
||||
if 'user_mentions' in tweet['entities']:
|
||||
for entity in tweet['entities']['user_mentions']:
|
||||
temp = {}
|
||||
temp['start'] = entity['indices'][0]
|
||||
temp['end'] = entity['indices'][1]
|
||||
|
||||
mention_html = '<a href="https://twitter.com/%(screen_name)s" ' \
|
||||
'class="twython-mention">@%(screen_name)s</a>' % {'screen_name': entity['screen_name']}
|
||||
sub_expr = r'(?<!>)' + orig_tweet_text[start:end] + '(?!</a>)'
|
||||
if display_text_start <= start <= display_text_end:
|
||||
display_text = re.sub(sub_expr, mention_html, display_text)
|
||||
else:
|
||||
prefix_text = re.sub(sub_expr, mention_html, prefix_text)
|
||||
mention_html = '<a href="https://twitter.com/%(screen_name)s" class="twython-mention">@%(screen_name)s</a>' % {'screen_name': entity['screen_name']}
|
||||
|
||||
if display_text_start <= temp['start'] <= display_text_end:
|
||||
temp['replacement'] = mention_html
|
||||
entities.append(temp)
|
||||
else:
|
||||
prefix_text = re.sub(sub_expr, mention_html, prefix_text)
|
||||
|
||||
# Hashtags
|
||||
for entity in sorted(entities['hashtags'],
|
||||
key=lambda hashtag: len(hashtag['text']), reverse=True):
|
||||
start, end = entity['indices'][0], entity['indices'][1]
|
||||
if 'hashtags' in tweet['entities']:
|
||||
for entity in tweet['entities']['hashtags']:
|
||||
temp = {}
|
||||
temp['start'] = entity['indices'][0]
|
||||
temp['end'] = entity['indices'][1]
|
||||
|
||||
hashtag_html = '<a href="https://twitter.com/search?q=%%23%(hashtag)s" class="twython-hashtag">#%(hashtag)s</a>'
|
||||
display_text = re.sub(r'(?<!>)' + orig_tweet_text[start:end] + '(?!</a>)',
|
||||
hashtag_html % {'hashtag': entity['text']}, display_text)
|
||||
url_html = '<a href="https://twitter.com/search?q=%%23%(hashtag)s" class="twython-hashtag">#%(hashtag)s</a>' % {'hashtag': entity['text']}
|
||||
|
||||
temp['replacement'] = url_html
|
||||
entities.append(temp)
|
||||
|
||||
# Symbols
|
||||
for entity in sorted(entities['symbols'],
|
||||
key=lambda symbol: len(symbol['text']), reverse=True):
|
||||
start, end = entity['indices'][0], entity['indices'][1]
|
||||
if 'symbols' in tweet['entities']:
|
||||
for entity in tweet['entities']['symbols']:
|
||||
temp = {}
|
||||
temp['start'] = entity['indices'][0]
|
||||
temp['end'] = entity['indices'][1]
|
||||
|
||||
symbol_html = '<a href="https://twitter.com/search?q=%%24%(symbol)s" class="twython-symbol">$%(symbol)s</a>'
|
||||
display_text = re.sub(r'(?<!>)' + re.escape(orig_tweet_text[start:end]) + r'\b(?!</a>)',
|
||||
symbol_html % {'symbol': entity['text']}, display_text)
|
||||
url_html = '<a href="https://twitter.com/search?q=%%24%(symbol)s" class="twython-symbol">$%(symbol)s</a>' % {'symbol': entity['text']}
|
||||
|
||||
# Urls
|
||||
for entity in entities['urls']:
|
||||
start, end = entity['indices'][0], entity['indices'][1]
|
||||
if use_display_url and entity.get('display_url') \
|
||||
and not use_expanded_url:
|
||||
shown_url = entity['display_url']
|
||||
elif use_expanded_url and entity.get('expanded_url'):
|
||||
shown_url = entity['expanded_url']
|
||||
else:
|
||||
shown_url = entity['url']
|
||||
temp['replacement'] = url_html
|
||||
entities.append(temp)
|
||||
|
||||
url_html = '<a href="%s" class="twython-url">%s</a>' % (entity['url'], shown_url)
|
||||
if display_text_start <= start <= display_text_end:
|
||||
display_text = display_text.replace(orig_tweet_text[start:end], url_html)
|
||||
else:
|
||||
suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html)
|
||||
# URLs
|
||||
if 'urls' in tweet['entities']:
|
||||
for entity in tweet['entities']['urls']:
|
||||
temp = {}
|
||||
temp['start'] = entity['indices'][0]
|
||||
temp['end'] = entity['indices'][1]
|
||||
|
||||
# Media
|
||||
if 'media' in entities:
|
||||
for entity in entities['media']:
|
||||
start, end = entity['indices'][0], entity['indices'][1]
|
||||
if use_display_url and entity.get('display_url') \
|
||||
and not use_expanded_url:
|
||||
if use_display_url and entity.get('display_url') and not use_expanded_url:
|
||||
shown_url = entity['display_url']
|
||||
elif use_expanded_url and entity.get('expanded_url'):
|
||||
shown_url = entity['expanded_url']
|
||||
else:
|
||||
shown_url = entity['url']
|
||||
|
||||
url_html = '<a href="%s" class="twython-url">%s</a>' % (entity['url'], shown_url)
|
||||
|
||||
if display_text_start <= temp['start'] <= display_text_end:
|
||||
temp['replacement'] = url_html
|
||||
entities.append(temp)
|
||||
else:
|
||||
suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
|
||||
|
||||
if 'media' in tweet['entities']:
|
||||
for entity in tweet['entities']['media']:
|
||||
temp = {}
|
||||
temp['start'] = entity['indices'][0]
|
||||
temp['end'] = entity['indices'][1]
|
||||
|
||||
if use_display_url and entity.get('display_url') and not use_expanded_url:
|
||||
shown_url = entity['display_url']
|
||||
elif use_expanded_url and entity.get('expanded_url'):
|
||||
shown_url = entity['expanded_url']
|
||||
|
|
@ -619,11 +635,17 @@ class Twython(EndpointsMixin, object):
|
|||
shown_url = entity['url']
|
||||
|
||||
url_html = '<a href="%s" class="twython-media">%s</a>' % (entity['url'], shown_url)
|
||||
if display_text_start <= start <= display_text_end:
|
||||
# for compatibility with pre-extended tweets
|
||||
display_text = display_text.replace(orig_tweet_text[start:end], url_html)
|
||||
|
||||
if display_text_start <= temp['start'] <= display_text_end:
|
||||
temp['replacement'] = url_html
|
||||
entities.append(temp)
|
||||
else:
|
||||
suffix_text = suffix_text.replace(orig_tweet_text[start:end], url_html)
|
||||
suffix_text = suffix_text.replace(orig_tweet_text[temp['start']:temp['end']], url_html)
|
||||
|
||||
# Now do all the replacements, starting from the end, so that the
|
||||
# start/end indices still work:
|
||||
for entity in sorted(entities, key=lambda e: e['start'], reverse=True):
|
||||
display_text = display_text[0:entity['start']] + entity['replacement'] + display_text[entity['end']:]
|
||||
|
||||
quote_text = ''
|
||||
if expand_quoted_status and tweet.get('is_quote_status') and tweet.get('quoted_status'):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue