Improve replacing of entities with links in html_for_tweet()
I've rewritten the parts of `html_for_tweet()` that handle the replacement of URLs, mentions, symbols and hashtags so that they work better. This is mainly to fix #447, but it should also be a little more robust generally. Shamelessly cribbed from https://stackoverflow.com/a/25514650/250962. Passes all tests, but I haven't checked it beyond that. Fixes #447
This commit is contained in:
parent
5a87fc7d84
commit
b366ab55c3
3 changed files with 80 additions and 48 deletions
|
|
@@ -34,3 +34,6 @@ test_tweet_symbols_object = {u'text': u'Some symbols: $AAPL and $PEP and $ANOTHE
|
|||
test_tweet_compat_object = {u'contributors': None, u'truncated': True, u'text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count tow\u2026 https://t.co/SRmsuks2ru", u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [{u'url': u'https://t.co/SRmsuks2ru', u'indices': [117, 140], u'expanded_url': u'https://twitter.com/i/web/status/777915304261193728', u'display_url': u'twitter.com/i/web/status/7\u2026'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': {u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? 
Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None}
|
||||
test_tweet_extended_object = {u'full_text': u"Say more about what's happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. https://t.co/I9pUC0NdZC", u'truncated': False, u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 777915304261193728, u'favorite_count': 13856, u'contributors': None, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'retweeted': False, u'coordinates': None, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': [], u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'id_str': u'777914712382058496', u'indices': [140, 163], u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', u'type': u'photo', u'id': 777914712382058496, u'display_url': u'pic.twitter.com/I9pUC0NdZC'}]}, u'in_reply_to_screen_name': None, u'id_str': u'777915304261193728', u'display_text_range': [0, 139], u'retweet_count': 14767, u'in_reply_to_user_id': None, u'favorited': False, u'user': {u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'id': 783214, u'verified': True, u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'profile_sidebar_fill_color': u'F6F6F6', u'is_translator': False, u'geo_enabled': True, u'entities': {u'url': {u'urls': [{u'url': u'http://t.co/5iRhy7wTgu', u'indices': [0, 22], u'expanded_url': u'http://blog.twitter.com/', u'display_url': u'blog.twitter.com'}]}, u'description': 
{u'urls': [{u'url': u'https://t.co/qq1HEzvnrA', u'indices': [84, 107], u'expanded_url': u'http://support.twitter.com', u'display_url': u'support.twitter.com'}]}}, u'followers_count': 56827498, u'protected': False, u'location': u'San Francisco, CA', u'default_profile_image': False, u'id_str': u'783214', u'lang': u'en', u'utc_offset': -25200, u'statuses_count': 3161, u'description': u'Your official source for news, updates and tips from Twitter, Inc. Need help? Visit https://t.co/qq1HEzvnrA.', u'friends_count': 145, u'profile_link_color': u'226699', u'profile_image_url': u'http://pbs.twimg.com/profile_images/767879603977191425/29zfZY6I_normal.jpg', u'notifications': False, u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'profile_background_color': u'ACDED6', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/783214/1471929200', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/657090062/l1uqey5sy82r9ijhke1i.png', u'name': u'Twitter', u'is_translation_enabled': False, u'profile_background_tile': True, u'favourites_count': 2332, u'screen_name': u'twitter', u'url': u'http://t.co/5iRhy7wTgu', u'created_at': u'Tue Feb 20 14:35:54 +0000 2007', u'contributors_enabled': False, u'time_zone': u'Pacific Time (US & Canada)', u'profile_sidebar_border_color': u'FFFFFF', u'default_profile': False, u'following': False, u'listed_count': 90445}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'possibly_sensitive_appealable': False, u'lang': u'en', u'created_at': u'Mon Sep 19 17:00:36 +0000 2016', u'in_reply_to_status_id_str': None, u'place': None, u'extended_entities': {u'media': [{u'expanded_url': u'https://twitter.com/twitter/status/777915304261193728/photo/1', u'display_url': u'pic.twitter.com/I9pUC0NdZC', u'url': u'https://t.co/I9pUC0NdZC', u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg', 
u'video_info': {u'aspect_ratio': [1, 1], u'variants': [{u'url': u'https://pbs.twimg.com/tweet_video/Csu1TzEVMAAAEv7.mp4', u'bitrate': 0, u'content_type': u'video/mp4'}]}, u'id_str': u'777914712382058496', u'sizes': {u'small': {u'h': 340, u'w': 340, u'resize': u'fit'}, u'large': {u'h': 700, u'w': 700, u'resize': u'fit'}, u'medium': {u'h': 600, u'w': 600, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'indices': [140, 163], u'type': u'animated_gif', u'id': 777914712382058496, u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/Csu1TzEVMAAAEv7.jpg'}]}}
|
||||
test_tweet_extended_html = 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters.<span class="twython-tweet-suffix"> <a href="https://t.co/I9pUC0NdZC" class="twython-media">pic.twitter.com/I9pUC0NdZC</a></span>'
|
||||
|
||||
test_tweet_identical_urls = {u'entities': {u'hashtags': [], u'user_mentions': [], u'symbols': [], u'urls': [{u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [42, 65], u'url': u'https://t.co/W0uArTMk9N'}, {u'display_url': u'buff.ly/2sEhrgO', u'expanded_url': u'http://buff.ly/2sEhrgO', u'indices': [101, 124], u'url': u'https://t.co/W0uArTMk9N'}]}, u'full_text': u'Use Cases, Trials and Making 5G a Reality https://t.co/W0uArTMk9N #5G #innovation via @5GWorldSeries https://t.co/W0uArTMk9N'}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue