From 73982c78f4bd541c82916c77d7aeb4ad757e9007 Mon Sep 17 00:00:00 2001 From: Phil Gyford Date: Sat, 6 Jan 2018 19:03:17 +0000 Subject: [PATCH] Fix links in tweets when there's a prefix If a tweet had a prefix (@names that it was replying to) then the length of these is counted in the indices that show the locations of entities within the tweet. But we were applying those indices to the 'display' part of the tweet that doesn't include the prefix. So, if the tweet was: @bob Please meet @bill and the prefix was `@bob `, then the indices for linking `@bill` are something like `17,21`. But we were applying the link around `@bill` to the display text part of the tweet, which is: Please meet @bill And so the indices no longer lined up with `@bill`. Now they do, and the same for URLs and hashtags. --- tests/test_html_for_tweet.py | 13 +++++ tests/tweets/entities_with_prefix.json | 68 ++++++++++++++++++++++++++ twython/api.py | 14 +++--- 3 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 tests/tweets/entities_with_prefix.json diff --git a/tests/test_html_for_tweet.py b/tests/test_html_for_tweet.py index 7331fa4..934c2dc 100644 --- a/tests/test_html_for_tweet.py +++ b/tests/test_html_for_tweet.py @@ -89,6 +89,19 @@ class TestHtmlForTweetTestCase(unittest.TestCase): self.assertEqual(tweet_text, 'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters. pic.twitter.com/I9pUC0NdZC') + def test_entities_with_prefix(self): + """ + If there is a username mention at the start of a tweet it's in the + "prefix" and so isn't part of the main tweet display text. + But its length is still counted in the indices of any subsequent + mentions, urls, hashtags, etc. + """ + self.maxDiff = 2000 + tweet_object = self.load_tweet('entities_with_prefix') + tweet_text = self.api.html_for_tweet(tweet_object) + self.assertEqual(tweet_text, + '@philgyford This is a test for @visionphil that includes a link example.org and #hashtag and 😃 for good measure AND that is longer than 140 characters. example.com') + def test_media(self): tweet_object = self.load_tweet('media') tweet_text = self.api.html_for_tweet(tweet_object) diff --git a/tests/tweets/entities_with_prefix.json b/tests/tweets/entities_with_prefix.json new file mode 100644 index 0000000..72043f9 --- /dev/null +++ b/tests/tweets/entities_with_prefix.json @@ -0,0 +1,68 @@ +{ + "created_at":"Sat Jan 06 18:56:35 +0000 2018", + "id":949716340755091458, + "id_str":"949716340755091458", + "full_text":"@philgyford This is a test for @visionphil that includes a link https://t.co/sKw4J3A8SZ and #hashtag and \ud83d\ude03 for good measure AND that is longer than 140 characters. https://t.co/jnQdy7Zg7u", + "truncated":false, + "display_text_range":[ 12, 187 ], + "entities":{ + "hashtags":[ + { + "text":"hashtag", + "indices":[ 92, 100 ] + } + ], + "symbols":[ ], + "user_mentions":[ + { + "screen_name":"philgyford", + "name":"Phil Gyford", + "id":12552, + "id_str":"12552", + "indices":[ 0, 11 ] + }, + { + "screen_name":"visionphil", + "name":"Vision Phil", + "id":104456050, + "id_str":"104456050", + "indices":[ 31, 42 ] + } + ], + "urls":[ + { + "url":"https://t.co/sKw4J3A8SZ", + "expanded_url":"http://example.org", + "display_url":"example.org", + "indices":[ 64, 87 ] + }, + { + "url":"https://t.co/jnQdy7Zg7u", + "expanded_url":"http://example.com", + "display_url":"example.com", + "indices":[ 164, 187 ] + } + ] + }, + "source":"Tweetbot for Mac", + "in_reply_to_status_id":948561036889722880, + "in_reply_to_status_id_str":"948561036889722880", + "in_reply_to_user_id":12552, + "in_reply_to_user_id_str":"12552", + "in_reply_to_screen_name":"philgyford", + "user":{ + "id":2030131, + "id_str":"2030131" + }, + "geo":null, + "coordinates":null, + "place":null, + "contributors":null, + "is_quote_status":false, + "retweet_count":0, + "favorite_count":0, + "favorited":false, + "retweeted":false, + "possibly_sensitive":false, + "lang":"en" +} diff --git a/twython/api.py b/twython/api.py index f25cc97..57a10a6 100644 --- a/twython/api.py +++ b/twython/api.py @@ -581,6 +581,8 @@ class Twython(EndpointsMixin, object): if display_text_start <= temp['start'] <= display_text_end: temp['replacement'] = mention_html + temp['start'] -= display_text_start + temp['end'] -= display_text_start entities.append(temp) else: # Make the '@username' at the start, before @@ -592,8 +594,8 @@ class Twython(EndpointsMixin, object): if 'hashtags' in tweet['entities']: for entity in tweet['entities']['hashtags']: temp = {} - temp['start'] = entity['indices'][0] - temp['end'] = entity['indices'][1] + temp['start'] = entity['indices'][0] - display_text_start + temp['end'] = entity['indices'][1] - display_text_start url_html = '#%(hashtag)s' % {'hashtag': entity['text']} @@ -604,8 +606,8 @@ class Twython(EndpointsMixin, object): if 'symbols' in tweet['entities']: for entity in tweet['entities']['symbols']: temp = {} - temp['start'] = entity['indices'][0] - temp['end'] = entity['indices'][1] + temp['start'] = entity['indices'][0] - display_text_start + temp['end'] = entity['indices'][1] - display_text_start url_html = '$%(symbol)s' % {'symbol': entity['text']} @@ -616,8 +618,8 @@ class Twython(EndpointsMixin, object): if 'urls' in tweet['entities']: for entity in tweet['entities']['urls']: temp = {} - temp['start'] = entity['indices'][0] - temp['end'] = entity['indices'][1] + temp['start'] = entity['indices'][0] - display_text_start + temp['end'] = entity['indices'][1] - display_text_start if use_display_url and entity.get('display_url') and not use_expanded_url: shown_url = entity['display_url']