Fix links in tweets when there's a prefix

If a tweet has a prefix (the @names it is replying to), the length of
that prefix is counted in the indices that give the locations of
entities within the tweet. But we were applying those indices to the
'display' part of the tweet, which doesn't include the prefix.

So, if the tweet was:

    @bob Please meet @bill

and the prefix was `@bob `, then the indices for linking `@bill`
are `17,22`, counted from the start of the full text. But we were
applying them to the display text part of the tweet, which is:

    Please meet @bill

And so the indices no longer lined up with `@bill`.

Now they do, and the same goes for URLs, hashtags and symbols.
This commit is contained in:
Phil Gyford 2018-01-06 19:03:17 +00:00
parent 354e31b914
commit 73982c78f4
3 changed files with 89 additions and 6 deletions

View file

@ -89,6 +89,19 @@ class TestHtmlForTweetTestCase(unittest.TestCase):
self.assertEqual(tweet_text,
'Say more about what\'s happening! Rolling out now: photos, videos, GIFs, polls, and Quote Tweets no longer count toward your 140 characters.<span class="twython-tweet-suffix"> <a href="https://t.co/I9pUC0NdZC" class="twython-media">pic.twitter.com/I9pUC0NdZC</a></span>')
def test_entities_with_prefix(self):
    """
    Entities after a prefix are linked at the right place.

    A username mention at the very start of a tweet belongs to the
    "prefix", which is not part of the main display text. Its length
    is nevertheless included in the indices of every later mention,
    url, hashtag, etc.
    """
    # Allow a longer diff so a failure shows the whole expected HTML.
    self.maxDiff = 2000
    expected_html = '<span class="twython-tweet-prefix"><a href="https://twitter.com/philgyford" class="twython-mention">@philgyford</a> </span>This is a test for <a href="https://twitter.com/visionphil" class="twython-mention">@visionphil</a> that includes a link <a href="https://t.co/sKw4J3A8SZ" class="twython-url">example.org</a> and <a href="https://twitter.com/search?q=%23hashtag" class="twython-hashtag">#hashtag</a> and 😃 for good measure AND that is longer than 140 characters. <a href="https://t.co/jnQdy7Zg7u" class="twython-url">example.com</a>'
    rendered_html = self.api.html_for_tweet(
        self.load_tweet('entities_with_prefix'))
    self.assertEqual(rendered_html, expected_html)
def test_media(self):
tweet_object = self.load_tweet('media')
tweet_text = self.api.html_for_tweet(tweet_object)

View file

@ -0,0 +1,68 @@
{
"created_at":"Sat Jan 06 18:56:35 +0000 2018",
"id":949716340755091458,
"id_str":"949716340755091458",
"full_text":"@philgyford This is a test for @visionphil that includes a link https://t.co/sKw4J3A8SZ and #hashtag and \ud83d\ude03 for good measure AND that is longer than 140 characters. https://t.co/jnQdy7Zg7u",
"truncated":false,
"display_text_range":[ 12, 187 ],
"entities":{
"hashtags":[
{
"text":"hashtag",
"indices":[ 92, 100 ]
}
],
"symbols":[ ],
"user_mentions":[
{
"screen_name":"philgyford",
"name":"Phil Gyford",
"id":12552,
"id_str":"12552",
"indices":[ 0, 11 ]
},
{
"screen_name":"visionphil",
"name":"Vision Phil",
"id":104456050,
"id_str":"104456050",
"indices":[ 31, 42 ]
}
],
"urls":[
{
"url":"https://t.co/sKw4J3A8SZ",
"expanded_url":"http://example.org",
"display_url":"example.org",
"indices":[ 64, 87 ]
},
{
"url":"https://t.co/jnQdy7Zg7u",
"expanded_url":"http://example.com",
"display_url":"example.com",
"indices":[ 164, 187 ]
}
]
},
"source":"<a href=\"http://tapbots.com/software/tweetbot/mac\" rel=\"nofollow\">Tweetbot for Mac</a>",
"in_reply_to_status_id":948561036889722880,
"in_reply_to_status_id_str":"948561036889722880",
"in_reply_to_user_id":12552,
"in_reply_to_user_id_str":"12552",
"in_reply_to_screen_name":"philgyford",
"user":{
"id":2030131,
"id_str":"2030131"
},
"geo":null,
"coordinates":null,
"place":null,
"contributors":null,
"is_quote_status":false,
"retweet_count":0,
"favorite_count":0,
"favorited":false,
"retweeted":false,
"possibly_sensitive":false,
"lang":"en"
}

View file

@ -581,6 +581,8 @@ class Twython(EndpointsMixin, object):
if display_text_start <= temp['start'] <= display_text_end:
temp['replacement'] = mention_html
temp['start'] -= display_text_start
temp['end'] -= display_text_start
entities.append(temp)
else:
# Make the '@username' at the start, before
@ -592,8 +594,8 @@ class Twython(EndpointsMixin, object):
if 'hashtags' in tweet['entities']:
for entity in tweet['entities']['hashtags']:
temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
temp['start'] = entity['indices'][0] - display_text_start
temp['end'] = entity['indices'][1] - display_text_start
url_html = '<a href="https://twitter.com/search?q=%%23%(hashtag)s" class="twython-hashtag">#%(hashtag)s</a>' % {'hashtag': entity['text']}
@ -604,8 +606,8 @@ class Twython(EndpointsMixin, object):
if 'symbols' in tweet['entities']:
for entity in tweet['entities']['symbols']:
temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
temp['start'] = entity['indices'][0] - display_text_start
temp['end'] = entity['indices'][1] - display_text_start
url_html = '<a href="https://twitter.com/search?q=%%24%(symbol)s" class="twython-symbol">$%(symbol)s</a>' % {'symbol': entity['text']}
@ -616,8 +618,8 @@ class Twython(EndpointsMixin, object):
if 'urls' in tweet['entities']:
for entity in tweet['entities']['urls']:
temp = {}
temp['start'] = entity['indices'][0]
temp['end'] = entity['indices'][1]
temp['start'] = entity['indices'][0] - display_text_start
temp['end'] = entity['indices'][1] - display_text_start
if use_display_url and entity.get('display_url') and not use_expanded_url:
shown_url = entity['display_url']