Fix links in tweets when there's a prefix
If a tweet has a prefix (the @names that it is replying to), then the
length of that prefix is counted in the indices that give the locations
of entities within the tweet. But we were applying those indices to the
'display' part of the tweet, which doesn't include the prefix.
So, if the tweet was:
@bob Please meet @bill
and the prefix was `@bob `, then the indices for linking `@bill`
are `17,22` (end-exclusive, counted from the start of the full text,
prefix included). But we were applying the link around
`@bill` to the display text part of the tweet, which is:
Please meet @bill
And so the indices no longer lined up with `@bill`.
Now they do, and the same for URLs and hashtags.
This commit is contained in:
parent
354e31b914
commit
73982c78f4
3 changed files with 89 additions and 6 deletions
68
tests/tweets/entities_with_prefix.json
Normal file
68
tests/tweets/entities_with_prefix.json
Normal file
|
|
@@ -0,0 +1,68 @@
|
|||
{
|
||||
"created_at":"Sat Jan 06 18:56:35 +0000 2018",
|
||||
"id":949716340755091458,
|
||||
"id_str":"949716340755091458",
|
||||
"full_text":"@philgyford This is a test for @visionphil that includes a link https://t.co/sKw4J3A8SZ and #hashtag and \ud83d\ude03 for good measure AND that is longer than 140 characters. https://t.co/jnQdy7Zg7u",
|
||||
"truncated":false,
|
||||
"display_text_range":[ 12, 187 ],
|
||||
"entities":{
|
||||
"hashtags":[
|
||||
{
|
||||
"text":"hashtag",
|
||||
"indices":[ 92, 100 ]
|
||||
}
|
||||
],
|
||||
"symbols":[ ],
|
||||
"user_mentions":[
|
||||
{
|
||||
"screen_name":"philgyford",
|
||||
"name":"Phil Gyford",
|
||||
"id":12552,
|
||||
"id_str":"12552",
|
||||
"indices":[ 0, 11 ]
|
||||
},
|
||||
{
|
||||
"screen_name":"visionphil",
|
||||
"name":"Vision Phil",
|
||||
"id":104456050,
|
||||
"id_str":"104456050",
|
||||
"indices":[ 31, 42 ]
|
||||
}
|
||||
],
|
||||
"urls":[
|
||||
{
|
||||
"url":"https://t.co/sKw4J3A8SZ",
|
||||
"expanded_url":"http://example.org",
|
||||
"display_url":"example.org",
|
||||
"indices":[ 64, 87 ]
|
||||
},
|
||||
{
|
||||
"url":"https://t.co/jnQdy7Zg7u",
|
||||
"expanded_url":"http://example.com",
|
||||
"display_url":"example.com",
|
||||
"indices":[ 164, 187 ]
|
||||
}
|
||||
]
|
||||
},
|
||||
"source":"<a href=\"http://tapbots.com/software/tweetbot/mac\" rel=\"nofollow\">Tweetbot for Mac</a>",
|
||||
"in_reply_to_status_id":948561036889722880,
|
||||
"in_reply_to_status_id_str":"948561036889722880",
|
||||
"in_reply_to_user_id":12552,
|
||||
"in_reply_to_user_id_str":"12552",
|
||||
"in_reply_to_screen_name":"philgyford",
|
||||
"user":{
|
||||
"id":2030131,
|
||||
"id_str":"2030131"
|
||||
},
|
||||
"geo":null,
|
||||
"coordinates":null,
|
||||
"place":null,
|
||||
"contributors":null,
|
||||
"is_quote_status":false,
|
||||
"retweet_count":0,
|
||||
"favorite_count":0,
|
||||
"favorited":false,
|
||||
"retweeted":false,
|
||||
"possibly_sensitive":false,
|
||||
"lang":"en"
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue