diff --git a/src/api.nim b/src/api.nim
index bc5fbd4..523f9e7 100644
--- a/src/api.nim
+++ b/src/api.nim
@@ -77,7 +77,7 @@ proc getGuestToken(): Future[string] {.async.} =
   token = result
 
 proc getVideo*(tweet: Tweet; token: string) {.async.} =
-  if not tweet.video.isSome: return
+  if tweet.video.isNone(): return
 
   let headers = newHttpHeaders({
     "Accept": "application/json, text/javascript, */*; q=0.01",
diff --git a/src/formatters.nim b/src/formatters.nim
index 6e5ba52..e4a2890 100644
--- a/src/formatters.nim
+++ b/src/formatters.nim
@@ -3,6 +3,8 @@ import regex
 
 import ./types, ./utils
 
+from unicode import Rune, `$`
+
 const
   urlRegex = re"((https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?)"
   emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
@@ -10,6 +12,10 @@ const
   picRegex = re"pic.twitter.com/[^ ]+"
   cardRegex = re"(https?://)?cards.twitter.com/[^ ]+"
   ellipsisRegex = re" ?…"
+  nbsp = $Rune(0x000A0)
+
+proc stripText*(text: string): string =
+  text.replace(nbsp, " ").strip()
 
 proc shortLink*(text: string; length=28): string =
   result = text.replace(re"https?://(www.)?", "")
@@ -44,7 +50,7 @@ proc reUsernameToLink*(m: RegexMatch; s: string): string =
   pretext & toLink("/" & username, "@" & username)
 
 proc linkifyText*(text: string): string =
-  result = text.strip()
+  result = text.stripText()
   result = result.replace("\n", "<br>")
   result = result.replace(ellipsisRegex, "")
   result = result.replace(usernameRegex, reUsernameToLink)
diff --git a/src/parser.nim b/src/parser.nim
index fd73727..e95db5b 100644
--- a/src/parser.nim
+++ b/src/parser.nim
@@ -34,7 +34,7 @@ proc parseIntentProfile*(profile: XmlNode): Profile =
 
 proc parseTweetProfile*(profile: XmlNode): Profile =
   result = Profile(
-    fullname: profile.getAttr("data-name").stripNbsp(),
+    fullname: profile.getAttr("data-name").stripText(),
     username: profile.getAttr("data-screen-name"),
     userpic: profile.getAvatar(".avatar"),
     verified: isVerified(profile)
@@ -48,7 +48,7 @@ proc parseQuote*(quote: XmlNode): Quote =
   )
 
   result.profile = Profile(
-    fullname: quote.selectText(".QuoteTweet-fullname").stripNbsp(),
+    fullname: quote.selectText(".QuoteTweet-fullname").stripText(),
     username: quote.getAttr("data-screen-name"),
     verified: isVerified(quote)
   )
@@ -71,7 +71,7 @@ proc parseTweet*(tweet: XmlNode): Tweet =
 
   let by = tweet.selectText(".js-retweet-text > a > b")
   if by.len > 0:
-    result.retweetBy = some(by)
+    result.retweetBy = some(by.stripText())
     result.retweetId = some(tweet.getAttr("data-retweet-id"))
 
   let quote = tweet.querySelector(".QuoteTweet-innerContainer")
diff --git a/src/parserutils.nim b/src/parserutils.nim
index 6325ac9..b670cc1 100644
--- a/src/parserutils.nim
+++ b/src/parserutils.nim
@@ -1,11 +1,8 @@
-import xmltree, strtabs, strformat, times
+import xmltree, htmlparser, strtabs, strformat, times
 import nimquery, regex
 
 import ./types, ./formatters, ./api
 
-from unicode import Rune, `$`
-const nbsp = $Rune(0x000A0)
-
 const
   thumbRegex = re".+:url\('([^']+)'\)"
   gifRegex = re".+thumb/([^\.']+)\.jpg.*"
@@ -29,9 +26,6 @@ proc getHeader(profile: XmlNode): XmlNode =
   if result.isNil:
     result = profile.querySelector(".ProfileCard-userFields")
 
-proc stripNbsp*(text: string): string =
-  text.replace(nbsp, "")
-
 proc isVerified*(profile: XmlNode): bool =
   getHeader(profile).selectText(".Icon.Icon--verified").len > 0
 
@@ -39,22 +33,32 @@ proc isProtected*(profile: XmlNode): bool =
   getHeader(profile).selectText(".Icon.Icon--protected").len > 0
 
 proc getName*(profile: XmlNode; selector: string): string =
-  profile.selectText(selector).strip().stripNbsp()
+  profile.selectText(selector).stripText()
 
 proc getUsername*(profile: XmlNode; selector: string): string =
   profile.selectText(selector).strip(chars={'@', ' '})
 
+proc emojify*(node: XmlNode) =
+  for i in node.querySelectorAll(".Emoji"):
+    i.add newText(i.getAttr("alt"))
+
 proc getTweetText*(tweet: XmlNode): string =
-  var text = tweet.selectText(".tweet-text")
   let
     selector = ".tweet-text > a.twitter-timeline-link.u-hidden"
     link = tweet.selectAttr(selector, "data-expanded-url")
     quote = tweet.querySelector(".QuoteTweet")
+    text = tweet.querySelector(".tweet-text")
+    hasEmojis = not text.querySelector(".Emoji").isNil
+
+  if hasEmojis:
+    emojify(text)
+
+  result = stripText(selectText(text, ".tweet-text"))
 
   if not quote.isNil and link.len > 0:
-    text = text.replace(link, "")
+    result = result.replace(link, "")
 
-  stripTwitterUrls(text)
+  result = stripTwitterUrls(result)
 
 proc getTime(tweet: XmlNode): XmlNode =
   tweet.querySelector(".js-short-timestamp")
@@ -67,7 +71,7 @@ proc getShortTime*(tweet: XmlNode): string =
   getTime(tweet).innerText()
 
 proc getBio*(profile: XmlNode; selector: string): string =
-  profile.selectText(selector).strip()
+  profile.selectText(selector).stripText()
 
 proc getAvatar*(profile: XmlNode; selector: string): string =
   profile.selectAttr(selector, "src").getUserpic()