proc getGraphProfile*(username: string): Future[Profile] {.async.} =
  ## Requests the `graphUser` endpoint for `username` and converts the
  ## response with `parseGraphProfile`.
  ##
  ## The GraphQL variables are sent JSON-encoded in the `variables` query
  ## parameter.
  let payload = %*{"screen_name": username, "withHighlightedLabel": true}
  let endpoint = graphUser ? {"variables": $payload}
  let response = await fetch(endpoint)
  result = parseGraphProfile(response, username)
proc getListMembers*(list: List; after=""): Future[Result[Profile]] {.async.} =
  ## Fetches one page of the members of `list`.
  ##
  ## Nothing is requested when `list.id` is empty; the default result is
  ## returned instead. `after` is handed to `genParams` as the cursor and to
  ## `parseListMembers` so it can tell whether this is the first page.
  if list.id.len > 0:
    let
      params = genParams({"list_id": list.id}, after)
      js = await fetch(listMembers ? params, oldApi=true)
    result = parseListMembers(js, after)
proc fetch*(url: Uri; retried=false; oldApi=false): Future[JsonNode] {.async.} =
  ## Performs a GET on `url` with a guest token and returns the parsed JSON.
  ##
  ## * `retried` is accepted for interface compatibility; it is not read here.
  ## * `oldApi` disables the rate-limit bookkeeping from response headers.
  ##
  ## Returns `nil` when the request fails, when the body is not JSON on a
  ## non-200 response, or when the guest token was rejected.
  ## The token goes back to the pool unless the response marked it as bad.
  var
    token = await getToken()
    keepToken = true
    proxy: Proxy = when defined(proxy): newProxy(prox) else: nil
    client = newAsyncHttpClient(proxy=proxy, headers=genHeaders(token))

  try:
    let
      resp = await client.get($url)
      body = await resp.body

    const rl = "x-rate-limit-"
    # `getToken` may hand back nil (genHeaders already copes with that), so
    # the bookkeeping must not dereference it.
    if token != nil and not oldApi and resp.headers.hasKey(rl & "limit"):
      token.limit = parseInt(resp.headers[rl & "limit"])
      token.remaining = parseInt(resp.headers[rl & "remaining"])
      token.reset = fromUnix(parseInt(resp.headers[rl & "reset"]))

    if resp.status != $Http200:
      if "Bad guest token" in body:
        # A rejected token must not be recycled into the pool.
        keepToken = false
        return
      elif not body.startsWith('{'):
        echo resp.status, " ", body
        return

    result = parseJson(body)

    # Only index into `errors` once its shape is known.
    let errors = result{"errors"}
    if errors != nil and errors.kind == JArray and errors.len > 0 and
        errors[0]{"code"}.getInt == 200:
      keepToken = false
      echo "bad token"
  except CatchableError:
    echo "error: ", url
    return nil
  finally:
    if keepToken:
      token.release()

    try: client.close()
    except CatchableError: discard
"include_ext_media_availability": "true", + "send_error_codes": "true", + "simple_quoted_tweet": "true", + "count": "20", + "ext": "mediaStats,highlightedLabel,cameraMoment", + "include_quote_count": "true" + }.toSeq + + searchParams* = { + "query_source": "typed_query", + "pc": "1", + "spelling_corrections": "1" + }.toSeq + ## top: nothing + ## latest: "tweet_search_mode: live" + ## user: "result_filter: user" + ## photos: "result_filter: photos" + ## videos: "result_filter: videos" diff --git a/src/parser.nim b/src/parser.nim new file mode 100644 index 0000000..636ffae --- /dev/null +++ b/src/parser.nim @@ -0,0 +1,383 @@ +import json, strutils, options, tables, times, math +import types, parserutils + +proc parseProfile(js: JsonNode; id=""): Profile = + if js == nil: return + result = Profile( + id: if id.len > 0: id else: js{"id_str"}.getStr, + username: js{"screen_name"}.getStr, + fullname: js{"name"}.getStr, + location: js{"location"}.getStr, + bio: js{"description"}.getStr, + userpic: js{"profile_image_url_https"}.getStr.replace("_normal", ""), + banner: js.getBanner, + following: $js{"friends_count"}.getInt, + followers: $js{"followers_count"}.getInt, + tweets: $js{"statuses_count"}.getInt, + likes: $js{"favourites_count"}.getInt, + media: $js{"media_count"}.getInt, + verified: js{"verified"}.getBool, + protected: js{"protected"}.getBool, + joinDate: js{"created_at"}.getTime + ) + + result.expandProfileEntities(js) + +proc parseGraphProfile*(js: JsonNode; username: string): Profile = + with errors, js{"errors"}: + for error in errors: + case Error(error{"code"}.getInt) + of notFound: return Profile(username: username) + of suspended: return Profile(username: username, suspended: true) + else: discard + + let user = js{"data", "user", "legacy"} + let id = js{"data", "user", "rest_id"}.getStr + parseProfile(user, id) + +proc parseGraphList*(js: JsonNode): List = + if js == nil: return + + var list = js{"data", "user_by_screen_name", "list"} + if list == nil: + 
proc parsePoll(js: JsonNode): Poll =
  ## Builds a `Poll` from a poll card.
  ##
  ## Choice counts and labels are read from `binding_values`. `status` is
  ## either the leading unit of the remaining time or "Final results".
  ## `leader` is the index of the highest count and `votes` the total.
  let
    vals = js{"binding_values"}
    name = js{"name"}.getStr

  # name format is pollNchoice_*; names too short to carry N yield no choices
  if name.len > 4:
    for i in '1' .. name[4]:
      let choice = "choice" & i
      result.values.add parseInt(vals{choice & "_count"}.getStrVal("0"))
      result.options.add vals{choice & "_label"}.getStrVal

  let time = vals{"end_datetime_utc", "string_value"}.getDateTime
  if time > getTime():
    let
      timeLeft = $(time - getTime())
      comma = timeLeft.find(",")
    # Durations with fewer than three units are rendered without a comma;
    # keep the whole text then instead of slicing to an empty string.
    result.status = if comma >= 0: timeLeft[0 ..< comma] else: timeLeft
  else:
    result.status = "Final results"

  # `max` on an empty seq raises, so only look for a leader when there are
  # values to compare.
  if result.values.len > 0:
    result.leader = result.values.find(max(result.values))
  result.votes = result.values.sum
proc parseCard(js: JsonNode; urls: JsonNode): Card =
  ## Builds a `Card` from a tweet's card JSON.
  ##
  ## * `js` is the card object; returns early when it has no
  ##   `binding_values`.
  ## * `urls` is the tweet's url entity list, used to swap the card url for
  ##   its `expanded_url`; returns the card as built so far when it is nil.
  ##
  ## The card kind is the part of `name` after the first ':'. `parseEnum`
  ## raises `ValueError` for a name that is not a `CardKind`.
  const imageTypes = ["photo_image_full_size", "summary_photo_image",
                      "thumbnail_image", "promo_image", "player_image"]
  let
    vals = ? js{"binding_values"}
    name = js{"name"}.getStr
    kind = parseEnum[CardKind](name[(name.find(":") + 1) ..< name.len])

  result = Card(
    kind: kind,
    url: vals.getCardUrl(kind),
    dest: vals.getCardDomain(kind),
    title: vals.getCardTitle(kind),
    text: vals{"description"}.getStrVal
  )

  if result.url.len == 0:
    result.url = js{"url"}.getStr

  case kind
  of promoVideo, promoVideoConvo:
    result.video = some parsePromoVideo(vals)
  of broadcast:
    # Broadcast cards are rebuilt from scratch by their own parser.
    result = parseBroadcast(vals)
  of player:
    result.url = vals{"player_url"}.getStrVal
    if "youtube.com" in result.url:
      result.url = result.url.replace("/embed/", "/watch?v=")
  else: discard

  # `with` expands to its own `block`, so an unlabelled `break` would only
  # leave that block and the loop would carry on, letting later image types
  # overwrite earlier ones. The label stops at the first available image.
  block firstImage:
    for typ in imageTypes:
      with img, vals{typ & "_large"}:
        result.image = img{"image_value", "url"}.getStr
        break firstImage

  for u in ? urls:
    if u{"url"}.getStr == result.url:
      result.url = u{"expanded_url"}.getStr
      break
proc finalizeTweet(global: GlobalObjects; id: string): Tweet =
  ## Resolves tweet `id` against `global.tweets`.
  ##
  ## An unknown id yields a bare `Tweet` carrying only the numeric id (0 for
  ## an empty string). Quote and retweet placeholders are replaced by the
  ## stored tweets, or by an empty `Tweet` when the target is not in the
  ## table. Retweets are finalized recursively; quotes are used as stored.
  var numericId = 0
  if id.len > 0:
    numericId = parseInt(id)
  result = global.tweets.getOrDefault(id, Tweet(id: numericId))

  if result.quote.isSome:
    let key = $get(result.quote).id
    result.quote =
      if key in global.tweets: some global.tweets[key]
      else: some Tweet()

  if result.retweet.isSome:
    let key = $get(result.retweet).id
    result.retweet =
      if key in global.tweets: some finalizeTweet(global, key)
      else: some Tweet()
proc parseConversation*(js: JsonNode; tweetId: string): Conversation =
  ## Splits a conversation response into the focused tweet, the tweets
  ## before it, the author's own thread after it, and reply chains.
  ##
  ## Returns the empty conversation when `js` is nil, when it has no
  ## timeline instructions, or when the first instruction has no entries.
  result = Conversation(replies: Result[Chain](beginning: true))
  let global = parseGlobalObjects(? js)

  let instructions = ? js{"timeline", "instructions"}
  # Same guard as parseTimeline: indexing an empty list raises.
  if instructions.kind != JArray or instructions.len == 0: return

  # A missing entry list comes back as nil, which cannot be iterated.
  let entries = ? instructions[0]{"addEntries", "entries"}
  for e in entries:
    let entry = e{"entryId"}.getStr
    if "tweet" in entry:
      let tweet = finalizeTweet(global, entry.getId)
      if $tweet.id != tweetId:
        result.before.content.add tweet
      else:
        result.tweet = tweet
    elif "conversationThread" in entry:
      let (thread, self) = parseThread(e, global)
      if thread.content.len > 0:
        if self:
          result.after = thread
        else:
          result.replies.content.add thread
    elif "cursor-showMore" in entry or "cursor-bottom" in entry:
      result.replies.bottom = e.getCursor
proc parseTimeline*(js: JsonNode; after=""): Timeline =
  ## Converts a timeline or search response into a `Timeline`.
  ##
  ## `after` is the cursor the page was requested with; an empty cursor
  ## marks the result as the beginning, which is also the only case where a
  ## pinned tweet is added. Unavailable tweets are skipped.
  ## Returns what has been collected so far when the response lacks
  ## instructions or the first instruction carries no entries.
  result = Timeline(beginning: after.len == 0)
  let global = parseGlobalObjects(? js)

  let instructions = ? js{"timeline", "instructions"}
  if instructions.kind != JArray or instructions.len == 0: return

  for i in instructions:
    if result.beginning and i{"pinEntry"} != nil:
      with pin, parsePin(i, global):
        result.content.add pin
    else:
      # This is necessary for search
      with r, i{"replaceEntry", "entry"}:
        if "top" in r{"entryId"}.getStr:
          result.top = r.getCursor
        elif "bottom" in r{"entryId"}.getStr:
          result.bottom = r.getCursor

  # A missing entry list comes back as nil, which cannot be iterated.
  let entries = ? instructions[0]{"addEntries", "entries"}
  for e in entries:
    let entry = e{"entryId"}.getStr
    if "tweet" in entry or "sq-I-t" in entry:
      let tweet = finalizeTweet(global, entry.getId)
      if not tweet.available: continue
      result.content.add tweet
    elif "cursor-top" in entry:
      result.top = e.getCursor
    elif "cursor-bottom" in entry:
      result.bottom = e.getCursor
proc getId*(id: string): string =
  ## Returns the part of `id` that follows its last '-'.
  ## An `id` without any '-' is returned unchanged.
  let dash = id.rfind("-")
  result =
    if dash >= 0: id.substr(dash + 1)
    else: id
proc getBanner*(js: JsonNode): string =
  ## Picks a banner for a profile, in order of preference:
  ## the banner image url (with a "/1500x500" suffix), the link colour,
  ## the first colour of the profile picture palette, then "#161616".
  ## Colours are returned as '#'-prefixed hex strings.
  let bannerUrl = js{"profile_banner_url"}.getStr
  if bannerUrl.len > 0:
    return bannerUrl & "/1500x500"

  let linkColor = js{"profile_link_color"}.getStr
  if linkColor.len > 0:
    return '#' & linkColor

  # use primary color from profile picture color histogram
  with p, js{"profile_image_extensions", "mediaColor", "r", "ok", "palette"}:
    if p.len > 0:
      let rgb = p[0]{"rgb"}.getFields
      result = "#"
      for channel in ["red", "green", "blue"]:
        result.add toHex(rgb[channel].getInt, 2)
      return

  return "#161616"
proc expandTag(text: var string; js: JsonNode; prefix: char) =
  ## Replaces the tag named by `js{"text"}` (with `prefix` in front) in
  ## `text` by a link to its search page.
  ##
  ## Whole-word matches are tried first. When that leaves the length of
  ## `text` unchanged, every plain occurrence is replaced instead; this
  ## covers tags glued to emojis or other characters.
  let label = prefix & js{"text"}.getStr
  let link = a(label, href=("/search?q=" & encodeUrl(label)))
  let replaced = text.replaceWord(label, link)

  text =
    if replaced.len == text.len: replaced.replace(label, link)
    else: replaced
proc setCacheTimes*(cfg: Config) =
  ## Stores the RSS and list cache lifetimes from `cfg`, each multiplied
  ## by 60.
  # NOTE(review): presumably converts configured minutes into the seconds
  # passed to Redis expiry calls - confirm against the config docs.
  const factor = 60
  listCacheTime = factor * cfg.listCacheTime
  rssCacheTime = factor * cfg.rssCacheTime
proc getProfileId*(username: string): Future[string] {.async.} =
  ## Looks up the id stored for the lower-cased `username` in the "p:" hash.
  ## Yields an empty string when Redis answers with the `redisNil` marker.
  let field = toLower(username)
  pool.withAcquire(r):
    let stored = await r.hget("p:", field)
    if stored != redisNil:
      result = stored
proc fetchToken(): Future[Token] {.async.} =
  ## Requests the Twitter front page and extracts a guest token from the
  ## last "gt=" occurrence in it.
  ##
  ## Returns `nil` when the page cannot be fetched or holds no complete
  ## token. The HTTP client is closed on every path.
  const tokLen = 19 # characters taken after "gt="

  let
    headers = newHttpHeaders({
      "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "accept-language": "en-US,en;q=0.5",
      "connection": "keep-alive",
      "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0"
    })
    client = newAsyncHttpClient(headers=headers)

  var page = ""
  var failed = false
  try:
    page = await client.getContent("https://twitter.com")
  except CatchableError:
    # A failed request must neither leak the client nor take down callers
    # that await many token fetches in a row.
    failed = true
    echo "token fetch fail"
  finally:
    try: client.close()
    except CatchableError: discard

  if failed: return

  let pos = page.rfind("gt=")
  # Also reject a page that ends before the token is complete; slicing past
  # the end would raise.
  if pos == -1 or pos + 3 + tokLen > page.len:
    echo "token parse fail"
    return

  result = Token(tok: page[pos + 3 ..< pos + 3 + tokLen],
                 limit: 187, remaining: 187,
                 reset: getTime() + 15.minutes, init: getTime())