Merge remote-tracking branch 'upstream/guest_accounts'

This commit is contained in:
PrivacyDev 2023-09-01 17:44:44 -04:00
commit b313bb0e72
16 changed files with 152 additions and 118 deletions

View file

@ -40,6 +40,9 @@ jobs:
nimble md
nimble scss
- name: Run tests
  env:
    # Guest account JSON supplied through the repository secret.
    GUEST_ACCOUNTS: ${{ secrets.GUEST_ACCOUNTS }}
  # The secret is expanded inside double quotes and written with printf so
  # the shell neither word-splits, whitespace-collapses nor glob-expands the
  # JSON before it reaches guest_accounts.json.
  run: |
    printf '%s\n' "$GUEST_ACCOUNTS" > ./guest_accounts.json
    ./nitter &
    pytest -n4 tests

View file

@ -155,29 +155,25 @@ proc getGraphTweetSearch*(query: Query; after=""): Future[Timeline] {.async.} =
if after.len > 0:
variables["cursor"] = % after
let url = graphSearchTimeline ? {"variables": $variables, "features": gqlFeatures}
result = parseGraphSearch[Tweets](await fetch(url, Api.search), after)
result = parseGraphSearch(await fetch(url, Api.search), after)
result.query = query
proc getGraphUserSearch*(query: Query; after=""): Future[Result[User]] {.async.} =
proc getUserSearch*(query: Query; page="1"): Future[Result[User]] {.async.} =
if query.text.len == 0:
return Result[User](query: query, beginning: true)
var
variables = %*{
"rawQuery": query.text,
"count": 20,
"product": "People",
"withDownvotePerspective": false,
"withReactionsMetadata": false,
"withReactionsPerspective": false
}
if after.len > 0:
variables["cursor"] = % after
result.beginning = false
let
page = if page.len == 0: "1" else: page
url = userSearch ? genParams({"q": query.text, "skip_status": "1", "page": page})
js = await fetchRaw(url, Api.userSearch)
result = parseUsers(js)
let url = graphSearchTimeline ? {"variables": $variables, "features": gqlFeatures}
result = parseGraphSearch[User](await fetch(url, Api.search), after)
result.query = query
if page.len == 0:
result.bottom = "2"
elif page.allCharsInSet(Digits):
result.bottom = $(parseInt(page) + 1)
proc getPhotoRail*(name: string): Future[PhotoRail] {.async.} =
if name.len == 0: return

View file

@ -75,6 +75,7 @@ template fetchImpl(result, additional_headers, fetchBody) {.dirty.} =
var account = await getGuestAccount(api)
if account.oauthToken.len == 0:
echo "[accounts] Empty oauth token, account: ", account.id
raise rateLimitError()
try:
@ -96,28 +97,47 @@ template fetchImpl(result, additional_headers, fetchBody) {.dirty.} =
badClient = true
raise newException(BadClientError, "Bad client")
if resp.headers.hasKey(rlRemaining):
let
remaining = parseInt(resp.headers[rlRemaining])
reset = parseInt(resp.headers[rlReset])
account.setRateLimit(api, remaining, reset)
if result.len > 0:
if resp.headers.getOrDefault("content-encoding") == "gzip":
result = uncompress(result, dfGzip)
else:
echo "non-gzip body, url: ", url, ", body: ", result
if result.startsWith("{\"errors"):
let errors = result.fromJson(Errors)
if errors in {expiredToken, badToken}:
echo "fetch error: ", errors
invalidate(account)
raise rateLimitError()
elif errors in {rateLimited}:
# rate limit hit, resets after 24 hours
setLimited(account, api)
raise rateLimitError()
elif result.startsWith("429 Too Many Requests"):
echo "[accounts] 429 error, API: ", api, ", account: ", account.id
account.apis[api].remaining = 0
# rate limit hit, resets after the 15 minute window
raise rateLimitError()
fetchBody
release(account, used=true)
if resp.status == $Http400:
raise newException(InternalError, $url)
except InternalError as e:
raise e
except BadClientError as e:
release(account, used=true)
raise e
except OSError as e:
raise e
except Exception as e:
echo "error: ", e.name, ", msg: ", e.msg, ", accountId: ", account.id, ", url: ", url
if "length" notin e.msg and "descriptor" notin e.msg:
release(account, invalid=true)
raise rateLimitError()
finally:
release(account)
proc fetch*(url: Uri; api: Api; additional_headers: HttpHeaders = newHttpHeaders()): Future[JsonNode] {.async.} =
@ -134,12 +154,10 @@ proc fetch*(url: Uri; api: Api; additional_headers: HttpHeaders = newHttpHeaders
echo resp.status, ": ", body, " --- url: ", url
result = newJNull()
updateAccount()
let error = result.getError
if error in {invalidToken, badToken}:
echo "fetch error: ", result.getError
release(account, invalid=true)
if error in {expiredToken, badToken}:
echo "fetchBody error: ", error
invalidate(account)
raise rateLimitError()
proc fetchRaw*(url: Uri; api: Api; additional_headers: HttpHeaders = newHttpHeaders()): Future[string] {.async.} =
@ -147,12 +165,3 @@ proc fetchRaw*(url: Uri; api: Api; additional_headers: HttpHeaders = newHttpHead
if not (result.startsWith('{') or result.startsWith('[')):
echo resp.status, ": ", result, " --- url: ", url
result.setLen(0)
updateAccount()
if result.startsWith("{\"errors"):
let errors = result.fromJson(Errors)
if errors in {invalidToken, badToken}:
echo "fetch error: ", errors
release(account, invalid=true)
raise rateLimitError()

View file

@ -9,6 +9,7 @@ const
activate* = $(api / "1.1/guest/activate.json")
photoRail* = api / "1.1/statuses/media_timeline.json"
userSearch* = api / "1.1/users/search.json"
timelineApi = api / "2/timeline"
favorites* = timelineApi / "favorites"
@ -19,7 +20,7 @@ const
graphUserTweets* = graphql / "3JNH4e9dq1BifLxAa3UMWg/UserWithProfileTweetsQueryV2"
graphUserTweetsAndReplies* = graphql / "8IS8MaO-2EN6GZZZb8jF0g/UserWithProfileTweetsAndRepliesQueryV2"
graphUserMedia* = graphql / "PDfFf8hGeJvUCiTyWtw4wQ/MediaTimelineV2"
graphTweet* = graphql / "83h5UyHZ9wEKBVzALX8R_g/ConversationTimelineV2"
graphTweet* = graphql / "q94uRCEn65LZThakYcPT6g/TweetDetail"
graphTweetResult* = graphql / "sITyJdhRPpvpEjg4waUmTA/TweetResultByIdQuery"
graphSearchTimeline* = graphql / "gkjsKepM6gl_HmFWoWKfgg/SearchTimeline"
graphListById* = graphql / "iTpgCtbdxrsJfyx0cFjHqg/ListByRestId"
@ -41,6 +42,7 @@ const
"include_user_entities": "1",
"include_ext_reply_count": "1",
"include_ext_is_blue_verified": "1",
#"include_ext_verified_type": "1",
"include_ext_media_color": "0",
"cards_platform": "Web-13",
"tweet_mode": "extended",
@ -94,8 +96,12 @@ const
tweetVariables* = """{
"focalTweetId": "$1",
$2
"includeHasBirdwatchNotes": false
}"""
"includeHasBirdwatchNotes": false,
"includePromotedContent": false,
"withBirdwatchNotes": false,
"withVoice": false,
"withV2Timeline": true
}""".replace(" ", "").replace("\n", "")
# oldUserTweetsVariables* = """{
# "userId": "$1", $2

View file

@ -56,7 +56,7 @@ proc toUser*(raw: RawUser): User =
tweets: raw.statusesCount,
likes: raw.favouritesCount,
media: raw.mediaCount,
verified: raw.verified,
verified: raw.verified or raw.extIsBlueVerified,
protected: raw.protected,
joinDate: parseTwitterDate(raw.createdAt),
banner: getBanner(raw),

View file

@ -16,6 +16,7 @@ type
statusesCount*: int
mediaCount*: int
verified*: bool
extIsBlueVerified*: bool
protected*: bool
profileLinkColor*: string
profileBannerUrl*: string

View file

@ -39,11 +39,8 @@ template use*(pool: HttpPool; heads: HttpHeaders; body: untyped): untyped =
try:
body
except ProtocolError:
# Twitter closed the connection, retry
body
except BadClientError:
# Twitter returned 503, we need a new client
except BadClientError, ProtocolError:
# Twitter returned 503 or closed the connection, we need a new client
pool.release(c, true)
badClient = false
c = pool.acquire(heads)

View file

@ -1,10 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-only
import strutils, options, times, math, tables
import strutils, options, times, math
import packedjson, packedjson/deserialiser
import types, parserutils, utils
import experimental/parser/unifiedcard
import std/tables
proc parseGraphTweet(js: JsonNode): Tweet
proc parseGraphTweet(js: JsonNode; isLegacy=false): Tweet
proc parseUser(js: JsonNode; id=""): User =
if js.isNull: return
@ -424,7 +425,7 @@ proc parsePhotoRail*(js: JsonNode): PhotoRail =
if url.len == 0: continue
result.add GalleryPhoto(url: url, tweetId: $t.id)
proc parseGraphTweet(js: JsonNode): Tweet =
proc parseGraphTweet(js: JsonNode; isLegacy=false): Tweet =
if js.kind == JNull:
return Tweet()
@ -440,9 +441,9 @@ proc parseGraphTweet(js: JsonNode): Tweet =
of "TweetPreviewDisplay":
return Tweet(text: "You're unable to view this Tweet because it's only available to the Subscribers of the account owner.")
of "TweetWithVisibilityResults":
return parseGraphTweet(js{"tweet"})
return parseGraphTweet(js{"tweet"}, isLegacy)
var jsCard = copy(js{"tweet_card", "legacy"})
var jsCard = copy(js{if isLegacy: "card" else: "tweet_card", "legacy"})
if jsCard.kind != JNull:
var values = newJObject()
for val in jsCard["binding_values"]:
@ -457,10 +458,9 @@ proc parseGraphTweet(js: JsonNode): Tweet =
result.expandNoteTweetEntities(noteTweet)
if result.quote.isSome:
result.quote = some(parseGraphTweet(js{"quoted_status_result", "result"}))
result.quote = some(parseGraphTweet(js{"quoted_status_result", "result"}, isLegacy))
proc parseGraphThread(js: JsonNode): tuple[thread: Chain; self: bool] =
let thread = js{"content", "items"}
for t in js{"content", "items"}:
let entryId = t{"entryId"}.getStr
if "cursor-showmore" in entryId:
@ -468,28 +468,33 @@ proc parseGraphThread(js: JsonNode): tuple[thread: Chain; self: bool] =
result.thread.cursor = cursor.getStr
result.thread.hasMore = true
elif "tweet" in entryId:
let tweet = parseGraphTweet(t{"item", "content", "tweetResult", "result"})
result.thread.content.add tweet
let
isLegacy = t{"item"}.hasKey("itemContent")
(contentKey, resultKey) = if isLegacy: ("itemContent", "tweet_results")
else: ("content", "tweetResult")
if t{"item", "content", "tweetDisplayType"}.getStr == "SelfThread":
result.self = true
with content, t{"item", contentKey}:
result.thread.content.add parseGraphTweet(content{resultKey, "result"}, isLegacy)
if content{"tweetDisplayType"}.getStr == "SelfThread":
result.self = true
proc parseGraphTweetResult*(js: JsonNode): Tweet =
with tweet, js{"data", "tweet_result", "result"}:
result = parseGraphTweet(tweet)
result = parseGraphTweet(tweet, false)
proc parseGraphConversation*(js: JsonNode; tweetId: string): Conversation =
result = Conversation(replies: Result[Chain](beginning: true))
let instructions = ? js{"data", "timeline_response", "instructions"}
let instructions = ? js{"data", "threaded_conversation_with_injections_v2", "instructions"}
if instructions.len == 0:
return
for e in instructions[0]{"entries"}:
let entryId = e{"entryId"}.getStr
if entryId.startsWith("tweet"):
with tweetResult, e{"content", "content", "tweetResult", "result"}:
let tweet = parseGraphTweet(tweetResult)
with tweetResult, e{"content", "itemContent", "tweet_results", "result"}:
let tweet = parseGraphTweet(tweetResult, true)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
@ -503,7 +508,7 @@ proc parseGraphConversation*(js: JsonNode; tweetId: string): Conversation =
let tweet = Tweet(
id: parseBiggestInt(id),
available: false,
text: e{"content", "content", "tombstoneInfo", "richText"}.getTombstone
text: e{"content", "itemContent", "tombstoneInfo", "richText"}.getTombstone
)
if id == tweetId:
@ -517,7 +522,7 @@ proc parseGraphConversation*(js: JsonNode; tweetId: string): Conversation =
else:
result.replies.content.add thread
elif entryId.startsWith("cursor-bottom"):
result.replies.bottom = e{"content", "content", "value"}.getStr
result.replies.bottom = e{"content", "itemContent", "value"}.getStr
proc parseGraphTimeline*(js: JsonNode; root: string; after=""): Profile =
result = Profile(tweets: Timeline(beginning: after.len == 0))
@ -535,7 +540,7 @@ proc parseGraphTimeline*(js: JsonNode; root: string; after=""): Profile =
let entryId = e{"entryId"}.getStr
if entryId.startsWith("tweet"):
with tweetResult, e{"content", "content", "tweetResult", "result"}:
let tweet = parseGraphTweet(tweetResult)
let tweet = parseGraphTweet(tweetResult, false)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
result.tweets.content.add tweet
@ -546,7 +551,7 @@ proc parseGraphTimeline*(js: JsonNode; root: string; after=""): Profile =
result.tweets.bottom = e{"content", "value"}.getStr
if after.len == 0 and i{"__typename"}.getStr == "TimelinePinEntry":
with tweetResult, i{"entry", "content", "content", "tweetResult", "result"}:
let tweet = parseGraphTweet(tweetResult)
let tweet = parseGraphTweet(tweetResult, false)
tweet.pinned = true
if not tweet.available and tweet.tombstone.len == 0:
let entryId = i{"entry", "entryId"}.getEntryId
@ -584,8 +589,8 @@ proc parseGraphRetweetersTimeline*(js: JsonNode; root: string; after=""): UsersT
proc parseGraphFollowTimeline*(js: JsonNode; root: string; after=""): UsersTimeline =
  ## Parses the node found at `data.user.result.timeline.timeline` of `js`
  ## with `parseGraphUsersTimeline`, forwarding `after` unchanged.
  ## `root` is accepted but not read by this proc.
  let timeline = js{"data", "user", "result", "timeline", "timeline"}
  result = parseGraphUsersTimeline(timeline, after)
proc parseGraphSearch*[T: User | Tweets](js: JsonNode; after=""): Result[T] =
result = Result[T](beginning: after.len == 0)
proc parseGraphSearch*(js: JsonNode; after=""): Timeline =
result = Timeline(beginning: after.len == 0)
let instructions = js{"data", "search_by_raw_query", "search_timeline", "timeline", "instructions"}
if instructions.len == 0:
@ -596,19 +601,13 @@ proc parseGraphSearch*[T: User | Tweets](js: JsonNode; after=""): Result[T] =
if typ == "TimelineAddEntries":
for e in instruction{"entries"}:
let entryId = e{"entryId"}.getStr
when T is Tweets:
if entryId.startsWith("tweet"):
with tweetRes, e{"content", "itemContent", "tweet_results", "result"}:
let tweet = parseGraphTweet(tweetRes)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
result.content.add tweet
elif T is User:
if entryId.startsWith("user"):
with userRes, e{"content", "itemContent"}:
result.content.add parseGraphUser(userRes)
if entryId.startsWith("cursor-bottom"):
if entryId.startsWith("tweet"):
with tweetRes, e{"content", "itemContent", "tweet_results", "result"}:
let tweet = parseGraphTweet(tweetRes, true)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
result.content.add tweet
elif entryId.startsWith("cursor-bottom"):
result.bottom = e{"content", "value"}.getStr
elif typ == "TimelineReplaceEntry":
if instruction{"entry_id_to_replace"}.getStr.startsWith("cursor-bottom"):

View file

@ -36,7 +36,8 @@ template with*(ident, value, body): untyped =
template with*(ident; value: JsonNode; body): untyped =
if true:
let ident {.inject.} = value
if value.kind != JNull: body
# value.notNull causes a compilation error for versions < 1.6.14
if notNull(value): body
template getCursor*(js: JsonNode): string =
  ## Expands to the string stored at `content.operation.cursor.value`
  ## inside `js`.
  getStr(js{"content", "operation", "cursor", "value"})

View file

@ -37,6 +37,7 @@ proc proxyMedia*(req: jester.Request; url: string): Future[HttpCode] {.async.} =
try:
let res = await client.get(url)
if res.status != "200 OK":
echo "[media] Proxying media failed, status: $1, url: $2, body: $3" % [res.status, url, await res.body]
return Http404
let hashed = $hash(url)
@ -65,6 +66,7 @@ proc proxyMedia*(req: jester.Request; url: string): Future[HttpCode] {.async.} =
await request.client.send(data)
data.setLen 0
except HttpRequestError, ProtocolError, OSError:
echo "[media] Proxying media exception, error: $1, url: $2" % [getCurrentExceptionMsg(), url]
result = Http404
finally:
client.close()

View file

@ -29,7 +29,7 @@ proc createSearchRouter*(cfg: Config) =
redirect("/" & q)
var users: Result[User]
try:
users = await getGraphUserSearch(query, getCursor())
users = await getUserSearch(query, getCursor())
except InternalError:
users = Result[User](beginning: true, query: query)
resp renderMain(renderUserSearch(users, prefs), request, cfg, prefs, title)

View file

@ -1,64 +1,78 @@
# SPDX-License-Identifier: AGPL-3.0-only
import asyncdispatch, times, json, random, strutils, tables
# SPDX-License-Identifier: AGPL-3.0-only
import asyncdispatch, times, json, random, strutils, tables, sets
import types
# max requests at a time per account to avoid race conditions
const
maxConcurrentReqs = 5
maxConcurrentReqs = 2
dayInSeconds = 24 * 60 * 60
var
accountPool: seq[GuestAccount]
enableLogging = false
template log(str) =
if enableLogging: echo "[accounts] ", str
template log(str: varargs[string, `$`]) =
if enableLogging: echo "[accounts] ", str.join("")
proc getPoolJson*(): JsonNode =
var
list = newJObject()
totalReqs = 0
totalPending = 0
limited: HashSet[string]
reqsPerApi: Table[string, int]
let now = epochTime().int
for account in accountPool:
totalPending.inc(account.pending)
list[account.id] = %*{
var includeAccount = false
let accountJson = %*{
"apis": newJObject(),
"pending": account.pending,
}
for api in account.apis.keys:
let obj = %*{}
if account.apis[api].limited:
obj["limited"] = %true
let
apiStatus = account.apis[api]
obj = %*{}
if account.apis[api].reset > now.int:
obj["remaining"] = %account.apis[api].remaining
if apiStatus.reset > now.int:
obj["remaining"] = %apiStatus.remaining
list[account.id]["apis"][$api] = obj
if "remaining" notin obj:
if "remaining" notin obj and not apiStatus.limited:
continue
if apiStatus.limited:
obj["limited"] = %true
limited.incl account.id
accountJson{"apis", $api} = obj
includeAccount = true
let
maxReqs =
case api
of Api.search: 50
of Api.tweetDetail: 150
of Api.photoRail: 180
of Api.userTweets, Api.userTweetsAndReplies, Api.userMedia,
Api.userRestId, Api.userScreenName,
Api.tweetDetail, Api.tweetResult,
Api.tweetResult,
Api.list, Api.listTweets, Api.listMembers, Api.listBySlug, Api.favorites, Api.retweeters, Api.favoriters, Api.following, Api.followers: 500
reqs = maxReqs - account.apis[api].remaining
of Api.userSearch: 900
reqs = maxReqs - apiStatus.remaining
reqsPerApi[$api] = reqsPerApi.getOrDefault($api, 0) + reqs
totalReqs.inc(reqs)
if includeAccount:
list[account.id] = accountJson
return %*{
"amount": accountPool.len,
"limited": limited.card,
"requests": totalReqs,
"pending": totalPending,
"apis": reqsPerApi,
@ -77,7 +91,7 @@ proc isLimited(account: GuestAccount; api: Api): bool =
if limit.limited and (epochTime().int - limit.limitedAt) > dayInSeconds:
account.apis[api].limited = false
echo "account limit reset, api: ", api, ", id: ", account.id
log "resetting limit, api: ", api, ", id: ", account.id
return limit.limited or (limit.remaining <= 10 and limit.reset > epochTime().int)
else:
@ -86,28 +100,35 @@ proc isLimited(account: GuestAccount; api: Api): bool =
proc isReady(account: GuestAccount; api: Api): bool =
  ## True when `account` is non-nil, has no more than `maxConcurrentReqs`
  ## requests pending, and `isLimited` does not report it limited for `api`.
  ## The checks short-circuit in that order, so `isLimited` is only consulted
  ## for a non-nil account that passed the pending check.
  # NOTE(review): an account whose `pending` already equals
  # `maxConcurrentReqs` still counts as ready; if the caller increments
  # `pending` afterwards the in-flight count can exceed the constant by one.
  # Confirm whether that is intended.
  result = (not account.isNil) and
    account.pending <= maxConcurrentReqs and
    not account.isLimited(api)
proc release*(account: GuestAccount; used=false; invalid=false) =
proc invalidate*(account: var GuestAccount) =
if account.isNil: return
if invalid:
log "discarding invalid account: " & account.id
log "invalidating expired account: ", account.id
let idx = accountPool.find(account)
if idx > -1: accountPool.delete(idx)
elif used:
dec account.pending
# TODO: This isn't sufficient, but it works for now
let idx = accountPool.find(account)
if idx > -1: accountPool.delete(idx)
account = nil
proc release*(account: GuestAccount) =
  ## Decrements `account.pending` by one. A nil `account` is ignored.
  if not account.isNil:
    account.pending.dec
proc getGuestAccount*(api: Api): Future[GuestAccount] {.async.} =
for i in 0 ..< accountPool.len:
if result.isReady(api): break
release(result)
result = accountPool.sample()
if not result.isNil and result.isReady(api):
inc result.pending
else:
log "no accounts available for API: " & $api
log "no accounts available for API: ", api
raise rateLimitError()
proc setLimited*(account: GuestAccount; api: Api) =
  ## Flags `api` as limited on `account`, stores the current epoch second in
  ## `limitedAt`, and logs the API, its remaining request count and the
  ## account id.
  let hitAt = epochTime().int
  account.apis[api].limited = true
  account.apis[api].limitedAt = hitAt
  log("rate limited, api: ", api, ", reqs left: ",
      account.apis[api].remaining, ", id: ", account.id)
proc setRateLimit*(account: GuestAccount; api: Api; remaining, reset: int) =
# avoid undefined behavior in race conditions
if api in account.apis:

View file

@ -19,6 +19,7 @@ type
tweetResult
photoRail
search
userSearch
list
listBySlug
listMembers
@ -44,8 +45,6 @@ type
id*: string
oauthToken*: string
oauthSecret*: string
# init*: Time
lastUse*: Time
pending*: int
apis*: Table[Api, RateLimit]
@ -60,7 +59,7 @@ type
userNotFound = 50
suspended = 63
rateLimited = 88
invalidToken = 89
expiredToken = 89
listIdOrSlug = 112
tweetNotFound = 144
tweetNotAuthorized = 179

View file

@ -13,11 +13,6 @@ card = [
'Basic OBS Studio plugin, written in nim, supporting C++ (C fine too) - obsplugin.nim',
'gist.github.com', True],
['FluentAI/status/1116417904831029248',
'Amazons Alexa isnt just AI — thousands of humans are listening',
'One of the only ways to improve Alexa is to have human beings check it for errors',
'theverge.com', True],
['nim_lang/status/1082989146040340480',
'Nim in 2018: A short recap',
'There were several big news in the Nim world in 2018 two new major releases, partnership with Status, and much more. But let us go chronologically.',
@ -25,6 +20,11 @@ card = [
]
no_thumb = [
['FluentAI/status/1116417904831029248',
'Amazons Alexa isnt just AI — thousands of humans are listening',
'One of the only ways to improve Alexa is to have human beings check it for errors',
'theverge.com'],
['Thom_Wolf/status/1122466524860702729',
'facebookresearch/fairseq',
'Facebook AI Research Sequence-to-Sequence Toolkit written in Python. - GitHub - facebookresearch/fairseq: Facebook AI Research Sequence-to-Sequence Toolkit written in Python.',

View file

@ -6,7 +6,7 @@ normal = [['jack'], ['elonmusk']]
after = [['jack', '1681686036294803456'],
['elonmusk', '1681686036294803456']]
no_more = [['mobile_test_8?cursor=1000']]
no_more = [['mobile_test_8?cursor=DAABCgABF4YVAqN___kKAAICNn_4msIQAAgAAwAAAAIAAA']]
empty = [['emptyuser'], ['mobile_test_10']]

View file

@ -14,7 +14,7 @@ poll = [
image = [
['mobile_test/status/519364660823207936', 'BzUnaDFCUAAmrjs'],
['mobile_test_2/status/324619691039543297', 'BIFH45vCUAAQecj']
#['mobile_test_2/status/324619691039543297', 'BIFH45vCUAAQecj']
]
gif = [