Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions examples/about_account.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import asyncio

from twscrape import API


async def main():
    """Look up the "about" info for one hard-coded username and print it.

    Prints the result of ``.dict()`` on the returned object when the lookup
    yields something truthy, otherwise prints ``"Not found"``.
    """
    client = API()
    handle = "sama"

    about = await client.user_about(handle)
    if about:
        print(about.dict())
    else:
        print("Not found")


if __name__ == "__main__":
    # Run the example only when executed as a script, not on import.
    asyncio.run(main())
2 changes: 2 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ async def main():
# get user by login
user_login = "xdevelopers"
await api.user_by_login(user_login) # User
await api.user_about(user_login) # AccountAbout

# user info
user_id = 2244994945
Expand Down Expand Up @@ -245,6 +246,7 @@ twscrape tweet_details TWEET_ID
twscrape tweet_replies TWEET_ID --limit=20
twscrape retweeters TWEET_ID --limit=20
twscrape user_by_login USERNAME
twscrape user_about USERNAME
twscrape user_media USER_ID --limit=20
twscrape following USER_ID --limit=20
twscrape followers USER_ID --limit=20
Expand Down
41 changes: 30 additions & 11 deletions scripts/update_gql_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,34 +62,53 @@ async def main():

print("Fetching Twitter JS bundle...")
all_pairs: dict[str, str] = {}
conflicts: list[tuple[str, str, str]] = []

def _add(op_name: str, op_id: str):
existing = all_pairs.get(op_name)
if existing is not None and existing != op_id:
conflicts.append((op_name, existing, op_id))
all_pairs[op_name] = op_id

rgs = [
r'queryId:"(.+?)".+?operationName:"(.+?)"',
r'params:\{id:"([^"]+)".+?name:"([^"]+)".+?operationKind:"',
]

for txt in await get_scripts():
for op_id, op_name in re.findall(r'queryId:"(.+?)".+?operationName:"(.+?)"', txt):
all_pairs[op_name] = op_id
for rg in rgs:
for op_id, op_name in re.findall(rg, txt):
_add(op_name, op_id)

if conflicts:
print("WARNING: conflicting IDs found for same operation (last one wins):")
for n, old_id, new_id in conflicts:
print(f" {n}: {old_id} vs {new_id}")

print(f"Found {len(all_pairs)} operations in bundle\n")

updated = content
changed, missing = [], []

for name, old_id in current_ops.items():
new_id = all_pairs.get(name)
for n, old_id in current_ops.items():
new_id = all_pairs.get(n)
if new_id is None:
missing.append(name)
missing.append(n)
elif new_id != old_id:
changed.append((name, old_id, new_id))
updated = updated.replace(f'"{old_id}/{name}"', f'"{new_id}/{name}"')
changed.append((n, old_id, new_id))
updated = updated.replace(f'"{old_id}/{n}"', f'"{new_id}/{n}"')

if changed:
print("Changed:")
for name, old_id, new_id in changed:
print(f" OP_{name}: {old_id} → {new_id}")
for n, old_id, new_id in changed:
print(f" OP_{n}: {old_id} → {new_id}")
else:
print("No ID changes.")

if missing:
print("\nNot found in bundle (possibly removed):")
for name in missing:
print(f" OP_{name}")
for n in missing:
print(f" OP_{n}")

if changed and apply:
API_FILE.write_text(updated)
Expand Down
1 change: 1 addition & 0 deletions scripts/update_mocked_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def save_meta(meta: dict):

COMMANDS = [
("user_by_login", lambda api: api.user_by_login_raw("xdevelopers")),
("user_about", lambda api: api.user_about_raw("xdevelopers")),
("following", lambda api: _first(api.following_raw(2244994945, limit=10))),
("followers", lambda api: _first(api.followers_raw(2244994945, limit=10))),
("verified_followers", lambda api: _first(api.verified_followers_raw(2244994945, limit=10))),
Expand Down
3 changes: 2 additions & 1 deletion tests/mocked-data/.meta.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 76 additions & 0 deletions tests/mocked-data/raw_user_about.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,32 @@ async def test_user_by_login():
assert str(doc.id) in txt


async def test_user_about():
    """Check that ``api.user_about`` yields a fully populated about-object.

    Uses the ``raw_user_about`` fixture via ``mock_rep`` (presumably it makes
    ``user_about_raw`` return the stored response — confirm against the helper).
    """
    api = get_api()
    mock_rep(api.user_about_raw, "raw_user_about")

    about = await api.user_about("xdevelopers")
    assert about is not None

    # Identity fields.
    assert about.screen_name == "XDevelopers"
    assert isinstance(about.rest_id, int)
    assert isinstance(about.name, str) and about.name != ""

    # Fields that only need to be present in the fixture.
    for field in ("account_based_in", "location_accurate", "affiliate_username", "source"):
        assert getattr(about, field) is not None

    # Username-change counters must have been converted to integers.
    for field in ("username_changes", "username_last_changed_at"):
        assert isinstance(getattr(about, field), int)

    # Verification details.
    assert about.is_identity_verified is not None
    assert isinstance(about.verified_since_msec, int)

    # Serialisation helpers must round-trip the screen name.
    as_dict = about.dict()
    assert about.screen_name == as_dict["screen_name"]

    as_json = about.json()
    assert isinstance(as_json, str)
    assert about.screen_name in as_json


async def test_tweet_details():
api = get_api()
mock_rep(api.tweet_details_raw, "raw_tweet_details")
Expand Down
43 changes: 33 additions & 10 deletions twscrape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,38 @@

from .accounts_pool import AccountsPool
from .logger import set_log_level
from .models import Tweet, User, parse_trends, parse_tweet, parse_tweets, parse_user, parse_users
from .models import (
AccountAbout,
Tweet,
User,
parse_about,
parse_trends,
parse_tweet,
parse_tweets,
parse_user,
parse_users,
)
from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path

# OP_{NAME} – {NAME} must match the second part of the GQL ID (required by the auto-update script)
OP_SearchTimeline = "Yw6L66Pw54NHKuq4Dp7b4Q/SearchTimeline"
OP_UserByScreenName = "IGgvgiOx4QZndDHuD3x9TQ/UserByScreenName"
OP_TweetDetail = "oCon7R-cgWRFy6EfZjaKfg/TweetDetail"
OP_AboutAccountQuery = "zUnx-DLN9dkwOkNhTLySjg/AboutAccountQuery"
OP_BlueVerifiedFollowers = "crKOXrAHR3W3aPuKEJG8GA/BlueVerifiedFollowers"
OP_Bookmarks = "XD0ViOeSOW4YoeNTGjVaYw/Bookmarks"
OP_Followers = "_orfRBQae57vylFPH0Huhg/Followers"
OP_Following = "F42cDX8PDFxkbjjq6JrM2w/Following"
OP_Retweeters = "TZsWuSj7vGmncVnq7KWDUQ/Retweeters"
OP_UserTweets = "36rb3Xj3iJ64Q-9wKDjCcQ/UserTweets"
OP_UserTweetsAndReplies = "D5eKzDa5ZoJuC1TCeAXbWA/UserTweetsAndReplies"
OP_GenericTimelineById = "_dGVIf1cY6xFanFNPsAzPQ/GenericTimelineById"
OP_ListLatestTweetsTimeline = "7UuJsFvnWuZo0HmxrzU42Q/ListLatestTweetsTimeline"
OP_BlueVerifiedFollowers = "crKOXrAHR3W3aPuKEJG8GA/BlueVerifiedFollowers"
OP_Retweeters = "TZsWuSj7vGmncVnq7KWDUQ/Retweeters"
OP_SearchTimeline = "Yw6L66Pw54NHKuq4Dp7b4Q/SearchTimeline"
OP_TweetDetail = "oCon7R-cgWRFy6EfZjaKfg/TweetDetail"
OP_UserByRestId = "VQfQ9wwYdk6j_u2O4vt64Q/UserByRestId"
OP_UserByScreenName = "IGgvgiOx4QZndDHuD3x9TQ/UserByScreenName"
OP_UserCreatorSubscriptions = "-9O4xZ8ykY_Hf6kyHJX30A/UserCreatorSubscriptions"
OP_UserMedia = "9EovraBTXJYGSEQXZqlLmQ/UserMedia"
OP_Bookmarks = "XD0ViOeSOW4YoeNTGjVaYw/Bookmarks"
OP_GenericTimelineById = "_dGVIf1cY6xFanFNPsAzPQ/GenericTimelineById"
OP_UserTweets = "36rb3Xj3iJ64Q-9wKDjCcQ/UserTweets"
OP_UserTweetsAndReplies = "D5eKzDa5ZoJuC1TCeAXbWA/UserTweetsAndReplies"


GQL_URL = "https://x.com/i/api/graphql"
GQL_FEATURES = { # search values here (view source) https://x.com/
Expand Down Expand Up @@ -206,6 +219,16 @@ async def user_by_login(self, login: str, kv: KV = None) -> User | None:
rep = await self.user_by_login_raw(login, kv=kv)
return parse_user(rep) if rep else None

async def user_about_raw(self, username: str, kv: KV = None):
    """Send the ``AboutAccountQuery`` GraphQL operation for ``username``.

    ``kv`` may supply extra query variables; its entries are applied after
    ``screenName`` and therefore override it on a key clash. The result of
    ``self._gql_item`` is returned unchanged.
    """
    variables = {"screenName": username}
    variables.update(kv or {})
    features = {"responsive_web_graphql_timeline_navigation_enabled": True}
    return await self._gql_item(OP_AboutAccountQuery, variables, features)

async def user_about(self, username: str, kv: KV = None) -> AccountAbout | None:
    """Return the parsed about-info for ``username``, or ``None``.

    ``None`` is returned when the raw request yields a falsy response;
    otherwise the response is handed to ``parse_about``.
    """
    rep = await self.user_about_raw(username, kv=kv)
    if not rep:
        return None
    return parse_about(rep)

# tweet_details

async def tweet_details_raw(self, twid: int, kv: KV = None):
Expand Down
1 change: 1 addition & 0 deletions twscrape/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def c_lim(name: str, msg: str, a_name: str, a_msg: str, a_type: type = str):
c_lim("tweet_replies", "Get replies of a tweet", "tweet_id", "Tweet ID", int)
c_lim("retweeters", "Get retweeters of a tweet", "tweet_id", "Tweet ID", int)
c_one("user_by_login", "Get user data by username", "username", "Username")
c_one("user_about", "Get about info for username", "username", "Username")
c_lim("following", "Get user following", "user_id", "User ID", int)
c_lim("followers", "Get user followers", "user_id", "User ID", int)
# https://x.com/xDaily/status/1701694747767648500
Expand Down
49 changes: 49 additions & 0 deletions twscrape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,43 @@ def parse(obj: dict):
return TextLink(url=url1, text=text, tcourl=url2)


@dataclass
class AccountAbout(JSONTrait):
    """Flattened "about this account" data extracted from a user result dict.

    Every optional field is ``None`` when the corresponding key is missing
    from the source object (see ``parse``).
    """

    screen_name: str  # core.screen_name ("" when absent)
    name: str  # core.name ("" when absent)
    rest_id: int | None  # rest_id, passed through int_or
    account_based_in: str | None  # about_profile.account_based_in
    location_accurate: bool | None  # about_profile.location_accurate
    affiliate_username: str | None  # about_profile.affiliate_username
    source: str | None  # about_profile.source
    username_changes: int | None  # about_profile.username_changes.count
    username_last_changed_at: int | None  # about_profile.username_changes.last_changed_at_msec
    is_identity_verified: bool | None  # verification_info.is_identity_verified
    verified_since_msec: int | None  # verification_info.reason.verified_since_msec

    @staticmethod
    def parse(obj: dict):
        """Build an ``AccountAbout`` from a user result dict.

        Missing or null sub-objects are treated as empty dicts, so absent
        sections produce ``None`` fields rather than errors. Integer fields go
        through ``int_or`` (presumably int-or-None conversion — confirm
        against the helper).
        """
        # Normalise every nested section to a dict so the lookups below are safe.
        profile = obj.get("about_profile") or {}
        core = obj.get("core") or {}
        verification = obj.get("verification_info") or {}
        verified_reason = verification.get("reason") or {}
        renames = profile.get("username_changes") or {}

        return AccountAbout(
            screen_name=core.get("screen_name", ""),
            name=core.get("name", ""),
            rest_id=int_or(obj, "rest_id"),
            account_based_in=profile.get("account_based_in"),
            location_accurate=profile.get("location_accurate"),
            affiliate_username=profile.get("affiliate_username"),
            source=profile.get("source"),
            username_changes=int_or(renames, "count"),
            username_last_changed_at=int_or(renames, "last_changed_at_msec"),
            is_identity_verified=verification.get("is_identity_verified"),
            verified_since_msec=int_or(verified_reason, "verified_since_msec"),
        )


@dataclass
class UserRef(JSONTrait):
id: int
Expand Down Expand Up @@ -769,6 +806,18 @@ def parse_trend(rep: httpx.Response) -> Trend | None:
return None


def parse_about(rep: httpx.Response | dict) -> AccountAbout | None:
    """Extract an ``AccountAbout`` from an AboutAccountQuery response.

    ``rep`` may be an already-decoded dict or a response object exposing
    ``.json()``. Returns ``None`` when the
    ``data.user_result_by_screen_name.result`` node is missing or empty, and
    also when any exception occurs (the error is logged with its traceback).
    """
    try:
        payload = rep.json() if not isinstance(rep, dict) else rep
        user_result = get_or(payload, "data.user_result_by_screen_name.result")
        return AccountAbout.parse(user_result) if user_result else None
    except Exception as e:
        # Best-effort parser: log the failure and signal "no result" to the caller.
        logger.error(f"Failed to parse about profile - {type(e)}:\n{traceback.format_exc()}")
        return None


def parse_tweets(rep: httpx.Response, limit: int = -1) -> Generator[Tweet, None, None]:
return _parse_items(rep, "tweet", limit) # type: ignore

Expand Down