i
This commit is contained in:
parent
48b10b1529
commit
cd7708dfc1
281
src/client.py
281
src/client.py
|
@ -22,8 +22,6 @@ class DiscordDataClient(discord.Client):
|
||||||
"""Custom Discord client for collecting user data."""
|
"""Custom Discord client for collecting user data."""
|
||||||
|
|
||||||
def __init__(self, config: Config, database):
|
def __init__(self, config: Config, database):
|
||||||
|
|
||||||
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
|
@ -37,15 +35,51 @@ class DiscordDataClient(discord.Client):
|
||||||
self.processed_users: Set[int] = set()
|
self.processed_users: Set[int] = set()
|
||||||
self.target_servers = set(config.get_target_servers())
|
self.target_servers = set(config.get_target_servers())
|
||||||
|
|
||||||
# Start background tasks
|
# Initialize tasks properly - don't start them yet
|
||||||
self.cleanup_task.start()
|
self._setup_tasks()
|
||||||
self.stats_task.start()
|
|
||||||
|
def _setup_tasks(self):
|
||||||
|
"""Set up the background tasks."""
|
||||||
|
@tasks.loop(hours=1)
|
||||||
|
async def cleanup_task():
|
||||||
|
"""Periodic cleanup task."""
|
||||||
|
try:
|
||||||
|
# Clean up old backups
|
||||||
|
await self.database.cleanup_old_backups()
|
||||||
|
|
||||||
|
# Clear processed users set to allow re-processing
|
||||||
|
self.processed_users.clear()
|
||||||
|
|
||||||
|
self.logger.info("Cleanup task completed")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error in cleanup task: {e}")
|
||||||
|
|
||||||
|
@tasks.loop(minutes=30)
|
||||||
|
async def stats_task():
|
||||||
|
"""Periodic statistics logging."""
|
||||||
|
try:
|
||||||
|
stats = await self.database.get_statistics()
|
||||||
|
self.logger.info(f"Database stats: {stats['total_users']} users, "
|
||||||
|
f"{stats['total_servers']} servers, "
|
||||||
|
f"{stats['database_size']} bytes")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error in stats task: {e}")
|
||||||
|
|
||||||
|
# Assign tasks to instance
|
||||||
|
self.cleanup_task = cleanup_task
|
||||||
|
self.stats_task = stats_task
|
||||||
|
|
||||||
async def on_ready(self):
|
async def on_ready(self):
|
||||||
"""Called when the client is ready."""
|
"""Called when the client is ready."""
|
||||||
self.logger.info(f"Logged in as {self.user} (ID: {self.user.id})")
|
self.logger.info(f"Logged in as {self.user} (ID: {self.user.id})")
|
||||||
self.logger.info(f"Connected to {len(self.guilds)} servers")
|
self.logger.info(f"Connected to {len(self.guilds)} servers")
|
||||||
|
|
||||||
|
# Start background tasks after we're ready
|
||||||
|
self.cleanup_task.start()
|
||||||
|
self.stats_task.start()
|
||||||
|
|
||||||
# Initial scan of server members
|
# Initial scan of server members
|
||||||
await self._scan_all_servers()
|
await self._scan_all_servers()
|
||||||
|
|
||||||
|
@ -172,109 +206,113 @@ class DiscordDataClient(discord.Client):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error processing user {user.name}: {e}")
|
self.logger.error(f"Error processing user {user.name}: {e}")
|
||||||
|
|
||||||
async def _get_user_bio(self, user) -> Optional[str]:
|
async def _get_user_bio(self, user) -> Optional[str]:
|
||||||
"""Get user bio/about me section."""
|
"""Get user bio/about me section."""
|
||||||
if not self.config.collect_bio:
|
if not self.config.collect_bio:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
bio = None
|
bio = None
|
||||||
|
|
||||||
|
# Debug logging
|
||||||
|
self.logger.debug(f"User object type: {type(user)}")
|
||||||
|
self.logger.debug(f"User attributes: {[attr for attr in dir(user) if not attr.startswith('_')]}")
|
||||||
|
self.logger.debug(f"Client methods: {[method for method in dir(self) if 'profile' in method.lower()]}")
|
||||||
|
|
||||||
|
# Method 1: Check if user object already has bio (for ClientUser)
|
||||||
|
if hasattr(user, 'bio') and user.bio:
|
||||||
|
bio = user.bio
|
||||||
|
self.logger.debug(f"Found bio via user.bio for {user.name}")
|
||||||
|
return bio[:500] if bio else None
|
||||||
|
|
||||||
|
# Method 2: Try to fetch user profile specifically
|
||||||
|
if hasattr(user, 'id'):
|
||||||
|
try:
|
||||||
|
# Try multiple profile fetch methods
|
||||||
|
profile = None
|
||||||
|
|
||||||
|
# Try different possible method names
|
||||||
|
if hasattr(self, 'fetch_user_profile'):
|
||||||
|
profile = await self.fetch_user_profile(user.id)
|
||||||
|
elif hasattr(self, 'fetch_profile'):
|
||||||
|
profile = await self.fetch_profile(user.id)
|
||||||
|
elif hasattr(user, 'fetch_profile'):
|
||||||
|
profile = await user.fetch_profile()
|
||||||
|
else:
|
||||||
|
# Fallback to regular fetch_user and check for profile attr
|
||||||
|
fetched_user = await self.fetch_user(user.id)
|
||||||
|
if hasattr(fetched_user, 'profile'):
|
||||||
|
profile = fetched_user.profile
|
||||||
|
else:
|
||||||
|
profile = fetched_user
|
||||||
|
|
||||||
|
if profile:
|
||||||
|
# Check all possible bio attributes
|
||||||
|
bio_attrs = ['bio', 'display_bio', 'about', 'about_me', 'description']
|
||||||
|
for attr in bio_attrs:
|
||||||
|
if hasattr(profile, attr):
|
||||||
|
bio_value = getattr(profile, attr)
|
||||||
|
if bio_value:
|
||||||
|
bio = bio_value
|
||||||
|
self.logger.debug(f"Found {attr} via profile fetch for {user.name}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if not bio:
|
||||||
|
self.logger.debug(f"Profile found but no bio attributes for {user.name}")
|
||||||
|
# Debug: log available attributes
|
||||||
|
attrs = [attr for attr in dir(profile) if not attr.startswith('_')]
|
||||||
|
self.logger.debug(f"Available profile attributes: {attrs}")
|
||||||
|
else:
|
||||||
|
self.logger.debug(f"No profile method available for {user.name}")
|
||||||
|
|
||||||
|
except discord.Forbidden:
|
||||||
|
self.logger.debug(f"Access denied to profile for {user.name} - user may have privacy settings enabled")
|
||||||
|
return None
|
||||||
|
except discord.NotFound:
|
||||||
|
self.logger.debug(f"Profile not found for {user.name}")
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.debug(f"Profile fetch failed for {user.name}: {e}")
|
||||||
|
|
||||||
|
# Method 3: Try member profile if in a guild context
|
||||||
|
if not bio and hasattr(user, 'guild') and user.guild:
|
||||||
|
try:
|
||||||
|
member_profile = None
|
||||||
|
if hasattr(user.guild, 'fetch_member_profile'):
|
||||||
|
member_profile = await user.guild.fetch_member_profile(user.id)
|
||||||
|
elif hasattr(user, 'fetch_member_profile'):
|
||||||
|
member_profile = await user.fetch_member_profile()
|
||||||
|
|
||||||
|
if member_profile:
|
||||||
|
bio_attrs = ['bio', 'display_bio', 'guild_bio', 'about', 'about_me']
|
||||||
|
for attr in bio_attrs:
|
||||||
|
if hasattr(member_profile, attr):
|
||||||
|
bio_value = getattr(member_profile, attr)
|
||||||
|
if bio_value:
|
||||||
|
bio = bio_value
|
||||||
|
self.logger.debug(f"Found {attr} via member profile for {user.name}")
|
||||||
|
break
|
||||||
|
except discord.Forbidden:
|
||||||
|
self.logger.debug(f"Access denied to member profile for {user.name}")
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.debug(f"Member profile fetch failed for {user.name}: {e}")
|
||||||
|
|
||||||
|
# Method 4: Fallback to activities (last resort)
|
||||||
|
if not bio and hasattr(user, 'activities'):
|
||||||
|
for activity in user.activities:
|
||||||
|
if hasattr(activity, 'name') and activity.name and len(activity.name) > 20:
|
||||||
|
bio = f"Activity: {activity.name}"
|
||||||
|
self.logger.debug(f"Using activity as bio for {user.name}: {activity.name}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if not bio:
|
||||||
|
self.logger.debug(f"No bio found for user {user.name}")
|
||||||
|
|
||||||
# Method 1: Check if user object already has bio (for ClientUser)
|
|
||||||
if hasattr(user, 'bio') and user.bio:
|
|
||||||
bio = user.bio
|
|
||||||
self.logger.debug(f"Found bio via user.bio for {user.name}")
|
|
||||||
return bio[:500] if bio else None
|
return bio[:500] if bio else None
|
||||||
|
|
||||||
# Method 2: Try to fetch user profile specifically
|
except Exception as e:
|
||||||
if hasattr(user, 'id'):
|
self.logger.debug(f"Could not fetch bio for user {user.name}: {e}")
|
||||||
try:
|
return None
|
||||||
# Try multiple profile fetch methods
|
|
||||||
profile = None
|
|
||||||
|
|
||||||
# Try different possible method names
|
|
||||||
if hasattr(self, 'fetch_user_profile'):
|
|
||||||
profile = await self.fetch_user_profile(user.id)
|
|
||||||
elif hasattr(self, 'fetch_profile'):
|
|
||||||
profile = await self.fetch_profile(user.id)
|
|
||||||
elif hasattr(user, 'fetch_profile'):
|
|
||||||
profile = await user.fetch_profile()
|
|
||||||
else:
|
|
||||||
# Fallback to regular fetch_user and check for profile attr
|
|
||||||
fetched_user = await self.fetch_user(user.id)
|
|
||||||
if hasattr(fetched_user, 'profile'):
|
|
||||||
profile = fetched_user.profile
|
|
||||||
else:
|
|
||||||
profile = fetched_user
|
|
||||||
|
|
||||||
|
|
||||||
if profile:
|
|
||||||
# Check all possible bio attributes
|
|
||||||
bio_attrs = ['bio', 'display_bio', 'about', 'about_me', 'description']
|
|
||||||
for attr in bio_attrs:
|
|
||||||
if hasattr(profile, attr):
|
|
||||||
bio_value = getattr(profile, attr)
|
|
||||||
if bio_value:
|
|
||||||
bio = bio_value
|
|
||||||
self.logger.debug(f"Found {attr} via profile fetch for {user.name}")
|
|
||||||
break
|
|
||||||
|
|
||||||
if not bio:
|
|
||||||
self.logger.debug(f"Profile found but no bio attributes for {user.name}")
|
|
||||||
# Debug: log available attributes
|
|
||||||
attrs = [attr for attr in dir(profile) if not attr.startswith('_')]
|
|
||||||
self.logger.debug(f"Available profile attributes: {attrs}")
|
|
||||||
else:
|
|
||||||
self.logger.debug(f"No profile method available for {user.name}")
|
|
||||||
|
|
||||||
except discord.Forbidden:
|
|
||||||
self.logger.debug(f"Access denied to profile for {user.name} - user may have privacy settings enabled")
|
|
||||||
return None
|
|
||||||
except discord.NotFound:
|
|
||||||
self.logger.debug(f"Profile not found for {user.name}")
|
|
||||||
return None
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Profile fetch failed for {user.name}: {e}")
|
|
||||||
|
|
||||||
# Method 3: Try member profile if in a guild context
|
|
||||||
if not bio and hasattr(user, 'guild') and user.guild:
|
|
||||||
try:
|
|
||||||
member_profile = None
|
|
||||||
if hasattr(user.guild, 'fetch_member_profile'):
|
|
||||||
member_profile = await user.guild.fetch_member_profile(user.id)
|
|
||||||
elif hasattr(user, 'fetch_member_profile'):
|
|
||||||
member_profile = await user.fetch_member_profile()
|
|
||||||
|
|
||||||
if member_profile:
|
|
||||||
bio_attrs = ['bio', 'display_bio', 'guild_bio', 'about', 'about_me']
|
|
||||||
for attr in bio_attrs:
|
|
||||||
if hasattr(member_profile, attr):
|
|
||||||
bio_value = getattr(member_profile, attr)
|
|
||||||
if bio_value:
|
|
||||||
bio = bio_value
|
|
||||||
self.logger.debug(f"Found {attr} via member profile for {user.name}")
|
|
||||||
break
|
|
||||||
except discord.Forbidden:
|
|
||||||
self.logger.debug(f"Access denied to member profile for {user.name}")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Member profile fetch failed for {user.name}: {e}")
|
|
||||||
|
|
||||||
# Method 4: Fallback to activities (last resort)
|
|
||||||
if not bio and hasattr(user, 'activities'):
|
|
||||||
for activity in user.activities:
|
|
||||||
if hasattr(activity, 'name') and activity.name and len(activity.name) > 20:
|
|
||||||
bio = f"Activity: {activity.name}"
|
|
||||||
self.logger.debug(f"Using activity as bio for {user.name}: {activity.name}")
|
|
||||||
break
|
|
||||||
|
|
||||||
if not bio:
|
|
||||||
self.logger.debug(f"No bio found for user {user.name}")
|
|
||||||
|
|
||||||
return bio[:500] if bio else None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Could not fetch bio for user {user.name}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _get_user_status(self, user) -> Optional[str]:
|
def _get_user_status(self, user) -> Optional[str]:
|
||||||
"""Get user status with better handling."""
|
"""Get user status with better handling."""
|
||||||
|
@ -326,33 +364,6 @@ async def _get_user_bio(self, user) -> Optional[str]:
|
||||||
self.logger.debug(f"Could not get activity for user {user.name}: {e}")
|
self.logger.debug(f"Could not get activity for user {user.name}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@tasks.loop(hours=1)
|
|
||||||
async def cleanup_task(self):
|
|
||||||
"""Periodic cleanup task."""
|
|
||||||
try:
|
|
||||||
# Clean up old backups
|
|
||||||
await self.database.cleanup_old_backups()
|
|
||||||
|
|
||||||
# Clear processed users set to allow re-processing
|
|
||||||
self.processed_users.clear()
|
|
||||||
|
|
||||||
self.logger.info("Cleanup task completed")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error in cleanup task: {e}")
|
|
||||||
|
|
||||||
@tasks.loop(minutes=30)
|
|
||||||
async def stats_task(self):
|
|
||||||
"""Periodic statistics logging."""
|
|
||||||
try:
|
|
||||||
stats = await self.database.get_statistics()
|
|
||||||
self.logger.info(f"Database stats: {stats['total_users']} users, "
|
|
||||||
f"{stats['total_servers']} servers, "
|
|
||||||
f"{stats['database_size']} bytes")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error in stats task: {e}")
|
|
||||||
|
|
||||||
async def export_data(self, format_type: str = "csv", output_path: str = None):
|
async def export_data(self, format_type: str = "csv", output_path: str = None):
|
||||||
"""Export collected data."""
|
"""Export collected data."""
|
||||||
if output_path is None:
|
if output_path is None:
|
||||||
|
@ -376,9 +387,11 @@ async def _get_user_bio(self, user) -> Optional[str]:
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Clean shutdown."""
|
"""Clean shutdown."""
|
||||||
# Cancel background tasks
|
# Cancel background tasks if they exist and are running
|
||||||
self.cleanup_task.cancel()
|
if hasattr(self, 'cleanup_task') and not self.cleanup_task.is_finished():
|
||||||
self.stats_task.cancel()
|
self.cleanup_task.cancel()
|
||||||
|
if hasattr(self, 'stats_task') and not self.stats_task.is_finished():
|
||||||
|
self.stats_task.cancel()
|
||||||
|
|
||||||
# Close parent client
|
# Close parent client
|
||||||
await super().close()
|
await super().close()
|
||||||
|
|
Loading…
Reference in a new issue