Initial commit v2
This commit is contained in:
parent
5b961b3853
commit
8edda894db
13
.env.example
Normal file
13
.env.example
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# Discord Data Collector Environment Variables
|
||||||
|
# Copy this file to .env and fill in your values
|
||||||
|
|
||||||
|
# Discord user token (REQUIRED)
|
||||||
|
# WARNING: This should be your user token, not a bot token
|
||||||
|
# Keep this secret and never share it publicly
|
||||||
|
DISCORD_TOKEN=your_discord_user_token_here
|
||||||
|
|
||||||
|
# Optional: Database connection string for future MongoDB integration
|
||||||
|
# MONGODB_URI=mongodb://localhost:27017/discord_research
|
||||||
|
|
||||||
|
# Optional: Additional API keys for extended functionality
|
||||||
|
# BACKUP_WEBHOOK_URL=https://discord.com/api/webhooks/your_webhook_url
|
182
cli.py
Normal file
182
cli.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Command-line interface for Discord Data Collector.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add src to path
|
||||||
|
sys.path.append(str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from src.config import Config
|
||||||
|
from src.database import JSONDatabase
|
||||||
|
from src.client import DiscordDataClient
|
||||||
|
|
||||||
|
|
||||||
|
async def export_data(format_type: str, output_path: str = None):
|
||||||
|
"""Export collected data."""
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
if output_path is None:
|
||||||
|
from datetime import datetime
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
output_path = f"data/export_{timestamp}.{format_type}"
|
||||||
|
|
||||||
|
if format_type == "csv":
|
||||||
|
await database.export_to_csv(output_path)
|
||||||
|
print(f"Data exported to {output_path}")
|
||||||
|
else:
|
||||||
|
print(f"Unsupported format: {format_type}")
|
||||||
|
|
||||||
|
|
||||||
|
async def show_stats():
|
||||||
|
"""Show database statistics."""
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
stats = await database.get_statistics()
|
||||||
|
|
||||||
|
print("\n=== Database Statistics ===")
|
||||||
|
print(f"Total users: {stats['total_users']}")
|
||||||
|
print(f"Total servers: {stats['total_servers']}")
|
||||||
|
print(f"Database size: {stats['database_size']} bytes")
|
||||||
|
|
||||||
|
if stats['most_active_servers']:
|
||||||
|
print("\nMost active servers:")
|
||||||
|
for server_id, user_count in stats['most_active_servers'][:5]:
|
||||||
|
print(f" Server {server_id}: {user_count} users")
|
||||||
|
|
||||||
|
|
||||||
|
async def search_user(query: str):
|
||||||
|
"""Search for users."""
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
all_users = await database.get_all_users()
|
||||||
|
|
||||||
|
# Search by username or user ID
|
||||||
|
results = []
|
||||||
|
for user in all_users:
|
||||||
|
if (query.lower() in user.username.lower() or
|
||||||
|
query.lower() in (user.display_name or "").lower() or
|
||||||
|
query == str(user.user_id)):
|
||||||
|
results.append(user)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
print("No users found matching the query.")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"\n=== Found {len(results)} users ===")
|
||||||
|
for user in results[:10]: # Show first 10 results
|
||||||
|
print(f"{user.username}#{user.discriminator} (ID: {user.user_id})")
|
||||||
|
if user.display_name:
|
||||||
|
print(f" Display name: {user.display_name}")
|
||||||
|
if user.bio:
|
||||||
|
print(f" Bio: {user.bio[:100]}...")
|
||||||
|
print(f" Servers: {len(user.servers)}")
|
||||||
|
print(f" Last updated: {user.updated_at}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
async def backup_database():
|
||||||
|
"""Create a manual backup of the database."""
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
backup_path = f"data/backups/manual_backup_{timestamp}.json"
|
||||||
|
|
||||||
|
# Copy current database
|
||||||
|
import shutil
|
||||||
|
shutil.copy2(database.database_path, backup_path)
|
||||||
|
|
||||||
|
print(f"Database backed up to {backup_path}")
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_data():
|
||||||
|
"""Clean up old data and backups."""
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
await database.cleanup_old_backups(max_backups=5)
|
||||||
|
print("Cleanup completed")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_connection():
|
||||||
|
"""Test Discord connection."""
|
||||||
|
try:
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
client = DiscordDataClient(config, database)
|
||||||
|
|
||||||
|
print("Testing Discord connection...")
|
||||||
|
|
||||||
|
# This will test the connection without starting the full bot
|
||||||
|
await client.login(config.discord_token)
|
||||||
|
user_info = client.user
|
||||||
|
|
||||||
|
print(f"✓ Successfully connected as {user_info.name}#{user_info.discriminator}")
|
||||||
|
print(f"✓ User ID: {user_info.id}")
|
||||||
|
|
||||||
|
await client.close()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✗ Connection failed: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""Main CLI entry point."""
|
||||||
|
parser = argparse.ArgumentParser(description="Discord Data Collector CLI")
|
||||||
|
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
||||||
|
|
||||||
|
# Export command
|
||||||
|
export_parser = subparsers.add_parser("export", help="Export collected data")
|
||||||
|
export_parser.add_argument("format", choices=["csv"], help="Export format")
|
||||||
|
export_parser.add_argument("-o", "--output", help="Output file path")
|
||||||
|
|
||||||
|
# Stats command
|
||||||
|
subparsers.add_parser("stats", help="Show database statistics")
|
||||||
|
|
||||||
|
# Search command
|
||||||
|
search_parser = subparsers.add_parser("search", help="Search for users")
|
||||||
|
search_parser.add_argument("query", help="Search query (username or user ID)")
|
||||||
|
|
||||||
|
# Backup command
|
||||||
|
subparsers.add_parser("backup", help="Create manual database backup")
|
||||||
|
|
||||||
|
# Cleanup command
|
||||||
|
subparsers.add_parser("cleanup", help="Clean up old data and backups")
|
||||||
|
|
||||||
|
# Test command
|
||||||
|
subparsers.add_parser("test", help="Test Discord connection")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if not args.command:
|
||||||
|
parser.print_help()
|
||||||
|
return
|
||||||
|
|
||||||
|
# Run the appropriate command
|
||||||
|
if args.command == "export":
|
||||||
|
asyncio.run(export_data(args.format, args.output))
|
||||||
|
elif args.command == "stats":
|
||||||
|
asyncio.run(show_stats())
|
||||||
|
elif args.command == "search":
|
||||||
|
asyncio.run(search_user(args.query))
|
||||||
|
elif args.command == "backup":
|
||||||
|
asyncio.run(backup_database())
|
||||||
|
elif args.command == "cleanup":
|
||||||
|
asyncio.run(cleanup_data())
|
||||||
|
elif args.command == "test":
|
||||||
|
asyncio.run(test_connection())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
33
config.toml
Normal file
33
config.toml
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# Discord Data Collector Configuration
|
||||||
|
|
||||||
|
[database]
|
||||||
|
# JSON database file path
|
||||||
|
path = "data/users.json"
|
||||||
|
# Backup interval in seconds (3600 = 1 hour)
|
||||||
|
backup_interval = 3600
|
||||||
|
|
||||||
|
[collection]
|
||||||
|
# What data to collect
|
||||||
|
profile_pictures = true
|
||||||
|
bio = true
|
||||||
|
status = true
|
||||||
|
server_membership = true
|
||||||
|
|
||||||
|
[rate_limiting]
|
||||||
|
# Delay between API requests in seconds
|
||||||
|
request_delay = 1.0
|
||||||
|
# Maximum requests per minute
|
||||||
|
max_requests_per_minute = 30
|
||||||
|
|
||||||
|
[monitoring]
|
||||||
|
# List of specific server IDs to monitor (leave empty to monitor all)
|
||||||
|
# Example: target_servers = [123456789, 987654321]
|
||||||
|
target_servers = []
|
||||||
|
# Monitor all servers the account is in
|
||||||
|
monitor_all_servers = true
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||||
|
level = "INFO"
|
||||||
|
# Log file path
|
||||||
|
file = "logs/collector.log"
|
50
main.py
Normal file
50
main.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Discord User Data Collector
|
||||||
|
Main application entry point for collecting Discord user data for research purposes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from src.client import DiscordDataClient
|
||||||
|
from src.config import Config
|
||||||
|
from src.database import JSONDatabase
|
||||||
|
from src.logger import setup_logger
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
"""Main application entry point."""
|
||||||
|
try:
|
||||||
|
# Setup configuration
|
||||||
|
config = Config()
|
||||||
|
|
||||||
|
# Setup logging
|
||||||
|
logger = setup_logger(config.log_level, config.log_file)
|
||||||
|
logger.info("Starting Discord Data Collector")
|
||||||
|
|
||||||
|
# Initialize database
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
|
||||||
|
# Initialize Discord client
|
||||||
|
client = DiscordDataClient(config, database)
|
||||||
|
|
||||||
|
# Start the client
|
||||||
|
logger.info("Starting Discord client...")
|
||||||
|
await client.start(config.discord_token)
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
logger.info("Received keyboard interrupt, shutting down...")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Fatal error: {e}", exc_info=True)
|
||||||
|
sys.exit(1)
|
||||||
|
finally:
|
||||||
|
if 'client' in locals():
|
||||||
|
await client.close()
|
||||||
|
logger.info("Application shutdown complete")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
208
readme.md
Normal file
208
readme.md
Normal file
|
@ -0,0 +1,208 @@
|
||||||
|
# ⚠️ Warning! ai slop.
|
||||||
|
|
||||||
|
|
||||||
|
# Discord Data Collector
|
||||||
|
|
||||||
|
A Python application for collecting Discord user data for research purposes, specifically designed to study information propagation patterns in Discord communities.
|
||||||
|
|
||||||
|
## Important Disclaimers
|
||||||
|
|
||||||
|
- **Terms of Service**: This application uses self-botting, which violates Discord's Terms of Service and may result in account suspension.
|
||||||
|
- **Educational Use Only**: This tool is intended solely for educational and research purposes.
|
||||||
|
- **Privacy Considerations**: Always respect user privacy and obtain proper consent when collecting data.
|
||||||
|
- **Legal Compliance**: Ensure compliance with applicable data protection laws (GDPR, CCPA, etc.).
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **User Data Collection**: Automatically collects usernames, profile pictures, bios, status, and server memberships
|
||||||
|
- **Message Monitoring**: Processes messages from monitored servers to identify active users
|
||||||
|
- **Rate Limiting**: Built-in rate limiting to avoid hitting Discord API limits
|
||||||
|
- **Flexible Configuration**: Easy configuration via TOML and environment files
|
||||||
|
- **Data Export**: Export collected data to CSV format
|
||||||
|
- **Database Management**: JSON-based storage with automatic backups
|
||||||
|
- **CLI Tools**: Command-line interface for data management and analysis
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. **Clone the repository**:
|
||||||
|
```bash
|
||||||
|
git clone <repository-url>
|
||||||
|
cd discord-data-collector
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Install dependencies**:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Create configuration files**:
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your Discord token
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Configure settings**:
|
||||||
|
- Edit `config.toml` to adjust collection settings
|
||||||
|
- Add your Discord user token to `.env`
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Environment Variables (.env)
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Your Discord user token (REQUIRED)
|
||||||
|
DISCORD_TOKEN=your_discord_user_token_here
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration File (config.toml)
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[database]
|
||||||
|
path = "data/users.json"
|
||||||
|
backup_interval = 3600
|
||||||
|
|
||||||
|
[collection]
|
||||||
|
profile_pictures = true
|
||||||
|
bio = true
|
||||||
|
status = true
|
||||||
|
server_membership = true
|
||||||
|
|
||||||
|
[rate_limiting]
|
||||||
|
request_delay = 1.0
|
||||||
|
max_requests_per_minute = 30
|
||||||
|
|
||||||
|
[monitoring]
|
||||||
|
target_servers = [] # Empty = monitor all servers
|
||||||
|
monitor_all_servers = true
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
level = "INFO"
|
||||||
|
file = "logs/collector.log"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Running the Collector
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start the data collector
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### CLI Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Show database statistics
|
||||||
|
python cli.py stats
|
||||||
|
|
||||||
|
# Search for users
|
||||||
|
python cli.py search "username"
|
||||||
|
|
||||||
|
# Export data to CSV
|
||||||
|
python cli.py export csv -o exported_data.csv
|
||||||
|
|
||||||
|
# Test Discord connection
|
||||||
|
python cli.py test
|
||||||
|
|
||||||
|
# Create manual backup
|
||||||
|
python cli.py backup
|
||||||
|
|
||||||
|
# Clean up old backups
|
||||||
|
python cli.py cleanup
|
||||||
|
```
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
discord-data-collector/
|
||||||
|
├── main.py # Main application entry point
|
||||||
|
├── cli.py # Command-line interface
|
||||||
|
├── config.toml # Configuration file
|
||||||
|
├── .env # Environment variables
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
├── src/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── client.py # Discord client implementation
|
||||||
|
│ ├── config.py # Configuration management
|
||||||
|
│ ├── database.py # JSON database manager
|
||||||
|
│ ├── rate_limiter.py # Rate limiting utilities
|
||||||
|
│ └── logger.py # Logging setup
|
||||||
|
├── data/
|
||||||
|
│ ├── users.json # User database
|
||||||
|
│ └── backups/ # Database backups
|
||||||
|
└── logs/
|
||||||
|
└── collector.log # Application logs
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Structure
|
||||||
|
|
||||||
|
Each user entry contains:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"user_id": 123456789,
|
||||||
|
"username": "example_user",
|
||||||
|
"discriminator": "1234",
|
||||||
|
"display_name": "Example User",
|
||||||
|
"avatar_url": "https://cdn.discordapp.com/avatars/...",
|
||||||
|
"banner_url": "https://cdn.discordapp.com/banners/...",
|
||||||
|
"bio": "User's about me section",
|
||||||
|
"status": "online",
|
||||||
|
"activity": "Playing a game",
|
||||||
|
"servers": [111111111, 222222222],
|
||||||
|
"created_at": "2024-01-01T00:00:00",
|
||||||
|
"updated_at": "2024-01-01T12:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Features in Detail
|
||||||
|
|
||||||
|
### Rate Limiting
|
||||||
|
- Configurable request delays
|
||||||
|
- Per-minute request limits
|
||||||
|
- Automatic backoff on rate limit hits
|
||||||
|
|
||||||
|
### Data Collection
|
||||||
|
- Real-time message monitoring
|
||||||
|
- Member list scanning
|
||||||
|
- Profile updates tracking
|
||||||
|
- Server membership tracking
|
||||||
|
|
||||||
|
### Database Management
|
||||||
|
- Automatic backups
|
||||||
|
- Data deduplication
|
||||||
|
- Export capabilities
|
||||||
|
- Statistics generation
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
- Configurable log levels
|
||||||
|
- File rotation
|
||||||
|
- Separate Discord.py logging
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
- MongoDB integration for better scalability
|
||||||
|
- Web dashboard for data visualization
|
||||||
|
- Advanced search and filtering
|
||||||
|
- Data analysis tools
|
||||||
|
- Network analysis features
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
1. Fork the repository
|
||||||
|
2. Create a feature branch
|
||||||
|
3. Make your changes
|
||||||
|
4. Add tests if applicable
|
||||||
|
5. Submit a pull request
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is for educational purposes only. Use responsibly and in compliance with applicable laws and terms of service.
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues or questions, please create an issue in the repository.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Remember**: This tool is for educational research only. Always respect user privacy and platform terms of service.
|
20
requirements.txt
Normal file
20
requirements.txt
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# Discord Data Collector Requirements
|
||||||
|
|
||||||
|
# Discord self-bot library
|
||||||
|
discord.py-self>=2.0.0
|
||||||
|
|
||||||
|
# Configuration management
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
toml>=0.10.2
|
||||||
|
|
||||||
|
# Database (for future MongoDB integration)
|
||||||
|
pymongo>=4.0.0
|
||||||
|
|
||||||
|
# Async utilities
|
||||||
|
asyncio-throttle>=1.0.0
|
||||||
|
|
||||||
|
# Data processing
|
||||||
|
pandas>=1.5.0
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
colorlog>=6.0.0
|
43
src/__init__.py
Normal file
43
src/__init__.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
"""
|
||||||
|
Discord Data Collector - A tool for collecting Discord user data for research purposes.
|
||||||
|
|
||||||
|
This package provides functionality to collect user data from Discord servers
|
||||||
|
for academic research, particularly focused on studying information propagation
|
||||||
|
and community dynamics.
|
||||||
|
|
||||||
|
Components:
|
||||||
|
- client: Discord client implementation
|
||||||
|
- config: Configuration management
|
||||||
|
- database: Data storage and management
|
||||||
|
- rate_limiter: API rate limiting
|
||||||
|
- logger: Logging utilities
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from src.client import DiscordDataClient
|
||||||
|
from src.config import Config
|
||||||
|
from src.database import JSONDatabase
|
||||||
|
|
||||||
|
config = Config()
|
||||||
|
database = JSONDatabase(config.database_path)
|
||||||
|
client = DiscordDataClient(config, database)
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "1.0.0"
|
||||||
|
__author__ = "Research Team"
|
||||||
|
__description__ = "Discord Data Collector for Research Purposes"
|
||||||
|
|
||||||
|
# Import main classes for easier access
|
||||||
|
from .client import DiscordDataClient
|
||||||
|
from .config import Config
|
||||||
|
from .database import JSONDatabase, UserData
|
||||||
|
from .rate_limiter import RateLimiter
|
||||||
|
from .logger import setup_logger
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'DiscordDataClient',
|
||||||
|
'Config',
|
||||||
|
'JSONDatabase',
|
||||||
|
'UserData',
|
||||||
|
'RateLimiter',
|
||||||
|
'setup_logger'
|
||||||
|
]
|
205
src/client.py
Normal file
205
src/client.py
Normal file
|
@ -0,0 +1,205 @@
|
||||||
|
"""
|
||||||
|
JSON database manager for Discord user data storage.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
import shutil
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional, Any
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class UserData:
|
||||||
|
"""Data structure for storing user information."""
|
||||||
|
user_id: int
|
||||||
|
username: str
|
||||||
|
discriminator: str
|
||||||
|
display_name: Optional[str] = None
|
||||||
|
avatar_url: Optional[str] = None
|
||||||
|
banner_url: Optional[str] = None
|
||||||
|
bio: Optional[str] = None
|
||||||
|
status: Optional[str] = None
|
||||||
|
activity: Optional[str] = None
|
||||||
|
servers: List[int] = None
|
||||||
|
created_at: str = None
|
||||||
|
updated_at: str = None
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if self.servers is None:
|
||||||
|
self.servers = []
|
||||||
|
|
||||||
|
current_time = datetime.utcnow().isoformat()
|
||||||
|
if self.created_at is None:
|
||||||
|
self.created_at = current_time
|
||||||
|
self.updated_at = current_time
|
||||||
|
|
||||||
|
|
||||||
|
class JSONDatabase:
|
||||||
|
"""JSON-based database for storing Discord user data."""
|
||||||
|
|
||||||
|
def __init__(self, database_path: str):
|
||||||
|
"""Initialize the JSON database."""
|
||||||
|
self.database_path = Path(database_path)
|
||||||
|
self.backup_path = Path("data/backups")
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
self._data: Dict[str, Dict] = {}
|
||||||
|
|
||||||
|
# Ensure database directory exists
|
||||||
|
self.database_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.backup_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Load existing data
|
||||||
|
self._load_data()
|
||||||
|
|
||||||
|
def _load_data(self):
|
||||||
|
"""Load data from JSON file."""
|
||||||
|
if self.database_path.exists():
|
||||||
|
try:
|
||||||
|
with open(self.database_path, 'r', encoding='utf-8') as f:
|
||||||
|
self._data = json.load(f)
|
||||||
|
self.logger.info(f"Loaded {len(self._data)} users from database")
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error loading database: {e}")
|
||||||
|
self._data = {}
|
||||||
|
else:
|
||||||
|
self._data = {}
|
||||||
|
self.logger.info("Created new database")
|
||||||
|
|
||||||
|
async def _save_data(self):
|
||||||
|
"""Save data to JSON file."""
|
||||||
|
async with self._lock:
|
||||||
|
try:
|
||||||
|
# Create backup before saving
|
||||||
|
if self.database_path.exists():
|
||||||
|
backup_filename = f"users_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
||||||
|
backup_path = self.backup_path / backup_filename
|
||||||
|
shutil.copy2(self.database_path, backup_path)
|
||||||
|
|
||||||
|
# Save data
|
||||||
|
with open(self.database_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(self._data, f, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
self.logger.debug(f"Saved {len(self._data)} users to database")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error saving database: {e}")
|
||||||
|
|
||||||
|
async def get_user(self, user_id: int) -> Optional[UserData]:
|
||||||
|
"""Get user data by ID."""
|
||||||
|
user_key = str(user_id)
|
||||||
|
if user_key in self._data:
|
||||||
|
user_dict = self._data[user_key]
|
||||||
|
return UserData(**user_dict)
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def save_user(self, user_data: UserData):
|
||||||
|
"""Save or update user data."""
|
||||||
|
user_key = str(user_data.user_id)
|
||||||
|
|
||||||
|
# If user exists, preserve created_at timestamp
|
||||||
|
if user_key in self._data:
|
||||||
|
user_data.created_at = self._data[user_key]['created_at']
|
||||||
|
|
||||||
|
# Update timestamp
|
||||||
|
user_data.updated_at = datetime.utcnow().isoformat()
|
||||||
|
|
||||||
|
# Save to memory
|
||||||
|
self._data[user_key] = asdict(user_data)
|
||||||
|
|
||||||
|
# Save to disk
|
||||||
|
await self._save_data()
|
||||||
|
|
||||||
|
self.logger.debug(f"Saved user {user_data.username}#{user_data.discriminator} ({user_data.user_id})")
|
||||||
|
|
||||||
|
async def add_server_to_user(self, user_id: int, server_id: int):
|
||||||
|
"""Add a server to user's server list."""
|
||||||
|
user_key = str(user_id)
|
||||||
|
if user_key in self._data:
|
||||||
|
if server_id not in self._data[user_key]['servers']:
|
||||||
|
self._data[user_key]['servers'].append(server_id)
|
||||||
|
self._data[user_key]['updated_at'] = datetime.utcnow().isoformat()
|
||||||
|
await self._save_data()
|
||||||
|
|
||||||
|
async def get_all_users(self) -> List[UserData]:
|
||||||
|
"""Get all users from the database."""
|
||||||
|
return [UserData(**user_dict) for user_dict in self._data.values()]
|
||||||
|
|
||||||
|
async def get_users_by_server(self, server_id: int) -> List[UserData]:
|
||||||
|
"""Get all users that are members of a specific server."""
|
||||||
|
users = []
|
||||||
|
for user_dict in self._data.values():
|
||||||
|
if server_id in user_dict.get('servers', []):
|
||||||
|
users.append(UserData(**user_dict))
|
||||||
|
return users
|
||||||
|
|
||||||
|
async def get_user_count(self) -> int:
|
||||||
|
"""Get total number of users in database."""
|
||||||
|
return len(self._data)
|
||||||
|
|
||||||
|
async def get_server_count(self) -> int:
|
||||||
|
"""Get total number of unique servers."""
|
||||||
|
servers = set()
|
||||||
|
for user_dict in self._data.values():
|
||||||
|
servers.update(user_dict.get('servers', []))
|
||||||
|
return len(servers)
|
||||||
|
|
||||||
|
async def cleanup_old_backups(self, max_backups: int = 10):
|
||||||
|
"""Clean up old backup files, keeping only the most recent ones."""
|
||||||
|
backup_files = sorted(self.backup_path.glob("users_backup_*.json"))
|
||||||
|
|
||||||
|
if len(backup_files) > max_backups:
|
||||||
|
files_to_remove = backup_files[:-max_backups]
|
||||||
|
for file_path in files_to_remove:
|
||||||
|
try:
|
||||||
|
file_path.unlink()
|
||||||
|
self.logger.info(f"Removed old backup: {file_path.name}")
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error removing backup {file_path.name}: {e}")
|
||||||
|
|
||||||
|
async def export_to_csv(self, output_path: str):
|
||||||
|
"""Export user data to CSV format."""
|
||||||
|
import csv
|
||||||
|
|
||||||
|
output_path = Path(output_path)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
|
||||||
|
fieldnames = ['user_id', 'username', 'discriminator', 'display_name',
|
||||||
|
'avatar_url', 'bio', 'status', 'servers', 'created_at', 'updated_at']
|
||||||
|
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||||
|
|
||||||
|
writer.writeheader()
|
||||||
|
for user_dict in self._data.values():
|
||||||
|
# Convert servers list to string
|
||||||
|
user_dict_copy = user_dict.copy()
|
||||||
|
user_dict_copy['servers'] = ','.join(map(str, user_dict.get('servers', [])))
|
||||||
|
writer.writerow(user_dict_copy)
|
||||||
|
|
||||||
|
self.logger.info(f"Exported {len(self._data)} users to {output_path}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error exporting to CSV: {e}")
|
||||||
|
|
||||||
|
async def get_statistics(self) -> Dict[str, Any]:
|
||||||
|
"""Get database statistics."""
|
||||||
|
stats = {
|
||||||
|
'total_users': await self.get_user_count(),
|
||||||
|
'total_servers': await self.get_server_count(),
|
||||||
|
'database_size': self.database_path.stat().st_size if self.database_path.exists() else 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Most active servers
|
||||||
|
server_counts = {}
|
||||||
|
for user_dict in self._data.values():
|
||||||
|
for server_id in user_dict.get('servers', []):
|
||||||
|
server_counts[server_id] = server_counts.get(server_id, 0) + 1
|
||||||
|
|
||||||
|
stats['most_active_servers'] = sorted(server_counts.items(),
|
||||||
|
key=lambda x: x[1], reverse=True)[:10]
|
||||||
|
|
||||||
|
return stats
|
121
src/config.py
Normal file
121
src/config.py
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
"""
|
||||||
|
Configuration management for Discord Data Collector.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import toml
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
"""Configuration manager for the Discord Data Collector."""
|
||||||
|
|
||||||
|
def __init__(self, config_file: str = "config.toml"):
|
||||||
|
"""Initialize configuration from TOML file and environment variables."""
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Load TOML configuration
|
||||||
|
self.config_file = Path(config_file)
|
||||||
|
self.config_data = self._load_config()
|
||||||
|
|
||||||
|
# Discord settings
|
||||||
|
self.discord_token = os.getenv("DISCORD_TOKEN")
|
||||||
|
if not self.discord_token:
|
||||||
|
raise ValueError("DISCORD_TOKEN environment variable is required")
|
||||||
|
|
||||||
|
# Database settings
|
||||||
|
self.database_path = self.config_data.get("database", {}).get("path", "data/users.json")
|
||||||
|
self.backup_interval = self.config_data.get("database", {}).get("backup_interval", 3600)
|
||||||
|
|
||||||
|
# Collection settings
|
||||||
|
collection_config = self.config_data.get("collection", {})
|
||||||
|
self.collect_profile_pics = collection_config.get("profile_pictures", True)
|
||||||
|
self.collect_bio = collection_config.get("bio", True)
|
||||||
|
self.collect_status = collection_config.get("status", True)
|
||||||
|
self.collect_server_membership = collection_config.get("server_membership", True)
|
||||||
|
|
||||||
|
# Rate limiting settings
|
||||||
|
rate_limit_config = self.config_data.get("rate_limiting", {})
|
||||||
|
self.request_delay = rate_limit_config.get("request_delay", 1.0)
|
||||||
|
self.max_requests_per_minute = rate_limit_config.get("max_requests_per_minute", 30)
|
||||||
|
|
||||||
|
# Monitoring settings
|
||||||
|
monitoring_config = self.config_data.get("monitoring", {})
|
||||||
|
self.target_servers = monitoring_config.get("target_servers", [])
|
||||||
|
self.monitor_all_servers = monitoring_config.get("monitor_all_servers", True)
|
||||||
|
|
||||||
|
# Logging settings
|
||||||
|
logging_config = self.config_data.get("logging", {})
|
||||||
|
self.log_level = logging_config.get("level", "INFO")
|
||||||
|
self.log_file = logging_config.get("file", "logs/collector.log")
|
||||||
|
|
||||||
|
# Ensure directories exist
|
||||||
|
self._ensure_directories()
|
||||||
|
|
||||||
|
def _load_config(self) -> dict:
|
||||||
|
"""Load configuration from TOML file."""
|
||||||
|
if not self.config_file.exists():
|
||||||
|
self._create_default_config()
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(self.config_file, 'r') as f:
|
||||||
|
return toml.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error loading config file: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _create_default_config(self):
|
||||||
|
"""Create a default configuration file."""
|
||||||
|
default_config = {
|
||||||
|
"database": {
|
||||||
|
"path": "data/users.json",
|
||||||
|
"backup_interval": 3600
|
||||||
|
},
|
||||||
|
"collection": {
|
||||||
|
"profile_pictures": True,
|
||||||
|
"bio": True,
|
||||||
|
"status": True,
|
||||||
|
"server_membership": True
|
||||||
|
},
|
||||||
|
"rate_limiting": {
|
||||||
|
"request_delay": 1.0,
|
||||||
|
"max_requests_per_minute": 30
|
||||||
|
},
|
||||||
|
"monitoring": {
|
||||||
|
"target_servers": [],
|
||||||
|
"monitor_all_servers": True
|
||||||
|
},
|
||||||
|
"logging": {
|
||||||
|
"level": "INFO",
|
||||||
|
"file": "logs/collector.log"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create directory if it doesn't exist
|
||||||
|
self.config_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
with open(self.config_file, 'w') as f:
|
||||||
|
toml.dump(default_config, f)
|
||||||
|
|
||||||
|
print(f"Created default configuration file: {self.config_file}")
|
||||||
|
|
||||||
|
def _ensure_directories(self):
|
||||||
|
"""Ensure required directories exist."""
|
||||||
|
directories = [
|
||||||
|
Path(self.database_path).parent,
|
||||||
|
Path(self.log_file).parent,
|
||||||
|
Path("data/backups")
|
||||||
|
]
|
||||||
|
|
||||||
|
for directory in directories:
|
||||||
|
directory.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def get_target_servers(self) -> List[int]:
|
||||||
|
"""Get list of target server IDs."""
|
||||||
|
if self.monitor_all_servers:
|
||||||
|
return []
|
||||||
|
return [int(server_id) for server_id in self.target_servers]
|
205
src/database.py
Normal file
205
src/database.py
Normal file
|
@ -0,0 +1,205 @@
|
||||||
|
"""
|
||||||
|
JSON database manager for Discord user data storage.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
import json
import logging
import shutil
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UserData:
    """Snapshot of a Discord user's profile as stored in the JSON database.

    ``servers`` uses ``field(default_factory=list)`` so every instance gets
    its own list (the previous ``= None`` default forced ``Optional`` typing
    and relied entirely on ``__post_init__`` to repair it).  Timestamps are
    naive-UTC ISO-8601 strings managed by ``__post_init__``.
    """
    user_id: int
    username: str
    discriminator: str
    display_name: Optional[str] = None
    avatar_url: Optional[str] = None
    banner_url: Optional[str] = None
    bio: Optional[str] = None
    status: Optional[str] = None
    activity: Optional[str] = None
    # Fresh list per instance; never a shared mutable default.
    servers: List[int] = field(default_factory=list)
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    def __post_init__(self):
        # Callers may still pass servers=None explicitly; normalize it
        # for backward compatibility with the old signature.
        if self.servers is None:
            self.servers = []

        # NOTE: utcnow() is kept to preserve the naive-UTC timestamp format
        # already present in stored data (it is deprecated since 3.12).
        current_time = datetime.utcnow().isoformat()
        if self.created_at is None:
            self.created_at = current_time
        # updated_at always reflects the moment of (re)construction.
        self.updated_at = current_time
|
||||||
|
|
||||||
|
|
||||||
|
class JSONDatabase:
    """JSON-file-backed store for Discord user records.

    Records live in memory as ``{str(user_id): dict}`` and are flushed to
    ``database_path`` (with a timestamped backup of the previous file) on
    every mutation.
    """

    def __init__(self, database_path: str):
        """Initialize the store and load any existing data from disk.

        Args:
            database_path: Path of the JSON file holding user records.
        """
        self.database_path = Path(database_path)
        self.backup_path = Path("data/backups")
        self.logger = logging.getLogger(__name__)
        # Serializes disk writes between concurrent coroutines.
        self._lock = asyncio.Lock()
        self._data: Dict[str, Dict] = {}

        # Ensure database and backup directories exist.
        self.database_path.parent.mkdir(parents=True, exist_ok=True)
        self.backup_path.mkdir(parents=True, exist_ok=True)

        # Load existing data into memory.
        self._load_data()

    def _load_data(self):
        """Populate the in-memory cache from the JSON file, if present."""
        if self.database_path.exists():
            try:
                with open(self.database_path, 'r', encoding='utf-8') as f:
                    self._data = json.load(f)
                self.logger.info(f"Loaded {len(self._data)} users from database")
            except Exception as e:
                # Corrupt/unreadable file: start empty rather than crash.
                self.logger.error(f"Error loading database: {e}")
                self._data = {}
        else:
            self._data = {}
            self.logger.info("Created new database")

    async def _save_data(self):
        """Write the in-memory data to disk, backing up the previous file."""
        async with self._lock:
            try:
                # Snapshot the current file before overwriting it.
                if self.database_path.exists():
                    backup_filename = f"users_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
                    backup_path = self.backup_path / backup_filename
                    shutil.copy2(self.database_path, backup_path)

                with open(self.database_path, 'w', encoding='utf-8') as f:
                    json.dump(self._data, f, indent=2, ensure_ascii=False)

                self.logger.debug(f"Saved {len(self._data)} users to database")
            except Exception as e:
                # Best-effort persistence: log and keep the in-memory state.
                self.logger.error(f"Error saving database: {e}")

    async def get_user(self, user_id: int) -> Optional["UserData"]:
        """Return the stored record for *user_id*, or None if unknown."""
        user_key = str(user_id)
        if user_key in self._data:
            return UserData(**self._data[user_key])
        return None

    async def save_user(self, user_data: "UserData"):
        """Insert or update a user record and persist to disk.

        Preserves the original ``created_at`` on updates and refreshes
        ``updated_at`` (note: this mutates the passed-in object).
        """
        user_key = str(user_data.user_id)

        # If the user already exists, keep the original creation timestamp.
        if user_key in self._data:
            user_data.created_at = self._data[user_key]['created_at']

        # Naive-UTC timestamp, matching UserData.__post_init__.
        user_data.updated_at = datetime.utcnow().isoformat()

        self._data[user_key] = asdict(user_data)
        await self._save_data()

        self.logger.debug(f"Saved user {user_data.username}#{user_data.discriminator} ({user_data.user_id})")

    async def add_server_to_user(self, user_id: int, server_id: int):
        """Record that *user_id* was seen in *server_id* (idempotent no-op
        when the user is unknown or the server is already listed)."""
        user_key = str(user_id)
        if user_key in self._data:
            if server_id not in self._data[user_key]['servers']:
                self._data[user_key]['servers'].append(server_id)
                self._data[user_key]['updated_at'] = datetime.utcnow().isoformat()
                await self._save_data()

    async def get_all_users(self) -> List["UserData"]:
        """Return every stored record as a UserData instance."""
        return [UserData(**user_dict) for user_dict in self._data.values()]

    async def get_users_by_server(self, server_id: int) -> List["UserData"]:
        """Return all users whose server list contains *server_id*."""
        return [UserData(**user_dict)
                for user_dict in self._data.values()
                if server_id in user_dict.get('servers', [])]

    async def get_user_count(self) -> int:
        """Total number of stored users."""
        return len(self._data)

    async def get_server_count(self) -> int:
        """Number of distinct servers across all users."""
        servers = set()
        for user_dict in self._data.values():
            servers.update(user_dict.get('servers', []))
        return len(servers)

    async def cleanup_old_backups(self, max_backups: int = 10):
        """Delete the oldest backups, keeping at most *max_backups* files.

        Lexicographic sort of the timestamped filenames is chronological.
        """
        backup_files = sorted(self.backup_path.glob("users_backup_*.json"))
        if len(backup_files) > max_backups:
            for file_path in backup_files[:-max_backups]:
                try:
                    file_path.unlink()
                    self.logger.info(f"Removed old backup: {file_path.name}")
                except Exception as e:
                    self.logger.error(f"Error removing backup {file_path.name}: {e}")

    async def export_to_csv(self, output_path: str):
        """Export all user records to a CSV file at *output_path*.

        Bug fix: the field list now includes every UserData field —
        'banner_url' and 'activity' were previously missing, which made
        DictWriter.writerow raise ValueError on every row (swallowed by the
        broad except), so the export silently produced a header-only file.
        """
        import csv

        output_path = Path(output_path)

        try:
            with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['user_id', 'username', 'discriminator', 'display_name',
                              'avatar_url', 'banner_url', 'bio', 'status', 'activity',
                              'servers', 'created_at', 'updated_at']
                # extrasaction='ignore' keeps the export working if new
                # fields are ever added to stored records.
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                        extrasaction='ignore')

                writer.writeheader()
                for user_dict in self._data.values():
                    # Flatten the servers list into a comma-joined string.
                    row = user_dict.copy()
                    row['servers'] = ','.join(map(str, user_dict.get('servers', [])))
                    writer.writerow(row)

            self.logger.info(f"Exported {len(self._data)} users to {output_path}")
        except Exception as e:
            self.logger.error(f"Error exporting to CSV: {e}")

    async def get_statistics(self) -> Dict[str, Any]:
        """Return summary stats: user/server counts, on-disk size in bytes,
        and the ten most populated servers as (server_id, count) pairs."""
        stats = {
            'total_users': await self.get_user_count(),
            'total_servers': await self.get_server_count(),
            'database_size': self.database_path.stat().st_size if self.database_path.exists() else 0
        }

        # Member count per server, descending.
        server_counts: Dict[int, int] = {}
        for user_dict in self._data.values():
            for server_id in user_dict.get('servers', []):
                server_counts[server_id] = server_counts.get(server_id, 0) + 1

        stats['most_active_servers'] = sorted(server_counts.items(),
                                              key=lambda x: x[1], reverse=True)[:10]

        return stats
|
61
src/logger.py
Normal file
61
src/logger.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
"""
|
||||||
|
Logging setup for Discord Data Collector.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logger(log_level: str = "INFO", log_file: str = "logs/collector.log") -> logging.Logger:
    """Configure root logging with console and rotating-file output.

    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_file: Path to the log file; its directory is created if needed

    Returns:
        The configured root logger instance
    """
    # Make sure the log directory exists before the file handler opens it.
    Path(log_file).parent.mkdir(parents=True, exist_ok=True)

    level = getattr(logging, log_level.upper())
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Reconfigure the root logger from scratch.
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.handlers.clear()

    # Console output on stdout.
    console_handler = logging.StreamHandler(sys.stdout)
    # Rotating file output: 10 MB per file, 5 backups kept.
    file_handler = RotatingFileHandler(
        log_file,
        maxBytes=10 * 1024 * 1024,
        backupCount=5
    )

    for handler in (console_handler, file_handler):
        handler.setLevel(level)
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)

    # Quiet down discord.py's chatty internal loggers.
    for noisy in ('discord', 'discord.http', 'discord.gateway'):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    return root_logger
|
70
src/rate_limiter.py
Normal file
70
src/rate_limiter.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
"""
|
||||||
|
Rate limiter for Discord API requests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from collections import deque
|
||||||
|
from typing import Optional
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
    """Client-side throttle that spaces out Discord API requests."""

    def __init__(self, requests_per_minute: int = 30, delay_between_requests: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            requests_per_minute: Maximum requests per minute
            delay_between_requests: Minimum delay between requests in seconds
        """
        self.requests_per_minute = requests_per_minute
        self.delay_between_requests = delay_between_requests
        # Timestamps of requests made within the last minute (oldest first).
        self.request_times = deque()
        self.last_request_time = 0
        self.logger = logging.getLogger(__name__)

    async def wait(self):
        """Sleep just long enough to honor both throttling constraints."""
        now = time.time()

        # Drop timestamps older than the 60-second sliding window.
        while self.request_times and now - self.request_times[0] > 60:
            self.request_times.popleft()

        # Per-minute cap: sleep until the oldest request ages out.
        if len(self.request_times) >= self.requests_per_minute:
            pause = 60 - (now - self.request_times[0])
            if pause > 0:
                self.logger.debug(f"Rate limit reached, waiting {pause:.2f} seconds")
                await asyncio.sleep(pause)
                now = time.time()

        # Per-request spacing: enforce the minimum gap since the last call.
        elapsed = now - self.last_request_time
        if elapsed < self.delay_between_requests:
            await asyncio.sleep(self.delay_between_requests - elapsed)
            now = time.time()

        # Record this request in the window.
        self.request_times.append(now)
        self.last_request_time = now

    def get_stats(self) -> dict:
        """Return a snapshot of the current throttling state."""
        now = time.time()

        # Prune stale timestamps so the count reflects the last minute only.
        while self.request_times and now - self.request_times[0] > 60:
            self.request_times.popleft()

        return {
            'requests_last_minute': len(self.request_times),
            'requests_per_minute_limit': self.requests_per_minute,
            'delay_between_requests': self.delay_between_requests,
            'time_since_last_request': now - self.last_request_time
        }
|
Loading…
Reference in a new issue