From 8edda894db5c34edada33cc3b1356665649b691b Mon Sep 17 00:00:00 2001 From: Xargana Date: Sun, 13 Jul 2025 21:04:53 +0300 Subject: [PATCH] Initial commit v2 --- .env.example | 13 +++ cli.py | 182 ++++++++++++++++++++++++++++++++++++++ config.toml | 33 +++++++ main.py | 50 +++++++++++ readme.md | 208 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 20 +++++ src/__init__.py | 43 +++++++++ src/client.py | 205 +++++++++++++++++++++++++++++++++++++++++++ src/config.py | 121 ++++++++++++++++++++++++++ src/database.py | 205 +++++++++++++++++++++++++++++++++++++++++++ src/logger.py | 61 +++++++++++++ src/rate_limiter.py | 70 +++++++++++++++ 12 files changed, 1211 insertions(+) create mode 100644 .env.example create mode 100644 cli.py create mode 100644 config.toml create mode 100644 main.py create mode 100644 readme.md create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/client.py create mode 100644 src/config.py create mode 100644 src/database.py create mode 100644 src/logger.py create mode 100644 src/rate_limiter.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..4d10986 --- /dev/null +++ b/.env.example @@ -0,0 +1,13 @@ +# Discord Data Collector Environment Variables +# Copy this file to .env and fill in your values + +# Discord user token (REQUIRED) +# WARNING: This should be your user token, not a bot token +# Keep this secret and never share it publicly +DISCORD_TOKEN=your_discord_user_token_here + +# Optional: Database connection string for future MongoDB integration +# MONGODB_URI=mongodb://localhost:27017/discord_research + +# Optional: Additional API keys for extended functionality +# BACKUP_WEBHOOK_URL=https://discord.com/api/webhooks/your_webhook_url \ No newline at end of file diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..1b1bf46 --- /dev/null +++ b/cli.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +""" +Command-line interface for Discord Data Collector. 
+""" + +import argparse +import asyncio +import json +import sys +from pathlib import Path + +# Add src to path +sys.path.append(str(Path(__file__).parent)) + +from src.config import Config +from src.database import JSONDatabase +from src.client import DiscordDataClient + + +async def export_data(format_type: str, output_path: str = None): + """Export collected data.""" + config = Config() + database = JSONDatabase(config.database_path) + + if output_path is None: + from datetime import datetime + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_path = f"data/export_{timestamp}.{format_type}" + + if format_type == "csv": + await database.export_to_csv(output_path) + print(f"Data exported to {output_path}") + else: + print(f"Unsupported format: {format_type}") + + +async def show_stats(): + """Show database statistics.""" + config = Config() + database = JSONDatabase(config.database_path) + + stats = await database.get_statistics() + + print("\n=== Database Statistics ===") + print(f"Total users: {stats['total_users']}") + print(f"Total servers: {stats['total_servers']}") + print(f"Database size: {stats['database_size']} bytes") + + if stats['most_active_servers']: + print("\nMost active servers:") + for server_id, user_count in stats['most_active_servers'][:5]: + print(f" Server {server_id}: {user_count} users") + + +async def search_user(query: str): + """Search for users.""" + config = Config() + database = JSONDatabase(config.database_path) + + all_users = await database.get_all_users() + + # Search by username or user ID + results = [] + for user in all_users: + if (query.lower() in user.username.lower() or + query.lower() in (user.display_name or "").lower() or + query == str(user.user_id)): + results.append(user) + + if not results: + print("No users found matching the query.") + return + + print(f"\n=== Found {len(results)} users ===") + for user in results[:10]: # Show first 10 results + print(f"{user.username}#{user.discriminator} (ID: 
{user.user_id})") + if user.display_name: + print(f" Display name: {user.display_name}") + if user.bio: + print(f" Bio: {user.bio[:100]}...") + print(f" Servers: {len(user.servers)}") + print(f" Last updated: {user.updated_at}") + print() + + +async def backup_database(): + """Create a manual backup of the database.""" + config = Config() + database = JSONDatabase(config.database_path) + + from datetime import datetime + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = f"data/backups/manual_backup_{timestamp}.json" + + # Copy current database + import shutil + shutil.copy2(database.database_path, backup_path) + + print(f"Database backed up to {backup_path}") + + +async def cleanup_data(): + """Clean up old data and backups.""" + config = Config() + database = JSONDatabase(config.database_path) + + await database.cleanup_old_backups(max_backups=5) + print("Cleanup completed") + + +async def test_connection(): + """Test Discord connection.""" + try: + config = Config() + database = JSONDatabase(config.database_path) + client = DiscordDataClient(config, database) + + print("Testing Discord connection...") + + # This will test the connection without starting the full bot + await client.login(config.discord_token) + user_info = client.user + + print(f"✓ Successfully connected as {user_info.name}#{user_info.discriminator}") + print(f"✓ User ID: {user_info.id}") + + await client.close() + + except Exception as e: + print(f"✗ Connection failed: {e}") + sys.exit(1) + + +def main(): + """Main CLI entry point.""" + parser = argparse.ArgumentParser(description="Discord Data Collector CLI") + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # Export command + export_parser = subparsers.add_parser("export", help="Export collected data") + export_parser.add_argument("format", choices=["csv"], help="Export format") + export_parser.add_argument("-o", "--output", help="Output file path") + + # Stats command + 
subparsers.add_parser("stats", help="Show database statistics") + + # Search command + search_parser = subparsers.add_parser("search", help="Search for users") + search_parser.add_argument("query", help="Search query (username or user ID)") + + # Backup command + subparsers.add_parser("backup", help="Create manual database backup") + + # Cleanup command + subparsers.add_parser("cleanup", help="Clean up old data and backups") + + # Test command + subparsers.add_parser("test", help="Test Discord connection") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + return + + # Run the appropriate command + if args.command == "export": + asyncio.run(export_data(args.format, args.output)) + elif args.command == "stats": + asyncio.run(show_stats()) + elif args.command == "search": + asyncio.run(search_user(args.query)) + elif args.command == "backup": + asyncio.run(backup_database()) + elif args.command == "cleanup": + asyncio.run(cleanup_data()) + elif args.command == "test": + asyncio.run(test_connection()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/config.toml b/config.toml new file mode 100644 index 0000000..e9c3209 --- /dev/null +++ b/config.toml @@ -0,0 +1,33 @@ +# Discord Data Collector Configuration + +[database] +# JSON database file path +path = "data/users.json" +# Backup interval in seconds (3600 = 1 hour) +backup_interval = 3600 + +[collection] +# What data to collect +profile_pictures = true +bio = true +status = true +server_membership = true + +[rate_limiting] +# Delay between API requests in seconds +request_delay = 1.0 +# Maximum requests per minute +max_requests_per_minute = 30 + +[monitoring] +# List of specific server IDs to monitor (leave empty to monitor all) +# Example: target_servers = [123456789, 987654321] +target_servers = [] +# Monitor all servers the account is in +monitor_all_servers = true + +[logging] +# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL +level = "INFO" +# Log 
file path +file = "logs/collector.log" \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..d9a2a78 --- /dev/null +++ b/main.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +Discord User Data Collector +Main application entry point for collecting Discord user data for research purposes. +""" + +import asyncio +import logging +import sys +from pathlib import Path + +from src.client import DiscordDataClient +from src.config import Config +from src.database import JSONDatabase +from src.logger import setup_logger + + +async def main(): + """Main application entry point.""" + try: + # Setup configuration + config = Config() + + # Setup logging + logger = setup_logger(config.log_level, config.log_file) + logger.info("Starting Discord Data Collector") + + # Initialize database + database = JSONDatabase(config.database_path) + + # Initialize Discord client + client = DiscordDataClient(config, database) + + # Start the client + logger.info("Starting Discord client...") + await client.start(config.discord_token) + + except KeyboardInterrupt: + logger.info("Received keyboard interrupt, shutting down...") + except Exception as e: + logger.error(f"Fatal error: {e}", exc_info=True) + sys.exit(1) + finally: + if 'client' in locals(): + await client.close() + logger.info("Application shutdown complete") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..dc2afab --- /dev/null +++ b/readme.md @@ -0,0 +1,208 @@ +# ⚠️ Warning! ai slop. + + +# Discord Data Collector + +A Python application for collecting Discord user data for research purposes, specifically designed to study information propagation patterns in Discord communities. + +## Important Disclaimers + +- **Terms of Service**: This application uses self-botting, which violates Discord's Terms of Service and may result in account suspension. 
+- **Educational Use Only**: This tool is intended solely for educational and research purposes. +- **Privacy Considerations**: Always respect user privacy and obtain proper consent when collecting data. +- **Legal Compliance**: Ensure compliance with applicable data protection laws (GDPR, CCPA, etc.). + +## Features + +- **User Data Collection**: Automatically collects usernames, profile pictures, bios, status, and server memberships +- **Message Monitoring**: Processes messages from monitored servers to identify active users +- **Rate Limiting**: Built-in rate limiting to avoid hitting Discord API limits +- **Flexible Configuration**: Easy configuration via TOML and environment files +- **Data Export**: Export collected data to CSV format +- **Database Management**: JSON-based storage with automatic backups +- **CLI Tools**: Command-line interface for data management and analysis + +## Installation + +1. **Clone the repository**: + ```bash + git clone + cd discord-data-collector + ``` + +2. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + +3. **Create configuration files**: + ```bash + cp .env.example .env + # Edit .env with your Discord token + ``` + +4. 
**Configure settings**: + - Edit `config.toml` to adjust collection settings + - Add your Discord user token to `.env` + +## Configuration + +### Environment Variables (.env) + +```env +# Your Discord user token (REQUIRED) +DISCORD_TOKEN=your_discord_user_token_here +``` + +### Configuration File (config.toml) + +```toml +[database] +path = "data/users.json" +backup_interval = 3600 + +[collection] +profile_pictures = true +bio = true +status = true +server_membership = true + +[rate_limiting] +request_delay = 1.0 +max_requests_per_minute = 30 + +[monitoring] +target_servers = [] # Empty = monitor all servers +monitor_all_servers = true + +[logging] +level = "INFO" +file = "logs/collector.log" +``` + +## Usage + +### Running the Collector + +```bash +# Start the data collector +python main.py +``` + +### CLI Commands + +```bash +# Show database statistics +python cli.py stats + +# Search for users +python cli.py search "username" + +# Export data to CSV +python cli.py export csv -o exported_data.csv + +# Test Discord connection +python cli.py test + +# Create manual backup +python cli.py backup + +# Clean up old backups +python cli.py cleanup +``` + +## Project Structure + +``` +discord-data-collector/ +├── main.py # Main application entry point +├── cli.py # Command-line interface +├── config.toml # Configuration file +├── .env # Environment variables +├── requirements.txt # Python dependencies +├── src/ +│ ├── __init__.py +│ ├── client.py # Discord client implementation +│ ├── config.py # Configuration management +│ ├── database.py # JSON database manager +│ ├── rate_limiter.py # Rate limiting utilities +│ └── logger.py # Logging setup +├── data/ +│ ├── users.json # User database +│ └── backups/ # Database backups +└── logs/ + └── collector.log # Application logs +``` + +## Data Structure + +Each user entry contains: + +```json +{ + "user_id": 123456789, + "username": "example_user", + "discriminator": "1234", + "display_name": "Example User", + "avatar_url": 
"https://cdn.discordapp.com/avatars/...", + "banner_url": "https://cdn.discordapp.com/banners/...", + "bio": "User's about me section", + "status": "online", + "activity": "Playing a game", + "servers": [111111111, 222222222], + "created_at": "2024-01-01T00:00:00", + "updated_at": "2024-01-01T12:00:00" +} +``` + +## Features in Detail + +### Rate Limiting +- Configurable request delays +- Per-minute request limits +- Automatic backoff on rate limit hits + +### Data Collection +- Real-time message monitoring +- Member list scanning +- Profile updates tracking +- Server membership tracking + +### Database Management +- Automatic backups +- Data deduplication +- Export capabilities +- Statistics generation + +### Logging +- Configurable log levels +- File rotation +- Separate Discord.py logging + +## Future Enhancements + +- MongoDB integration for better scalability +- Web dashboard for data visualization +- Advanced search and filtering +- Data analysis tools +- Network analysis features + +## Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Add tests if applicable +5. Submit a pull request + +## License + +This project is for educational purposes only. Use responsibly and in compliance with applicable laws and terms of service. + +## Support + +For issues or questions, please create an issue in the repository. + +--- + +**Remember**: This tool is for educational research only. Always respect user privacy and platform terms of service. 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..cf0a2b2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# Discord Data Collector Requirements + +# Discord self-bot library +discord.py-self>=2.0.0 + +# Configuration management +python-dotenv>=1.0.0 +toml>=0.10.2 + +# Database (for future MongoDB integration) +pymongo>=4.0.0 + +# Async utilities +asyncio-throttle>=1.0.0 + +# Data processing +pandas>=1.5.0 + +# Logging +colorlog>=6.0.0 \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..6331612 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,43 @@ +""" +Discord Data Collector - A tool for collecting Discord user data for research purposes. + +This package provides functionality to collect user data from Discord servers +for academic research, particularly focused on studying information propagation +and community dynamics. + +Components: +- client: Discord client implementation +- config: Configuration management +- database: Data storage and management +- rate_limiter: API rate limiting +- logger: Logging utilities + +Usage: + from src.client import DiscordDataClient + from src.config import Config + from src.database import JSONDatabase + + config = Config() + database = JSONDatabase(config.database_path) + client = DiscordDataClient(config, database) +""" + +__version__ = "1.0.0" +__author__ = "Research Team" +__description__ = "Discord Data Collector for Research Purposes" + +# Import main classes for easier access +from .client import DiscordDataClient +from .config import Config +from .database import JSONDatabase, UserData +from .rate_limiter import RateLimiter +from .logger import setup_logger + +__all__ = [ + 'DiscordDataClient', + 'Config', + 'JSONDatabase', + 'UserData', + 'RateLimiter', + 'setup_logger' +] \ No newline at end of file diff --git a/src/client.py b/src/client.py new file mode 100644 index 0000000..24ac105 --- 
/dev/null +++ b/src/client.py @@ -0,0 +1,205 @@ +""" +JSON database manager for Discord user data storage. +""" + +import json +import asyncio +import shutil +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, asdict +import logging + + +@dataclass +class UserData: + """Data structure for storing user information.""" + user_id: int + username: str + discriminator: str + display_name: Optional[str] = None + avatar_url: Optional[str] = None + banner_url: Optional[str] = None + bio: Optional[str] = None + status: Optional[str] = None + activity: Optional[str] = None + servers: List[int] = None + created_at: str = None + updated_at: str = None + + def __post_init__(self): + if self.servers is None: + self.servers = [] + + current_time = datetime.utcnow().isoformat() + if self.created_at is None: + self.created_at = current_time + self.updated_at = current_time + + +class JSONDatabase: + """JSON-based database for storing Discord user data.""" + + def __init__(self, database_path: str): + """Initialize the JSON database.""" + self.database_path = Path(database_path) + self.backup_path = Path("data/backups") + self.logger = logging.getLogger(__name__) + self._lock = asyncio.Lock() + self._data: Dict[str, Dict] = {} + + # Ensure database directory exists + self.database_path.parent.mkdir(parents=True, exist_ok=True) + self.backup_path.mkdir(parents=True, exist_ok=True) + + # Load existing data + self._load_data() + + def _load_data(self): + """Load data from JSON file.""" + if self.database_path.exists(): + try: + with open(self.database_path, 'r', encoding='utf-8') as f: + self._data = json.load(f) + self.logger.info(f"Loaded {len(self._data)} users from database") + except Exception as e: + self.logger.error(f"Error loading database: {e}") + self._data = {} + else: + self._data = {} + self.logger.info("Created new database") + + async def _save_data(self): + """Save data to JSON 
file.""" + async with self._lock: + try: + # Create backup before saving + if self.database_path.exists(): + backup_filename = f"users_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" + backup_path = self.backup_path / backup_filename + shutil.copy2(self.database_path, backup_path) + + # Save data + with open(self.database_path, 'w', encoding='utf-8') as f: + json.dump(self._data, f, indent=2, ensure_ascii=False) + + self.logger.debug(f"Saved {len(self._data)} users to database") + + except Exception as e: + self.logger.error(f"Error saving database: {e}") + + async def get_user(self, user_id: int) -> Optional[UserData]: + """Get user data by ID.""" + user_key = str(user_id) + if user_key in self._data: + user_dict = self._data[user_key] + return UserData(**user_dict) + return None + + async def save_user(self, user_data: UserData): + """Save or update user data.""" + user_key = str(user_data.user_id) + + # If user exists, preserve created_at timestamp + if user_key in self._data: + user_data.created_at = self._data[user_key]['created_at'] + + # Update timestamp + user_data.updated_at = datetime.utcnow().isoformat() + + # Save to memory + self._data[user_key] = asdict(user_data) + + # Save to disk + await self._save_data() + + self.logger.debug(f"Saved user {user_data.username}#{user_data.discriminator} ({user_data.user_id})") + + async def add_server_to_user(self, user_id: int, server_id: int): + """Add a server to user's server list.""" + user_key = str(user_id) + if user_key in self._data: + if server_id not in self._data[user_key]['servers']: + self._data[user_key]['servers'].append(server_id) + self._data[user_key]['updated_at'] = datetime.utcnow().isoformat() + await self._save_data() + + async def get_all_users(self) -> List[UserData]: + """Get all users from the database.""" + return [UserData(**user_dict) for user_dict in self._data.values()] + + async def get_users_by_server(self, server_id: int) -> List[UserData]: + """Get all users that are 
members of a specific server.""" + users = [] + for user_dict in self._data.values(): + if server_id in user_dict.get('servers', []): + users.append(UserData(**user_dict)) + return users + + async def get_user_count(self) -> int: + """Get total number of users in database.""" + return len(self._data) + + async def get_server_count(self) -> int: + """Get total number of unique servers.""" + servers = set() + for user_dict in self._data.values(): + servers.update(user_dict.get('servers', [])) + return len(servers) + + async def cleanup_old_backups(self, max_backups: int = 10): + """Clean up old backup files, keeping only the most recent ones.""" + backup_files = sorted(self.backup_path.glob("users_backup_*.json")) + + if len(backup_files) > max_backups: + files_to_remove = backup_files[:-max_backups] + for file_path in files_to_remove: + try: + file_path.unlink() + self.logger.info(f"Removed old backup: {file_path.name}") + except Exception as e: + self.logger.error(f"Error removing backup {file_path.name}: {e}") + + async def export_to_csv(self, output_path: str): + """Export user data to CSV format.""" + import csv + + output_path = Path(output_path) + + try: + with open(output_path, 'w', newline='', encoding='utf-8') as csvfile: + fieldnames = ['user_id', 'username', 'discriminator', 'display_name', + 'avatar_url', 'bio', 'status', 'servers', 'created_at', 'updated_at'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for user_dict in self._data.values(): + # Convert servers list to string + user_dict_copy = user_dict.copy() + user_dict_copy['servers'] = ','.join(map(str, user_dict.get('servers', []))) + writer.writerow(user_dict_copy) + + self.logger.info(f"Exported {len(self._data)} users to {output_path}") + + except Exception as e: + self.logger.error(f"Error exporting to CSV: {e}") + + async def get_statistics(self) -> Dict[str, Any]: + """Get database statistics.""" + stats = { + 'total_users': await self.get_user_count(), 
+ 'total_servers': await self.get_server_count(), + 'database_size': self.database_path.stat().st_size if self.database_path.exists() else 0 + } + + # Most active servers + server_counts = {} + for user_dict in self._data.values(): + for server_id in user_dict.get('servers', []): + server_counts[server_id] = server_counts.get(server_id, 0) + 1 + + stats['most_active_servers'] = sorted(server_counts.items(), + key=lambda x: x[1], reverse=True)[:10] + + return stats \ No newline at end of file diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..c83b23f --- /dev/null +++ b/src/config.py @@ -0,0 +1,121 @@ +""" +Configuration management for Discord Data Collector. +""" + +import os +import toml +from pathlib import Path +from typing import List, Optional +from dotenv import load_dotenv + + +class Config: + """Configuration manager for the Discord Data Collector.""" + + def __init__(self, config_file: str = "config.toml"): + """Initialize configuration from TOML file and environment variables.""" + + # Load environment variables from .env file + load_dotenv() + + # Load TOML configuration + self.config_file = Path(config_file) + self.config_data = self._load_config() + + # Discord settings + self.discord_token = os.getenv("DISCORD_TOKEN") + if not self.discord_token: + raise ValueError("DISCORD_TOKEN environment variable is required") + + # Database settings + self.database_path = self.config_data.get("database", {}).get("path", "data/users.json") + self.backup_interval = self.config_data.get("database", {}).get("backup_interval", 3600) + + # Collection settings + collection_config = self.config_data.get("collection", {}) + self.collect_profile_pics = collection_config.get("profile_pictures", True) + self.collect_bio = collection_config.get("bio", True) + self.collect_status = collection_config.get("status", True) + self.collect_server_membership = collection_config.get("server_membership", True) + + # Rate limiting settings + rate_limit_config 
= self.config_data.get("rate_limiting", {}) + self.request_delay = rate_limit_config.get("request_delay", 1.0) + self.max_requests_per_minute = rate_limit_config.get("max_requests_per_minute", 30) + + # Monitoring settings + monitoring_config = self.config_data.get("monitoring", {}) + self.target_servers = monitoring_config.get("target_servers", []) + self.monitor_all_servers = monitoring_config.get("monitor_all_servers", True) + + # Logging settings + logging_config = self.config_data.get("logging", {}) + self.log_level = logging_config.get("level", "INFO") + self.log_file = logging_config.get("file", "logs/collector.log") + + # Ensure directories exist + self._ensure_directories() + + def _load_config(self) -> dict: + """Load configuration from TOML file.""" + if not self.config_file.exists(): + self._create_default_config() + + try: + with open(self.config_file, 'r') as f: + return toml.load(f) + except Exception as e: + print(f"Error loading config file: {e}") + return {} + + def _create_default_config(self): + """Create a default configuration file.""" + default_config = { + "database": { + "path": "data/users.json", + "backup_interval": 3600 + }, + "collection": { + "profile_pictures": True, + "bio": True, + "status": True, + "server_membership": True + }, + "rate_limiting": { + "request_delay": 1.0, + "max_requests_per_minute": 30 + }, + "monitoring": { + "target_servers": [], + "monitor_all_servers": True + }, + "logging": { + "level": "INFO", + "file": "logs/collector.log" + } + } + + # Create directory if it doesn't exist + self.config_file.parent.mkdir(parents=True, exist_ok=True) + + with open(self.config_file, 'w') as f: + toml.dump(default_config, f) + + print(f"Created default configuration file: {self.config_file}") + + def _ensure_directories(self): + """Ensure required directories exist.""" + directories = [ + Path(self.database_path).parent, + Path(self.log_file).parent, + Path("data/backups") + ] + + for directory in directories: + 
directory.mkdir(parents=True, exist_ok=True) + + def get_target_servers(self) -> List[int]: + """Get list of target server IDs.""" + if self.monitor_all_servers: + return [] + return [int(server_id) for server_id in self.target_servers] \ No newline at end of file diff --git a/src/database.py b/src/database.py new file mode 100644 index 0000000..24ac105 --- /dev/null +++ b/src/database.py @@ -0,0 +1,205 @@ +""" +JSON database manager for Discord user data storage. +""" + +import json +import asyncio +import shutil +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, asdict +import logging + + +@dataclass +class UserData: + """Data structure for storing user information.""" + user_id: int + username: str + discriminator: str + display_name: Optional[str] = None + avatar_url: Optional[str] = None + banner_url: Optional[str] = None + bio: Optional[str] = None + status: Optional[str] = None + activity: Optional[str] = None + servers: List[int] = None + created_at: str = None + updated_at: str = None + + def __post_init__(self): + if self.servers is None: + self.servers = [] + + current_time = datetime.utcnow().isoformat() + if self.created_at is None: + self.created_at = current_time + self.updated_at = current_time + + +class JSONDatabase: + """JSON-based database for storing Discord user data.""" + + def __init__(self, database_path: str): + """Initialize the JSON database.""" + self.database_path = Path(database_path) + self.backup_path = Path("data/backups") + self.logger = logging.getLogger(__name__) + self._lock = asyncio.Lock() + self._data: Dict[str, Dict] = {} + + # Ensure database directory exists + self.database_path.parent.mkdir(parents=True, exist_ok=True) + self.backup_path.mkdir(parents=True, exist_ok=True) + + # Load existing data + self._load_data() + + def _load_data(self): + """Load data from JSON file.""" + if self.database_path.exists(): + try: + with 
open(self.database_path, 'r', encoding='utf-8') as f: + self._data = json.load(f) + self.logger.info(f"Loaded {len(self._data)} users from database") + except Exception as e: + self.logger.error(f"Error loading database: {e}") + self._data = {} + else: + self._data = {} + self.logger.info("Created new database") + + async def _save_data(self): + """Save data to JSON file.""" + async with self._lock: + try: + # Create backup before saving + if self.database_path.exists(): + backup_filename = f"users_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" + backup_path = self.backup_path / backup_filename + shutil.copy2(self.database_path, backup_path) + + # Save data + with open(self.database_path, 'w', encoding='utf-8') as f: + json.dump(self._data, f, indent=2, ensure_ascii=False) + + self.logger.debug(f"Saved {len(self._data)} users to database") + + except Exception as e: + self.logger.error(f"Error saving database: {e}") + + async def get_user(self, user_id: int) -> Optional[UserData]: + """Get user data by ID.""" + user_key = str(user_id) + if user_key in self._data: + user_dict = self._data[user_key] + return UserData(**user_dict) + return None + + async def save_user(self, user_data: UserData): + """Save or update user data.""" + user_key = str(user_data.user_id) + + # If user exists, preserve created_at timestamp + if user_key in self._data: + user_data.created_at = self._data[user_key]['created_at'] + + # Update timestamp + user_data.updated_at = datetime.utcnow().isoformat() + + # Save to memory + self._data[user_key] = asdict(user_data) + + # Save to disk + await self._save_data() + + self.logger.debug(f"Saved user {user_data.username}#{user_data.discriminator} ({user_data.user_id})") + + async def add_server_to_user(self, user_id: int, server_id: int): + """Add a server to user's server list.""" + user_key = str(user_id) + if user_key in self._data: + if server_id not in self._data[user_key]['servers']: + 
async def get_all_users(self) -> List[UserData]:
    """Return every stored user record rehydrated as a UserData instance."""
    return [UserData(**record) for record in self._data.values()]


async def get_users_by_server(self, server_id: int) -> List[UserData]:
    """Return all users whose stored 'servers' list contains *server_id*."""
    return [
        UserData(**record)
        for record in self._data.values()
        if server_id in record.get('servers', [])
    ]


async def get_user_count(self) -> int:
    """Return the total number of users in the database."""
    return len(self._data)


async def get_server_count(self) -> int:
    """Return the number of distinct server ids referenced by any user."""
    servers = set()
    for record in self._data.values():
        servers.update(record.get('servers', []))
    return len(servers)


async def cleanup_old_backups(self, max_backups: int = 10):
    """Delete the oldest backup files, keeping at most *max_backups*.

    Backup filenames embed a %Y%m%d_%H%M%S timestamp, so a lexical sort
    orders them chronologically; the slice drops everything but the
    most recent *max_backups* entries. Failures to delete are logged
    and do not abort the sweep.
    """
    backup_files = sorted(self.backup_path.glob("users_backup_*.json"))

    if len(backup_files) > max_backups:
        for file_path in backup_files[:-max_backups]:
            try:
                file_path.unlink()
                self.logger.info(f"Removed old backup: {file_path.name}")
            except Exception as e:
                self.logger.error(f"Error removing backup {file_path.name}: {e}")


async def export_to_csv(self, output_path: str):
    """Export all user records to *output_path* in CSV format.

    Fix: DictWriter now uses extrasaction='ignore'. Previously, any
    stored record carrying a key outside the declared fieldnames made
    writerow() raise ValueError, which the broad except converted into
    a logged error and a silently truncated export. Unknown fields are
    now skipped instead; missing fields are written as empty strings
    (DictWriter's default restval).
    """
    import csv

    output_path = Path(output_path)

    fieldnames = ['user_id', 'username', 'discriminator', 'display_name',
                  'avatar_url', 'bio', 'status', 'servers', 'created_at', 'updated_at']

    try:
        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                    extrasaction='ignore')
            writer.writeheader()
            for record in self._data.values():
                row = record.copy()
                # Flatten the server-id list into a comma-separated string
                # so it fits a single CSV cell.
                row['servers'] = ','.join(map(str, record.get('servers', [])))
                writer.writerow(row)

        self.logger.info(f"Exported {len(self._data)} users to {output_path}")

    except Exception as e:
        # Best-effort export: log and return rather than propagate.
        self.logger.error(f"Error exporting to CSV: {e}")


async def get_statistics(self) -> Dict[str, Any]:
    """Return summary statistics: totals, on-disk size, busiest servers."""
    stats = {
        'total_users': await self.get_user_count(),
        'total_servers': await self.get_server_count(),
        # stat() only when the file exists; 0 otherwise.
        'database_size': self.database_path.stat().st_size if self.database_path.exists() else 0
    }

    # Count memberships per server across every stored user.
    server_counts = {}
    for record in self._data.values():
        for server_id in record.get('servers', []):
            server_counts[server_id] = server_counts.get(server_id, 0) + 1

    # Ten most-populated servers as (server_id, member_count) pairs,
    # largest first.
    stats['most_active_servers'] = sorted(server_counts.items(),
                                          key=lambda kv: kv[1], reverse=True)[:10]

    return stats
class RateLimiter:
    """Client-side throttle for Discord API requests.

    Enforces two independent constraints: a rolling 60-second cap on the
    number of requests, and a minimum spacing between consecutive requests.
    """

    def __init__(self, requests_per_minute: int = 30, delay_between_requests: float = 1.0):
        """Initialize the limiter.

        Args:
            requests_per_minute: Maximum requests allowed in any 60-second window.
            delay_between_requests: Minimum gap between consecutive requests, seconds.
        """
        self.requests_per_minute = requests_per_minute
        self.delay_between_requests = delay_between_requests
        # Timestamps of recent requests; stale entries are pruned lazily.
        self.request_times = deque()
        # 0 means "no request yet", so the first call is never delayed.
        self.last_request_time = 0
        self.logger = logging.getLogger(__name__)

    def _drop_stale(self, now: float) -> None:
        """Discard timestamps that have aged out of the 60-second window."""
        window = self.request_times
        while window and now - window[0] > 60:
            window.popleft()

    async def wait(self):
        """Sleep as needed so the next request respects both limits."""
        now = time.time()
        self._drop_stale(now)

        # Window full: sleep until the oldest entry leaves the window.
        if len(self.request_times) >= self.requests_per_minute:
            pause = 60 - (now - self.request_times[0])
            if pause > 0:
                self.logger.debug(f"Rate limit reached, waiting {pause:.2f} seconds")
                await asyncio.sleep(pause)
                now = time.time()

        # Enforce the minimum gap since the previous request.
        elapsed = now - self.last_request_time
        if elapsed < self.delay_between_requests:
            await asyncio.sleep(self.delay_between_requests - elapsed)
            now = time.time()

        # Account for this request.
        self.request_times.append(now)
        self.last_request_time = now

    def get_stats(self) -> dict:
        """Return a snapshot of the limiter's current state."""
        now = time.time()
        self._drop_stale(now)

        return {
            'requests_last_minute': len(self.request_times),
            'requests_per_minute_limit': self.requests_per_minute,
            'delay_between_requests': self.delay_between_requests,
            'time_since_last_request': now - self.last_request_time,
        }