-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepare_kaggle_dataset.py
More file actions
108 lines (88 loc) · 3.09 KB
/
prepare_kaggle_dataset.py
File metadata and controls
108 lines (88 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
"""
Kaggle Dataset Preparation Script
Packages agent code for upload to Kaggle
"""
import json
import os
import shutil
import zipfile
from pathlib import Path
def create_kaggle_dataset():
    """Package the agent code for upload to Kaggle.

    Creates a ``kaggle_dataset/`` directory in the current working directory
    containing:

    - ``telco_agent_code.zip`` -- all ``.py`` files found (recursively) under
      the source directories, plus a few individual top-level files, when
      they exist. Missing directories/files are silently skipped.
    - ``dataset-metadata.json`` -- Kaggle dataset metadata.
    - ``upload_instructions.md`` -- manual upload steps.

    Returns:
        pathlib.Path: Path to the created zip file.
    """
    # Source directories: only .py files inside them are packaged.
    directories = [
        "agent",
        "utils",
        "data_prep"
    ]
    # Individual top-level files included verbatim (any extension).
    files = [
        "kaggle_llm_engine.py",
        "test_server.py",
        "requirements.txt",
        "README.md"
    ]
    # Create output directory
    output_dir = Path("kaggle_dataset")
    output_dir.mkdir(exist_ok=True)
    # Create zip file. Count entries as we add them: querying
    # zipf.namelist() after the `with` block closes the archive relies on
    # CPython keeping the name table alive on a closed ZipFile.
    zip_path = output_dir / "telco_agent_code.zip"
    added_count = 0
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Add Python sources from each existing directory (recursive walk).
        for directory in directories:
            if os.path.exists(directory):
                for root, dirs, files_in_dir in os.walk(directory):
                    for file in files_in_dir:
                        if file.endswith('.py'):
                            file_path = os.path.join(root, file)
                            # Archive name mirrors the on-disk relative path.
                            arcname = os.path.relpath(file_path, ".")
                            zipf.write(file_path, arcname)
                            added_count += 1
                            print(f"Added: {arcname}")
        # Add individual files
        for file in files:
            if os.path.exists(file):
                zipf.write(file, file)
                added_count += 1
                print(f"Added: {file}")
    # Kaggle dataset metadata (shape follows dataset-metadata.json spec).
    metadata = {
        "title": "Telco Troubleshooting Agent Code",
        "description": "Complete agent implementation for Telco Troubleshooting Agentic Challenge",
        "keywords": ["telco", "troubleshooting", "agent", "llm", "react"],
        "licenses": ["mit"]
    }
    # Save metadata (json imported at module level).
    with open(output_dir / "dataset-metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)
    print(f"\nKaggle dataset created: {zip_path}")
    print(f"Size: {zip_path.stat().st_size / 1024 / 1024:.1f} MB")
    print(f"Files included: {added_count}")
    # Create upload instructions
    instructions = """
# Kaggle Dataset Upload Instructions
## Files Created:
- telco_agent_code.zip: Contains all agent code
- dataset-metadata.json: Dataset metadata
## Upload Steps:
1. Go to https://www.kaggle.com/datasets
2. Click "New Dataset"
3. Upload telco_agent_code.zip
4. Copy metadata from dataset-metadata.json
5. Set as private dataset
6. Note the dataset path (e.g., /kaggle/input/username/telco-agent-code)
## Usage in Notebook:
```python
# Add dataset to path
import sys
sys.path.append('/kaggle/input/username/telco-agent-code')
# Import modules
from kaggle_llm_engine import create_kaggle_llm_engine
from agent.react_loop import ReActAgent
```
"""
    with open(output_dir / "upload_instructions.md", "w") as f:
        f.write(instructions)
    print(f"Upload instructions saved: {output_dir / 'upload_instructions.md'}")
    return zip_path
if __name__ == "__main__":
    # Script entry point: build the dataset bundle and report where it landed.
    output_zip = create_kaggle_dataset()
    print(f"\nReady for Kaggle upload: {output_zip}")