-
-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathmain.py
122 lines (101 loc) · 4.77 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import uvicorn
import argparse
import asyncio
import subprocess
from pathlib import Path
from src.api.app import app
from src.processing.document_processor import DocumentProcessor
from src.processing.markdown_converter import MarkdownConverter
from src.processing.file_update_handler import FileUpdateHandler
from src.core.services.embedding import EmbeddingService
from src.core.services.db_service import DatabaseService
from src.config.settings import settings
from openai import AsyncOpenAI
from src.utils.logging import logger
async def process_documents(base_dir: str):
"""Process markdown documents to embeddings"""
openai_client = AsyncOpenAI(
api_key=settings.OPENAI_API_KEY,
base_url=settings.OPENAI_API_BASE
)
db_service = DatabaseService()
embedding_service = EmbeddingService(openai_client)
processor = DocumentProcessor(db_service, embedding_service)
await processor.process_directory(base_dir)
async def process_raw_data(raw_dir: str, output_dir: str, process_docs: bool = False):
"""Process raw RST files to markdown and optionally process documents
Args:
raw_dir (str): Directory containing raw RST files
output_dir (str): Output directory for markdown files
process_docs (bool): Whether to process documents after conversion
"""
# Step 1: Convert RST to Markdown
converter = MarkdownConverter()
converter.process_directory(raw_dir, output_dir)
# Step 2: Process markdown files to documents (optional)
if process_docs:
await process_documents(output_dir)
async def check_updates(raw_dir: str, markdown_dir: str):
"""Check for updates in raw data and process changed files.
Args:
raw_dir (str): Raw data directory
markdown_dir (str): Markdown data directory
Returns:
Added, modified, removed files
"""
openai_client = AsyncOpenAI(
api_key=settings.OPENAI_API_KEY,
base_url=settings.OPENAI_API_BASE
)
db_service = DatabaseService()
embedding_service = EmbeddingService(openai_client)
document_processor = DocumentProcessor(db_service, embedding_service)
markdown_converter = MarkdownConverter()
update_handler = FileUpdateHandler(
document_processor=document_processor,
markdown_converter=markdown_converter
)
added, modified, removed = await update_handler.check_and_process_updates(
raw_dir=raw_dir,
markdown_dir=markdown_dir
)
logger.info(f"Added files: {len(added)}")
logger.info(f"Modified files: {len(modified)}")
logger.info(f"Removed files: {len(removed)}")
return added, modified, removed
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Odoo Documentation Assistant')
subparsers = parser.add_subparsers(dest='command', help='Commands')
# Server command
server_parser = subparsers.add_parser('serve', help='Run the server')
server_parser.add_argument('--mode', choices=['api', 'ui'], required=True,
help='Run mode: api for FastAPI server or ui for Streamlit interface')
server_parser.add_argument('--host', default='0.0.0.0', help='Host to run the server on')
server_parser.add_argument('--port', type=int, default=8000, help='Port to run the server on')
# Process commands
process_raw_parser = subparsers.add_parser('process-raw', help='Process raw RST files')
process_raw_parser.add_argument('--process-docs', action='store_true',
help='Process documents after conversion')
process_docs_parser = subparsers.add_parser('process-docs', help='Process markdown documents')
# Add check-updates command
check_updates_parser = subparsers.add_parser('check-updates',
help='Check and process updated files')
args = parser.parse_args()
if args.command == 'serve':
if args.mode == 'api':
uvicorn.run(app, host=args.host, port=args.port)
elif args.mode == 'ui':
subprocess.run(["streamlit", "run", "src/ui/streamlit_app.py"])
elif args.command == 'process-raw':
asyncio.run(process_raw_data(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR, args.process_docs))
elif args.command == 'process-docs':
async def run_sequential():
# First run check-updates to generate file cache
await check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR)
# Then process the documents
await process_documents(settings.MARKDOWN_DATA_DIR)
asyncio.run(run_sequential())
elif args.command == 'check-updates':
asyncio.run(check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR))
else:
parser.print_help()