Skip to content

Commit

Permalink
Merge pull request #278 from amosproj/253-clean-up-4
Browse files Browse the repository at this point in the history
253 clean up 4
  • Loading branch information
chrisklg authored Jan 21, 2025
2 parents 584a295 + a539236 commit 08e0e69
Show file tree
Hide file tree
Showing 43 changed files with 1,538 additions and 1,844 deletions.
19 changes: 3 additions & 16 deletions apps/analyzer/metadata_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,15 @@ class Analyzer:
def init(
    database,
    backend,
    simple_analyzer,
    simple_rule_based_analyzer,
    time_series_analyzer,
):
    """Wire up the Analyzer's collaborators as class-level state.

    The Analyzer is used as a static singleton: every collaborator is
    stored on the class itself, not on an instance.
    """
    Analyzer.database = database
    Analyzer.backend = backend
    Analyzer.simple_analyzer = simple_analyzer
    Analyzer.simple_rule_based_analyzer = simple_rule_based_analyzer
    Analyzer.time_series_analyzer = time_series_analyzer
    # No time series has been loaded yet at initialization time.
    Analyzer.series_loaded = False

def analyze():
    """Run the simple analysis over all stored results.

    Fetches every result from the database, converts those that have a
    known data size into the backend format, and delegates the actual
    analysis to the simple analyzer.

    Returns:
        Whatever ``simple_analyzer.analyze`` produces for the converted
        results.
    """
    # `is not None` instead of `!= None`; results without a data size
    # cannot be converted and are skipped.
    converted_data = [
        Analyzer._convert_result(elem)
        for elem in Analyzer.database.get_results()
        if elem.data_size is not None
    ]
    return Analyzer.simple_analyzer.analyze(converted_data)

# Convert a result from the database into the format used by the backend
def _convert_result(result):
backup_type = {
Expand Down Expand Up @@ -80,8 +66,9 @@ def _send_Backups():
results = list(Analyzer.database.get_results(latest_backup_date))

schedules = list(Analyzer.database.get_schedules())
Analyzer.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date, "ONLY_SCHEDULES")

Analyzer.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date,
"ONLY_SCHEDULES")

# Batch the api calls to the backend for improved efficiency
batch = []
count = 0
Expand Down
12 changes: 6 additions & 6 deletions apps/analyzer/metadata_analyzer/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ def send_storage_data(self, storage):
r = requests.post(url, json=storage)
r.raise_for_status()

def create_size_alert(self, alert):
url = self.backend_url + "alerting/size"
r = requests.post(url, json=alert)
def create_size_alerts_batched(self, alerts):
    """POST a batch of size alerts to the backend's batched endpoint.

    Raises an HTTPError if the backend rejects the request.
    """
    endpoint = self.backend_url + "alerting/size/batched"
    response = requests.post(endpoint, json=alerts)
    response.raise_for_status()

def create_creation_date_alert(self, alert):
url = self.backend_url + "alerting/creationDate"
r = requests.post(url, json=alert)
def create_creation_date_alerts_batched(self, alerts):
    """POST a batch of creation-date alerts to the backend's batched endpoint.

    Raises an HTTPError if the backend rejects the request.
    """
    endpoint = self.backend_url + "alerting/creationDate/batched"
    response = requests.post(endpoint, json=alerts)
    response.raise_for_status()

def create_storage_fill_alerts(self, alerts):
Expand Down
15 changes: 3 additions & 12 deletions apps/analyzer/metadata_analyzer/database.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import datetime
import os

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from metadata_analyzer.models import BackupData, Result, Tasks, DataStore, Schedule
from metadata_analyzer.analyzer import Analyzer
from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer
import os

from metadata_analyzer.models import Result, Tasks, DataStore, Schedule


class Database:
Expand All @@ -28,14 +27,6 @@ def __init__(self):
+ db_name
)


def get_data(self):
    """Return a scalar result iterator over all BackupData rows."""
    session = Session(self.engine)
    # NOTE(review): the session is not closed here; the caller appears to
    # consume the iterator while it is live — confirm intended lifetime.
    return session.scalars(select(BackupData))

def get_results(self, latest_backup_date = None):
session = Session(self.engine)
stmt = select(Result)
Expand Down
41 changes: 8 additions & 33 deletions apps/analyzer/metadata_analyzer/main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from flask import Flask, request, jsonify
import os

from dotenv import load_dotenv
from flasgger import Swagger
from flasgger import swag_from
from flask import Flask, request, jsonify

from metadata_analyzer.analyzer import Analyzer
from metadata_analyzer.backend import Backend
from metadata_analyzer.database import Database
from metadata_analyzer.simple_analyzer import SimpleAnalyzer
from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer
from metadata_analyzer.analyzer import Analyzer
from metadata_analyzer.time_series_analyzer import Time_series_analyzer
from metadata_analyzer.backend import Backend
from flasgger import Swagger
from flasgger import swag_from
import requests
import os

app = Flask(__name__)
swagger = Swagger(app)
Expand All @@ -21,29 +21,6 @@
def hello_world():
    """Trivial liveness endpoint: always responds with a static greeting."""
    return "Hello, world!"


@app.route("/echo", methods=["POST"])
@swag_from(os.path.join(path,'swagger','echo.yaml'), validation=False)
def echo():
    """Reverse the text in the request body and echo it back as JSON.

    Expects a payload of the form ``{"body": {"text": ...}}`` and
    returns ``{"output": <reversed text>}``.
    """
    import json  # local import keeps this fix self-contained

    if request.method == "POST":
        data = request.get_json()
        text = data["body"]["text"]
        # str[::-1] replaces the manual reverse loop. json.dumps escapes
        # quotes/backslashes in the input — the previous hand-built JSON
        # string produced invalid JSON whenever the text contained a quote.
        return json.dumps({"output": text[::-1]})


@app.route("/analyze", methods=["GET"])
@swag_from(os.path.join(path,'swagger','analyze.yaml'), validation=False)
def analyze():
    """Run the simple analysis and return its result as a JSON response."""
    analysis_result = Analyzer.analyze()
    return jsonify(analysis_result)


@app.route("/updateBasicBackupData", methods=["POST"])
@swag_from(os.path.join(path,'swagger','updateBasicBackupData.yaml'), validation=False)
def update_data():
Expand Down Expand Up @@ -258,7 +235,6 @@ def calculate_training_indices():
def main():
database = Database()
backend = Backend(os.getenv("BACKEND_URL"))
simple_analyzer = SimpleAnalyzer()
parameters = []
parameters.append(os.getenv("ANOMALY_THRESHOLD"))
parameters.append(os.getenv("CLUSTER_NUMBER"))
Expand All @@ -267,7 +243,6 @@ def main():
Analyzer.init(
database,
backend,
simple_analyzer,
simple_rule_based_analyzer,
time_series_analyzer,
)
Expand Down
188 changes: 87 additions & 101 deletions apps/analyzer/metadata_analyzer/models.py
Original file line number Diff line number Diff line change
@@ -1,130 +1,116 @@
from datetime import datetime

from sqlalchemy.orm import mapped_column, Mapped
from sqlalchemy.orm import DeclarativeBase
from typing import ClassVar, Optional

class Base(DeclarativeBase):
pass

class BackupData(Base):
__tablename__ = "BackupData"

id: Mapped[str] = mapped_column(primary_key=True)
saveset: Mapped[str]
sizeMB: Mapped[int]
creationDate: Mapped[datetime]
bio: Mapped[str]
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import mapped_column, Mapped

def __repr__(self):
return f"""BackupData(id={self.id}, sizeMB={self.sizeMB}, creationDate={self.creationDate}, bio={self.bio!r})"""

def __str__(self):
return repr(self)
class Base(DeclarativeBase):
    """Shared SQLAlchemy declarative base for all ORM models in this module."""
    pass


class Result(Base):
    """ORM model for one row of the ``results`` table.

    This span contained two interleaved copies of the class body (diff
    scrape residue); they are deduplicated into a single definition.
    """

    __tablename__ = "results"

    # For now I only added the most relevant columns
    saveset: Mapped[str] = mapped_column(primary_key=True)
    uuid: Mapped[str]
    task: Mapped[str]
    task_uuid: Mapped[str]
    fdi_type: Mapped[str]
    is_backup: Mapped[int]
    state: Mapped[int]
    subtask_flag: Mapped[str]
    schedule: Mapped[str]

    start_time: Mapped[datetime]
    stop_time: Mapped[datetime]
    sbc_start: Mapped[datetime]

    data_size: Mapped[int]
    throughput: Mapped[str]
    duration: Mapped[int]
    # Not a mapped DB column: plain class attribute attached during analysis.
    scheduledTime: ClassVar[Optional[datetime]] = None

    def __repr__(self):
        return f"""Result(uuid={self.uuid})"""

    def __str__(self):
        return repr(self)

    def as_dict(self):
        """Serialize this row into the dict format sent to the backend."""
        return {
            "saveset": self.saveset,
            "uuid": self.uuid,
            "task": self.task,
            "task_uuid": self.task_uuid,
            "fdi_type": self.fdi_type,
            "is_backup": self.is_backup,
            "state": self.state,
            "start_time": self.start_time,
            "stop_time": self.stop_time,
            "sbc_start": self.sbc_start,
            "data_size": self.data_size,
            "throughput": self.throughput,
            "duration": self.duration,
            "scheduledTime": self.scheduledTime,
        }


class Tasks(Base):
    """ORM model for one row of the ``tasks`` table.

    This span contained two interleaved copies of the class body (diff
    scrape residue); they are deduplicated into a single definition.
    """

    __tablename__ = "tasks"

    # For now I only added the most relevant columns
    task: Mapped[str] = mapped_column(primary_key=True)
    uuid: Mapped[str]

    def __repr__(self):
        return f"""Tasks(uuid={self.uuid})"""

    def __str__(self):
        return repr(self)


class DataStore(Base):
    """ORM model for one row of the ``data_stores`` table.

    This span contained two interleaved copies of the class body (diff
    scrape residue); they are deduplicated into a single definition.
    """

    __tablename__ = "data_stores"

    # For now I only added the most relevant columns
    name: Mapped[str] = mapped_column(primary_key=True)
    uuid: Mapped[str]
    capacity: Mapped[float]
    high_water_mark: Mapped[float]
    filled: Mapped[float]
    stored: Mapped[float]

    def __repr__(self):
        return f"""DataStore(name={self.name})"""

    def __str__(self):
        return repr(self)


class Schedule(Base):
    """ORM model for one row of the ``schedules`` table.

    This span contained two interleaved copies of the class body (diff
    scrape residue); they are deduplicated into a single definition.
    """

    __tablename__ = "schedules"

    # For now I only added the most relevant columns
    name: Mapped[str] = mapped_column(primary_key=True)
    uuid: Mapped[str]

    p_base: Mapped[str]
    p_count: Mapped[int]

    start_time: Mapped[str]

    # One flag column per weekday the schedule applies to.
    mo: Mapped[str]
    tu: Mapped[str]
    we: Mapped[str]
    th: Mapped[str]
    fr: Mapped[str]
    sa: Mapped[str]
    su: Mapped[str]

    def __repr__(self):
        return f"""Schedule(name={self.name})"""

    def __str__(self):
        return repr(self)
Loading

0 comments on commit 08e0e69

Please sign in to comment.