237 lines
7.7 KiB
Python
237 lines
7.7 KiB
Python
import streamlit as st
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import json
|
|
import re
|
|
from collections import Counter
|
|
|
|
# Load the JSON data from the uploaded file
|
|
def load_json(file):
    """Parse an uploaded file as JSON.

    Args:
        file: A readable file-like object (text or binary) exposing
            ``read()``, as accepted by ``json.load``.

    Returns:
        The decoded JSON value, or ``None`` when the content cannot be
        parsed. In the failure case an error is shown via ``st.error``.
    """
    try:
        return json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Binary input is decoded by the json module itself; bytes that
        # cannot be decoded raise UnicodeDecodeError, which is not a
        # JSONDecodeError and would otherwise escape this handler.
        st.error("The uploaded file is not a valid JSON.")
        return None
|
|
|
|
# Function to categorize versions into buckets
|
|
def categorize_version(version):
    """Map a Prebid.js version string to a coarse display bucket.

    Surrounding whitespace and a leading ``v`` are ignored. The remainder
    is split on ``.`` and ``-`` and only the first two components
    (major, minor) are used; a missing minor counts as 0.

    Buckets:
        * major 7, 8 or 9: ranges of ten minor versions, e.g.
          ``'8.10-8.19'``; minors of 60 and above become e.g. ``'8.60+'``.
        * major 3 to 6: ``'<major>.x'``.
        * major 0 to 2: ``'0.x-2.x'``.
        * everything else, including unparseable or non-string input:
          ``'Other'``.

    Args:
        version: Version string such as ``'v8.12.0'`` or ``'7.54.0-pre'``.

    Returns:
        The bucket label as a string.
    """
    if not isinstance(version, str):
        return 'Other'

    version = version.strip()
    if version.startswith('v'):
        version = version[1:]

    parts = re.split(r'\.|-', version)
    try:
        major = int(parts[0])
        minor = int(parts[1]) if len(parts) > 1 else 0
    except ValueError:
        return 'Other'

    if major in (7, 8, 9):
        # Anything outside 0..59 lands in the open-ended bucket.
        if not 0 <= minor <= 59:
            return f'{major}.60+'
        low = minor // 10 * 10
        return f'{major}.{low}-{major}.{low + 9}'
    if 3 <= major <= 6:
        return f'{major}.x'
    if 0 <= major <= 2:
        return '0.x-2.x'
    return 'Other'
|
|
|
|
# Classify modules by type
|
|
def classify_module(module_name):
    """Classify a Prebid.js module name by the kind of module it denotes.

    Matching is a case-insensitive substring test, evaluated in the order
    of the rule table below; the first category that matches wins.

    Args:
        module_name: Module name, e.g. ``'appnexusBidAdapter'``.

    Returns:
        One of ``'Bid Adapter'``, ``'RTD Module'``, ``'ID System'``,
        ``'Analytics Adapter'`` or ``'Other'``. Non-string input is
        classified as ``'Other'``.
    """
    if not isinstance(module_name, str):
        return 'Other'

    # Order matters: earlier entries take precedence when several match.
    rules = (
        ('Bid Adapter', ('bidadapter',)),
        ('RTD Module', ('rtdprovider', 'rtdmodule')),
        ('ID System', ('idsystem', 'userid')),
        # 'analytics' also covers names containing 'analyticsadapter'.
        ('Analytics Adapter', ('analytics',)),
    )

    lowered = module_name.lower()
    for category, needles in rules:
        if any(needle in lowered for needle in needles):
            return category
    return 'Other'
|
|
|
|
# Function to extract and classify modules
|
|
def extract_module_stats(data):
    """Count module occurrences across all records, grouped by category.

    Each record is read in one of two layouts: the current one, where
    modules are listed per entry of ``'prebidInstances'``, or the prior
    one, where ``'modules'`` sits directly on the record. A module is
    counted once for every time it is listed.

    Args:
        data: Iterable of record dicts.

    Returns:
        dict mapping each category name (as produced by
        ``classify_module``) to a ``Counter`` of module name to number
        of occurrences.
    """
    module_counter = {
        'Bid Adapter': Counter(),
        'RTD Module': Counter(),
        'ID System': Counter(),
        'Analytics Adapter': Counter(),
        'Other': Counter(),
    }

    for item in data:
        # ``or []`` tolerates JSON ``null`` where a list is expected;
        # ``.get(key, [])`` would hand back None in that case.
        if 'prebidInstances' in item:
            module_lists = [
                instance.get('modules') or []
                for instance in item.get('prebidInstances') or []
            ]
        else:
            # Fallback to prior data structure
            module_lists = [item.get('modules') or []]

        for modules in module_lists:
            for module in modules:
                module_counter[classify_module(module)][module] += 1

    return module_counter
|
|
|
|
# Create a bar chart of the version buckets
|
|
def create_version_chart(data):
    """Render a bar chart of Prebid.js instances per version bucket.

    Versions are read from each entry of a record's ``'prebidInstances'``
    when that key is present, otherwise from the record's own
    ``'version'`` (prior data layout). Empty or missing versions are
    skipped; the rest are bucketed with ``categorize_version``.

    The chart and the total instance count are written to the Streamlit
    page. When no version is found, a notice is written instead.

    Args:
        data: Iterable of record dicts.

    Returns:
        None.
    """
    version_buckets = []
    for item in data:
        if 'prebidInstances' in item:
            # ``or []`` tolerates JSON ``null`` for the instance list.
            versions = [
                instance.get('version', '')
                for instance in item.get('prebidInstances') or []
            ]
        else:
            # Fallback to prior data structure
            versions = [item.get('version', '')]
        version_buckets.extend(categorize_version(v) for v in versions if v)

    if not version_buckets:
        st.write("No Prebid.js version information available.")
        return

    # Count occurrences of each bucket, ordered by bucket label.
    version_counts = pd.Series(version_buckets).value_counts().sort_index()

    fig, ax = plt.subplots()
    try:
        # ``rot`` sets the tick-label rotation on this plot directly,
        # without going through pyplot's "current axes" global state.
        version_counts.plot(kind='bar', ax=ax, rot=45)
        ax.set_xlabel('Version Buckets')
        ax.set_ylabel('Number of Instances')
        ax.set_title('Number of Prebid.js Instances per Version Bucket')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each rerun of the script would leave another figure in memory.
        plt.close(fig)

    st.write(f"Total Number of Prebid.js Instances: {len(version_buckets)}")
|
|
|
|
# Function to display module statistics
|
|
def display_module_stats(module_stats):
    """Write one popularity table per module category to the page.

    For every category a subheader is emitted, followed by a table with
    the module names in a column named after the category and their
    counts in a ``'Count'`` column, sorted by count in descending order.

    Args:
        module_stats: Mapping of category name to a mapping of module
            name to occurrence count.

    Returns:
        None.
    """
    for category in module_stats:
        counts = module_stats[category]
        st.subheader(f"{category} Popularity")

        table = pd.DataFrame(counts.items(), columns=[category, 'Count'])
        ranked = table.sort_values(by='Count', ascending=False)
        st.table(ranked.reset_index(drop=True))
|
|
|
|
# Function to create a plot for the popularity of other libraries
|
|
def create_libraries_chart(data):
    """Render a bar chart of how often each other library was detected.

    Library names are collected from every record's ``'libraries'`` list
    and counted. The chart and the total number of records are written to
    the Streamlit page. When no library is found, a notice is written
    instead.

    Args:
        data: Sized collection of record dicts (``len(data)`` is reported
            as the number of URLs).

    Returns:
        None.
    """
    libraries_list = []
    for item in data:
        # ``or []`` tolerates JSON ``null`` for the libraries list.
        libraries_list.extend(item.get('libraries') or [])

    if not libraries_list:
        st.write("No libraries information available.")
        return

    library_counts = pd.Series(libraries_list).value_counts().sort_values(ascending=False)

    fig, ax = plt.subplots()
    try:
        # ``rot`` sets the tick-label rotation on this plot directly,
        # without going through pyplot's "current axes" global state.
        library_counts.plot(kind='bar', ax=ax, rot=45)
        ax.set_xlabel('Libraries')
        ax.set_ylabel('Number of URLs')
        ax.set_title('Popularity of Other Libraries Detected')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each rerun of the script would leave another figure in memory.
        plt.close(fig)

    st.write(f"Total Number of URLs: {len(data)}")
|
|
|
|
# Streamlit app
|
|
# Records are dropped when any of their module lists is longer than this.
MAX_MODULES_PER_INSTANCE = 300


def _within_module_limit(item):
    """Return True unless a module list of *item* exceeds the limit.

    With ``'prebidInstances'`` present, every instance's ``'modules'`` list
    is checked; otherwise the record's own ``'modules'`` list is checked
    (prior data layout). Missing or ``null`` lists count as empty.
    """
    if 'prebidInstances' in item:
        module_lists = [
            instance.get('modules') or []
            for instance in item.get('prebidInstances') or []
        ]
    else:
        module_lists = [item.get('modules') or []]
    return all(len(modules) <= MAX_MODULES_PER_INSTANCE for modules in module_lists)


st.title('Prebid.js and Libraries Analysis')

uploaded_file = st.file_uploader('Upload a JSON file', type='json')

if uploaded_file is not None:
    data = load_json(uploaded_file)
    # The analysis expects a non-empty JSON array of objects; any other
    # top-level value is reported as invalid instead of crashing below.
    if isinstance(data, list) and data:
        filtered_data = [
            item
            for item in data
            if isinstance(item, dict) and _within_module_limit(item)
        ]

        st.header('Version Popularity Chart (Grouped by Buckets)')
        create_version_chart(filtered_data)

        st.header('Module Statistics')
        module_stats = extract_module_stats(filtered_data)
        display_module_stats(module_stats)

        st.header('Popularity of Other Libraries Detected')
        create_libraries_chart(filtered_data)
    else:
        st.write("No valid data found in the uploaded file.")
|