Real Project
Learn these concepts by building a complete publication analysis dashboard, step by step.
Project Overview
Build a comprehensive publication analysis dashboard with:
- Publication metrics (count, citations, trends)
- Author analysis
- Journal analysis
- Interactive filters and visualizations
Final Dashboard Structure
┌─────────────────────────────────────────────────┐
│ Publication Analysis Dashboard │
├─────────────────────────────────────────────────┤
│ Filters: [Date Range] [Journal] [Author] │
│ │
│ ┌─────────────────┐ ┌─────────────────┐ │
│ │ Total Pubs: 250 │ │ Avg Citations:9 │ │
│ └─────────────────┘ └─────────────────┘ │
│ │
│ ┌─────────────────────────────────────┐ │
│ │ Publications Over Time │ │
│ │ [Line chart: 2020-2024] │ │
│ └─────────────────────────────────────┘ │
│ │
│ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Top 10 Authors │ │ By Journal │ │
│ └──────────────────┘ └──────────────────┘ │
└─────────────────────────────────────────────────┘
Implementation Steps
Step 1: Create Project with dashboard template
💡 Setup & Deployment: For detailed setup instructions and deployment steps, see Dashboard Development in the Setup Guide.
Step 2: Define Data Layer
# data/datasets.py
from ds_dash_support.utils.datamanager.datasets import GbqPydanticDataset
# Publications-per-year aggregate, filterable by a year range.
# NOTE: this is deliberately a *plain* string, not an f-string — the original
# f-string had no interpolation fields at all (it only escaped braces), which
# forced quadruple-brace escaping for no benefit. `{{ project }}` and
# `{{ dataset }}` are template placeholders resolved downstream (presumably by
# the data manager — confirm against ds_dash_support docs), and
# {min_year}/{max_year} are filled in later via str.format() from the filters.
annual_pubs = GbqPydanticDataset(
    name="annual_pubs",
    query="""
        SELECT
            year,
            COUNT(*) as publication_count,
            AVG(citations) as avg_citations
        FROM `{{ project }}.{{ dataset }}.publications`
        WHERE year >= {min_year}
        AND year <= {max_year}
        GROUP BY year
        ORDER BY year DESC
    """,
)
Step 3: Create Overview Page
# pages/overview.py
import dash_bootstrap_components as dbc
from dash import html, dcc, callback, Input, Output
import plotly.express as px
from data.datamanager import ddm
from ds_dash_support.themes.looker import Container, Graph
# Factory for data-aware callbacks: decorated functions receive dataset
# payloads from the data manager (see update_time_chart below, which reads
# data.datasets.annual_pubs). manager_filters wires the shared
# "dashboard-manager" filters in as the triggering Input; the exact effect of
# flatten_kwargs=True is not visible here — confirm against ds_dash_support docs.
data_callback = ddm.data_callback(
    flatten_kwargs=True,
    manager_filters=Input("dashboard-manager", "filters"),
)
# Overview page layout: page title, a full-width time-series chart, then two
# half-width breakdown charts side by side.
_time_series_row = dbc.Row([
    dbc.Col(dcc.Graph(id='publications-over-time'), md=12),
])
_breakdown_row = dbc.Row([
    dbc.Col(dcc.Graph(id='top-authors'), md=6),
    dbc.Col(dcc.Graph(id='by-journal'), md=6),
])
layout = Container([
    html.H1("Publication Analysis Dashboard"),
    _time_series_row,
    _breakdown_row,
])
@data_callback(
    Output('publications-over-time', 'figure'),
)
def update_time_chart(data):
    """Render the publications-per-year line chart from the annual_pubs dataset."""
    return px.line(
        data.datasets.annual_pubs,
        x='year',
        y='publication_count',
    )
Advanced Features
Multi-Criteria Filtering
@callback(
    [Output('results-summary', 'children'),
     Output('results-table', 'children')],
    [Input('search-button', 'n_clicks')],
    [State('author-search', 'value'),
     State('journal-dropdown', 'value'),
     State('year-slider', 'value'),
     State('citation-slider', 'value')],
    prevent_initial_call=True
)
def search(n_clicks, author, journals, years, citations):
    """Search publications by author substring, journal list, and value ranges.

    Args:
        n_clicks: search-button click count (trigger only; value unused).
        author: free-text author search string; falsy skips the filter.
        journals: list of journal names; falsy skips the filter.
        years: (min, max) publication-year range from the slider, inclusive.
        citations: (min, max) citation-count range from the slider, inclusive.

    Returns:
        (summary alert, results table) on success;
        (error alert, None) on any failure.
    """
    try:
        data = data_manager.get_publications()
        if author:
            # regex=False: treat the user's text as a literal substring —
            # otherwise characters like '(' or '+' raise a regex error.
            data = data[data['authors'].str.contains(
                author, case=False, na=False, regex=False)]
        if journals:
            data = data[data['journal'].isin(journals)]
        # Range filters always apply (sliders always carry a value).
        data = data[
            (data['publication_year'] >= years[0]) &
            (data['publication_year'] <= years[1]) &
            (data['citations'] >= citations[0]) &
            (data['citations'] <= citations[1])
        ]
        summary = dbc.Alert([
            html.P(f"Found {len(data)} publications"),
            html.P(f"Total citations: {data['citations'].sum():,}"),
            html.P(f"Average citations: {data['citations'].mean():.1f}")
        ], color='success')
        table = dbc.Table.from_dataframe(
            data[['title', 'authors', 'journal', 'publication_year', 'citations']],
            striped=True,
            bordered=True,
            hover=True
        )
        return summary, table
    except Exception as e:
        # Surface the failure to the user rather than crashing the callback.
        return dbc.Alert(f"Error: {str(e)}", color='danger'), None
Data Export
from dash import dcc
from datetime import datetime
layout = dbc.Container([
    # Invisible download target; the export_csv callback pushes the CSV
    # payload into it to trigger the browser download.
    dcc.Download(id='download-csv'),
    dbc.Button('Export to CSV', id='export-csv-button', color='primary'),
])
@callback(
    Output('download-csv', 'data'),
    Input('export-csv-button', 'n_clicks'),
    State('filter-value', 'value'),
    prevent_initial_call=True
)
def export_csv(n_clicks, filter_value):
    """Serialize the currently filtered publications and trigger a CSV download."""
    frame = data_manager.get_publications(filters={'category': filter_value})
    date_stamp = datetime.now().strftime("%Y%m%d")
    return {
        'content': frame.to_csv(index=False),
        'filename': f'publications_{date_stamp}.csv',
    }
Real-Time Updates
layout = dbc.Container([
    # Timer component: fires a callback every `interval` milliseconds.
    dcc.Interval(
        id='refresh-interval',
        interval=300000,  # 5 minutes (value is in milliseconds)
        n_intervals=0
    ),
    html.Div(id='last-updated'),
])
@callback(
    Output('last-updated', 'children'),
    Input('refresh-interval', 'n_intervals')
)
def update_timestamp(n_intervals):
    """Display the wall-clock time of the most recent refresh tick."""
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f'Last updated: {now}'
@callback(
    Output('data-store', 'data'),
    Input('refresh-interval', 'n_intervals')
)
def refresh_data(n_intervals):
    """Re-query publications on every interval tick and push them to the store."""
    publications = data_manager.get_publications()
    return publications.to_dict('records')
Graceful Error Handling
def _message_figure(text):
    """Return an empty figure whose only content is a centered message."""
    return go.Figure().add_annotation(
        text=text,
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False
    )


@callback(
    Output('chart', 'figure'),
    Input('load-button', 'n_clicks')
)
def safe_chart_update(n_clicks):
    """Build the journal/citations bar chart, degrading gracefully on errors.

    On a query timeout, fall back to the last cached data if available;
    otherwise — or on any other error — return a placeholder figure with an
    explanatory message instead of crashing the callback.
    """
    try:
        data = data_manager.get_publications()
        return px.bar(data, x='journal', y='citations')
    except TimeoutError:
        logger.warning("Query timeout, using cached data")
        cached_data = get_last_cached_data()
        if cached_data is not None:
            return px.bar(cached_data, x='journal', y='citations')
        return _message_figure("Could not load data (timeout)")
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        return _message_figure("An error occurred. Please try again.")
Complete Code Examples
DataManager Implementation
# data/manager.py
from google.cloud import bigquery
from cache import CacheManager
import logging
import pandas as pd
logger = logging.getLogger(__name__)
# Module-level cache instance shared by all DataManager objects.
cache = CacheManager()
class DataManager:
    """Central data access for all datasets.

    Wraps a BigQuery client with a simple read-through cache: each public
    getter checks the module-level cache first and otherwise runs its query,
    storing the resulting DataFrame under a key derived from the method name
    and its limit.
    """

    def __init__(self, project_id):
        # project_id is interpolated into the table paths in the queries below.
        self.client = bigquery.Client(project=project_id)
        self.project_id = project_id

    def _cached_query(self, cache_key, query):
        """Return the DataFrame for *query*, serving from cache when possible."""
        cached = cache.get(cache_key)
        if cached is not None:
            logger.info(f"Cache hit: {cache_key}")
            return cached
        logger.info(f"Cache miss, querying BigQuery: {cache_key}")
        result = self.client.query(query).result().to_dataframe()
        cache.set(cache_key, result)
        return result

    def get_all_publications(self, limit=10000):
        """Get up to *limit* publications, highest-cited first."""
        # Coerce to int so the f-string interpolation below cannot inject SQL.
        limit = int(limit)
        query = f"""
            SELECT
                id, title, authors, journal,
                publication_year, citations, doi, abstract
            FROM `{self.project_id}.publications.data`
            ORDER BY citations DESC
            LIMIT {limit}
        """
        return self._cached_query(f'all_publications_{limit}', query)

    def get_top_authors(self, limit=10):
        """Get the top *limit* authors ranked by total citations."""
        # Coerce to int so the f-string interpolation below cannot inject SQL.
        limit = int(limit)
        query = f"""
            SELECT
                name,
                COUNT(*) as publication_count,
                SUM(citations) as total_citations,
                ROUND(AVG(citations), 1) as avg_citations
            FROM `{self.project_id}.publications.authors`
            GROUP BY name
            ORDER BY total_citations DESC
            LIMIT {limit}
        """
        return self._cached_query(f'top_authors_{limit}', query)
App Configuration
# config.py
# Application configuration, sourced from environment variables (optionally
# loaded from a .env file) with development-friendly defaults.
import os
from dotenv import load_dotenv

load_dotenv()

# Environment
ENVIRONMENT = os.getenv('ENVIRONMENT', 'development')
# Accept the common truthy spellings ('True', 'true', '1', 'yes') instead of
# only the exact string 'True'; 'True' still works, so this is
# backward-compatible.
DEBUG = os.getenv('DEBUG', 'False').strip().lower() in ('true', '1', 'yes')

# GCP
GCP_PROJECT_ID = os.getenv('GCP_PROJECT_ID', 'your-project')
GCP_CREDENTIALS_PATH = os.getenv('GCP_CREDENTIALS_PATH', './credentials.json')

# Cache
CACHE_TTL = int(os.getenv('CACHE_TTL', '3600'))  # presumably seconds — confirm against CacheManager
REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))

# Logging
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')

# App
APP_NAME = 'Publication Dashboard'
APP_VERSION = '1.0.0'