Skip to content

Real Project

Learn through a complete publication analysis dashboard implementation.

Project Overview

Build a comprehensive publication analysis dashboard with:

  • Publication metrics (count, citations, trends)
  • Author analysis
  • Journal analysis
  • Interactive filters and visualizations

Final Dashboard Structure

┌─────────────────────────────────────────────────┐
│  Publication Analysis Dashboard                  │
├─────────────────────────────────────────────────┤
│  Filters:  [Date Range]  [Journal]  [Author]   │
│                                                 │
│  ┌─────────────────┐  ┌─────────────────┐      │
│  │ Total Pubs: 250 │  │ Avg Citations:9 │      │
│  └─────────────────┘  └─────────────────┘      │
│                                                 │
│  ┌─────────────────────────────────────┐        │
│  │ Publications Over Time               │        │
│  │ [Line chart: 2020-2024]              │        │
│  └─────────────────────────────────────┘        │
│                                                 │
│  ┌──────────────────┐  ┌──────────────────┐    │
│  │ Top 10 Authors   │  │ By Journal       │    │
│  └──────────────────┘  └──────────────────┘    │
└─────────────────────────────────────────────────┘

Implementation Steps

Step 1: Create a project from the dashboard template

💡 Setup & Deployment: For detailed setup instructions and deployment steps, see Dashboard Development in the Setup Guide.

Step 2: Define Data Layer

# data/datasets.py
from ds_dash_support.utils.datamanager.datasets import GbqPydanticDataset

# Dataset definition for yearly publication metrics.
# NOTE: the query is a plain string on purpose. The original used an f-string
# with no real placeholders (every brace was an escape), which forced
# confusing quadruple braces; this form produces the exact same string.
# "{{ project }}"/"{{ dataset }}" are template placeholders resolved by the
# data layer, while "{min_year}"/"{max_year}" are str.format-style parameters
# filled in at query time (presumably from dashboard filters — confirm).
annual_pubs = GbqPydanticDataset(
    name="annual_pubs",
    query="""
SELECT
    year,
    COUNT(*) as publication_count,
    AVG(citations) as avg_citations
FROM `{{ project }}.{{ dataset }}.publications`
WHERE year >= {min_year}
    AND year <= {max_year}
GROUP BY year
ORDER BY year DESC
""",
)

Step 3: Create Overview Page

# pages/overview.py
import dash_bootstrap_components as dbc
from dash import html, dcc, callback, Input, Output
import plotly.express as px
from data.datamanager import ddm
from ds_dash_support.themes.looker import Container, Graph

# Shared callback decorator from the data manager: wraps Dash callbacks so
# they receive the resolved datasets (filtered by the dashboard-level
# filter state) instead of querying directly.
data_callback = ddm.data_callback(
    flatten_kwargs=True,  # presumably spreads dataset kwargs flat — verify against ddm docs
    manager_filters=Input("dashboard-manager", "filters"),  # re-runs when dashboard filters change
)

# Overview page layout: a full-width time-series chart stacked above two
# half-width breakdown charts, all inside the Looker-themed Container.
_time_series_row = dbc.Row([
    dbc.Col(dcc.Graph(id='publications-over-time'), md=12),
])

_breakdown_row = dbc.Row([
    dbc.Col(dcc.Graph(id='top-authors'), md=6),
    dbc.Col(dcc.Graph(id='by-journal'), md=6),
])

layout = Container([
    html.H1("Publication Analysis Dashboard"),
    _time_series_row,
    _breakdown_row,
])

@data_callback(
    Output('publications-over-time', 'figure'),
)
def update_time_chart(data):
    """Render the yearly publication-count line chart from the annual_pubs dataset."""
    annual = data.datasets.annual_pubs
    return px.line(annual, x='year', y='publication_count')

Advanced Features

Multi-Criteria Filtering

@callback(
    [Output('results-summary', 'children'),
     Output('results-table', 'children')],
    [Input('search-button', 'n_clicks')],
    [State('author-search', 'value'),
     State('journal-dropdown', 'value'),
     State('year-slider', 'value'),
     State('citation-slider', 'value')],
    prevent_initial_call=True
)
def search(n_clicks, author, journals, years, citations):
    """Filter publications by author text, journal list, year range and
    citation range, returning a summary alert and a result table."""
    try:
        df = data_manager.get_publications()

        # Optional filters: only applied when the user supplied a value.
        if author:
            author_mask = df['authors'].str.contains(author, case=False, na=False)
            df = df[author_mask]
        if journals:
            df = df[df['journal'].isin(journals)]

        # Range filters — sliders always provide a [low, high] pair.
        in_range = (
            (df['publication_year'] >= years[0])
            & (df['publication_year'] <= years[1])
            & (df['citations'] >= citations[0])
            & (df['citations'] <= citations[1])
        )
        df = df[in_range]

        summary = dbc.Alert([
            html.P(f"Found {len(df)} publications"),
            html.P(f"Total citations: {df['citations'].sum():,}"),
            html.P(f"Average citations: {df['citations'].mean():.1f}")
        ], color='success')

        visible_columns = ['title', 'authors', 'journal', 'publication_year', 'citations']
        table = dbc.Table.from_dataframe(
            df[visible_columns],
            striped=True,
            bordered=True,
            hover=True
        )

        return summary, table

    except Exception as e:
        # Surface any failure as an error alert rather than a blank page.
        return dbc.Alert(f"Error: {str(e)}", color='danger'), None

Data Export

from dash import dcc
from datetime import datetime

# Export section layout: an invisible Download target plus the button
# whose clicks trigger the CSV download callback.
_download_target = dcc.Download(id='download-csv')
_export_button = dbc.Button('Export to CSV', id='export-csv-button', color='primary')

layout = dbc.Container([_download_target, _export_button])

@callback(
    Output('download-csv', 'data'),
    Input('export-csv-button', 'n_clicks'),
    State('filter-value', 'value'),
    prevent_initial_call=True
)
def export_csv(n_clicks, filter_value):
    """Serialize the currently filtered publications into a date-stamped CSV download."""
    publications = data_manager.get_publications(filters={'category': filter_value})
    stamp = datetime.now().strftime("%Y%m%d")
    return {
        'content': publications.to_csv(index=False),
        'filename': f'publications_{stamp}.csv',
    }

Real-Time Updates

# Auto-refresh layout: the Interval component ticks every 5 minutes and
# drives the refresh callbacks via its n_intervals property.
_REFRESH_EVERY_MS = 5 * 60 * 1000  # 5 minutes

layout = dbc.Container([
    dcc.Interval(
        id='refresh-interval',
        interval=_REFRESH_EVERY_MS,
        n_intervals=0,
    ),
    html.Div(id='last-updated'),
])

@callback(
    Output('last-updated', 'children'),
    Input('refresh-interval', 'n_intervals')
)
def update_timestamp(n_intervals):
    """Display the wall-clock time of the most recent refresh tick."""
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f'Last updated: {now}'

@callback(
    Output('data-store', 'data'),
    Input('refresh-interval', 'n_intervals')
)
def refresh_data(n_intervals):
    """Re-query publications on each interval tick and push them into the client-side store."""
    publications = data_manager.get_publications()
    return publications.to_dict('records')

Graceful Error Handling

@callback(
    Output('chart', 'figure'),
    Input('load-button', 'n_clicks')
)
def safe_chart_update(n_clicks):
    """Build the citations-by-journal bar chart, degrading gracefully on failure."""

    def _message_figure(message):
        # An empty figure carrying a centered message, shown instead of a broken chart.
        return go.Figure().add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )

    try:
        fresh = data_manager.get_publications()
        return px.bar(fresh, x='journal', y='citations')

    except TimeoutError:
        # Prefer stale data over no data when the query times out.
        logger.warning("Query timeout, using cached data")
        fallback = get_last_cached_data()
        if fallback is not None:
            return px.bar(fallback, x='journal', y='citations')
        return _message_figure("Could not load data (timeout)")

    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        return _message_figure("An error occurred. Please try again.")

Complete Code Examples

DataManager Implementation

# data/manager.py
from google.cloud import bigquery
from cache import CacheManager
import logging
import pandas as pd

# Module-level logger and shared cache instance used by DataManager below.
logger = logging.getLogger(__name__)
cache = CacheManager()

class DataManager:
    """Central data access for all datasets.

    Query results are memoized in the module-level ``cache`` keyed by the
    query parameters, so repeated calls with the same arguments skip
    BigQuery entirely.
    """

    def __init__(self, project_id):
        """Create a BigQuery client bound to *project_id*."""
        self.client = bigquery.Client(project=project_id)
        self.project_id = project_id

    def _run_cached(self, cache_key, query):
        """Return the DataFrame for *cache_key*, running *query* on a cache miss."""
        cached = cache.get(cache_key)
        if cached is not None:
            logger.info(f"Cache hit: {cache_key}")
            return cached

        logger.info(f"Cache miss, querying BigQuery: {cache_key}")
        result = self.client.query(query).result().to_dataframe()
        cache.set(cache_key, result)
        return result

    def get_all_publications(self, limit=10000):
        """Get all publications ordered by citations (descending).

        Args:
            limit: maximum number of rows to fetch. Coerced to ``int``
                before interpolation into the SQL so a non-integer value
                raises instead of being injected into the query text.

        Returns:
            pandas.DataFrame with one row per publication.
        """
        limit = int(limit)  # guard: value is interpolated into SQL below
        query = f"""
        SELECT
            id, title, authors, journal,
            publication_year, citations, doi, abstract
        FROM `{self.project_id}.publications.data`
        ORDER BY citations DESC
        LIMIT {limit}
        """
        return self._run_cached(f'all_publications_{limit}', query)

    def get_top_authors(self, limit=10):
        """Get the top *limit* authors ranked by total citations.

        Args:
            limit: number of authors to return; coerced to ``int`` before
                SQL interpolation (see get_all_publications).

        Returns:
            pandas.DataFrame with publication_count, total_citations and
            avg_citations per author.
        """
        limit = int(limit)  # guard: value is interpolated into SQL below
        query = f"""
        SELECT
            name,
            COUNT(*) as publication_count,
            SUM(citations) as total_citations,
            ROUND(AVG(citations), 1) as avg_citations
        FROM `{self.project_id}.publications.authors`
        GROUP BY name
        ORDER BY total_citations DESC
        LIMIT {limit}
        """
        return self._run_cached(f'top_authors_{limit}', query)

App Configuration

# config.py
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file before the settings below read it.
load_dotenv()

# Environment
ENVIRONMENT = os.getenv('ENVIRONMENT', 'development')
# Case-insensitive truthy parsing: 'True', 'true', '1' and 'yes' all enable
# debug mode. (A strict == 'True' comparison silently ignored 'true'/'1'.)
DEBUG = os.getenv('DEBUG', 'False').strip().lower() in ('true', '1', 'yes')

# GCP
GCP_PROJECT_ID = os.getenv('GCP_PROJECT_ID', 'your-project')
GCP_CREDENTIALS_PATH = os.getenv('GCP_CREDENTIALS_PATH', './credentials.json')

# Cache
CACHE_TTL = int(os.getenv('CACHE_TTL', '3600'))  # seconds
REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))

# Logging
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')

# App
APP_NAME = 'Publication Dashboard'
APP_VERSION = '1.0.0'