I don't think you will be find mockup bigquery services on apache beam.
Asides from that, there are options available within Apache beam that you can use to mimic a database and build your own function that can behave in a similar way. I will share an updated example Reading from a SQLite database from the beam project for visibility:
1. Install these libraries
!pip install --upgrade apache-beam
!pip install --upgrade httplib2==0.20.0
# Note you might encounter issues with the library installation reinstall if necessary.
2. Create a mockup database (using sqlite3)
import sqlite3
database_file = "moon-phases.db" #@param {type:"string"}
with sqlite3.connect(database_file) as db:
cursor = db.cursor()
# Create the moon_phases table.
cursor.execute('''
CREATE TABLE IF NOT EXISTS moon_phases (
id INTEGER PRIMARY KEY,
phase_emoji TEXT NOT NULL,
peak_datetime DATETIME NOT NULL,
phase TEXT NOT NULL)''')
# Truncate the table if it's already populated.
cursor.execute('DELETE FROM moon_phases')
# Insert some sample data.
insert_moon_phase = 'INSERT INTO moon_phases(phase_emoji, peak_datetime, phase) VALUES(?, ?, ?)'
cursor.execute(insert_moon_phase, ('', '2017-12-03 15:47:00', 'Full Moon'))
cursor.execute(insert_moon_phase, ('', '2017-12-10 07:51:00', 'Last Quarter'))
cursor.execute(insert_moon_phase, ('', '2017-12-18 06:30:00', 'New Moon'))
cursor.execute(insert_moon_phase, ('', '2017-12-26 09:20:00', 'First Quarter'))
cursor.execute(insert_moon_phase, ('', '2018-01-02 02:24:00', 'Full Moon'))
cursor.execute(insert_moon_phase, ('', '2018-01-08 22:25:00', 'Last Quarter'))
cursor.execute(insert_moon_phase, ('', '2018-01-17 02:17:00', 'New Moon'))
cursor.execute(insert_moon_phase, ('', '2018-01-24 22:20:00', 'First Quarter'))
cursor.execute(insert_moon_phase, ('', '2018-01-31 13:27:00', 'Full Moon'))
# Query for the data in the table to make sure it's populated.
cursor.execute('SELECT * FROM moon_phases')
for row in cursor.fetchall():
print(row)
3. Run the pipelines queries
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
import sqlite3
from typing import Iterable, List, Tuple, Dict
class SQLiteSelect(beam.DoFn):
def __init__(self, database_file: str):
self.database_file = database_file
self.connection = None
def setup(self):
self.connection = sqlite3.connect(self.database_file)
def process(self, query: Tuple[str, List[str]]) -> Iterable[Dict[str, str]]:
table, columns = query
cursor = self.connection.cursor()
cursor.execute(f"SELECT {','.join(columns)} FROM {table}")
for row in cursor.fetchall():
yield dict(zip(columns, row))
def teardown(self):
self.connection.close()
@beam.ptransform_fn
@beam.typehints.with_output_types(Dict[str, str])
def SelectFromSQLite(
pbegin: beam.pvalue.PBegin,
database_file: str,
queries: List[Tuple[str, List[str]]],
) -> beam.PCollection[Dict[str, str]]:
return (
pbegin
| 'Create None' >> beam.Create(queries)
| 'SQLite SELECT' >> beam.ParDo(SQLiteSelect(database_file))
)
queries = [
# (table_name, [column1, column2, ...])
('moon_phases', ['phase_emoji', 'peak_datetime', 'phase']),
('moon_phases', ['phase_emoji', 'phase']),
]
options = PipelineOptions(flags=[], type_check_additional='all')
with beam.Pipeline(options=options) as pipeline:
(
pipeline
| 'Read from SQLite' >> SelectFromSQLite(database_file, queries)
| 'Print rows' >> beam.Map(print)
)
output
{'phase_emoji': '', 'peak_datetime': '2017-12-03 15:47:00', 'phase': 'Full Moon'}
{'phase_emoji': '', 'peak_datetime': '2017-12-10 07:51:00', 'phase': 'Last Quarter'}
{'phase_emoji': '', 'peak_datetime': '2017-12-18 06:30:00', 'phase': 'New Moon'}
{'phase_emoji': '', 'peak_datetime': '2017-12-26 09:20:00', 'phase': 'First Quarter'}
{'phase_emoji': '', 'peak_datetime': '2018-01-02 02:24:00', 'phase': 'Full Moon'}
{'phase_emoji': '', 'peak_datetime': '2018-01-08 22:25:00', 'phase': 'Last Quarter'}
{'phase_emoji': '', 'peak_datetime': '2018-01-17 02:17:00', 'phase': 'New Moon'}
{'phase_emoji': '', 'peak_datetime': '2018-01-24 22:20:00', 'phase': 'First Quarter'}
{'phase_emoji': '', 'peak_datetime': '2018-01-31 13:27:00', 'phase': 'Full Moon'}
{'phase_emoji': '', 'phase': 'Full Moon'}
{'phase_emoji': '', 'phase': 'Last Quarter'}
{'phase_emoji': '', 'phase': 'New Moon'}
{'phase_emoji': '', 'phase': 'First Quarter'}
{'phase_emoji': '', 'phase': 'Full Moon'}
{'phase_emoji': '', 'phase': 'Last Quarter'}
{'phase_emoji': '', 'phase': 'New Moon'}
{'phase_emoji': '', 'phase': 'First Quarter'}
{'phase_emoji': '', 'phase': 'Full Moon'}
For more beam examples (either using java or python) the beam github project example node. Be advice that you might have to apply some adjustments as some are a bit deprecated or missing some imports.
As a final note, more on the testing aspect of your question. I think you can use PAssert
to create testing pipelines as described on its official documentation.