Opening Files
using UnROOT
file = ROOTFile(joinpath(pkg_dir, "unroot-tutorial-file.root"))
ROOTFile with 1 entry and 23 streamers. /home/runner/work/HSF_Julia_Tutorial/HSF_Julia_Tutorial/unroot-tutorial-file.root └─ Events (TTree) ├─ "nMuon" ├─ "Muon_pt" ├─ "Muon_eta" ├─ "Muon_phi" ├─ "Muon_mass" └─ "Muon_charge"
File contents
keys(file)
1-element Vector{String}: "Events"
typeof(LazyTree(file, "Events"))
LazyTree with 6 branches: Muon_phi, nMuon, Muon_pt, Muon_eta, Muon_charge...
Accessing contents
tree = LazyTree(file, "Events");
# you can also use `names()` if you want a `Vector{String}`
propertynames(tree)
(:Muon_phi, :nMuon, :Muon_pt, :Muon_eta, :Muon_charge, :Muon_mass)
Trees, Branches, and Events
tree.nMuon
100000-element UnROOT.LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}: 2 2 1 4 4 3 2 ⋮ 0 3 2 3 2 3
# more DataFrames.jl syntax
tree[:, :Muon_pt]
100000-element UnROOT.LazyBranch{SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true}, UnROOT.Nooffsetjagg, ArraysOfArrays.VectorOfVectors{Float32, Vector{Float32}, Vector{Int32}, Vector{Tuple{}}}}: Float32[10.763697, 15.736523] Float32[10.53849, 16.327097] Float32[3.2753265] Float32[11.429154, 17.634033, 9.624728, 3.5022252] Float32[3.2834418, 3.6440058, 32.911224, 23.721754] Float32[3.566528, 4.572504, 4.371863] Float32[57.6067, 53.04508] ⋮ Float32[] Float32[30.298992, 9.889153, 6.6907263] Float32[46.36219, 43.90394] Float32[6.343258, 6.9803934, 5.0852466] Float32[3.3099499, 15.68049] Float32[11.444268, 3.082721, 4.9692106]
Events
tree.Muon_pt[begin] #or [1]
2-element view(::Vector{Float32}, 1:2) with eltype Float32: 10.763697 15.736523
# more DataFrames.jl syntax
tree[3, :Muon_pt]
1-element view(::Vector{Float32}, 5:5) with eltype Float32: 3.2753265
tree[1]
UnROOT.LazyEvent at index 1 with 6 columns: (Muon_phi = Float32[-0.034272723, 2.5426154], nMuon = 2, Muon_pt = Float32[10.763697, 15.736523], Muon_eta = Float32[1.0668273, -0.5637865], Muon_charge = Int32[-1, -1], Muon_mass = Float32[0.10565837, 0.10565837])
# to materialize it, don't do this in event loop
Tuple(first(tree))
(Float32[-0.034272723, 2.5426154], 2, Float32[10.763697, 15.736523], Float32[1.0668273, -0.5637865], Int32[-1, -1], Float32[0.10565837, 0.10565837])
Histograms
# We have Makie recipe for histograms, implemented in FHist.jl
using FHist, CairoMakie
plot(Hist1D(tree.nMuon), figure=(resolution = (400, 300),))
stairs(Hist1D(tree.nMuon, 0:10);
figure=(resolution = (400, 300), ),
axis=(xlabel="# of muons in event", ylabel="# of events")
)
Histogramming a jagged array
begin
using LaTeXStrings
using Base.Iterators: flatten #stdlib, save some typing
end
begin
h = Hist1D(flatten(tree.Muon_pt), 0:100)
plot(h;
figure=(resolution = (400, 300), ),
axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}", ylabel="# of muons / 1 GeV")
)
end
Logarithmic scales
begin
log_xs = 10 .^ range(log10(1), log10(100); length=100)
h_logscape = Hist1D(flatten(tree.Muon_pt), log_xs)
stairs(h_logscape;
figure=(resolution = (400, 300), ),
axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}",
ylabel="# of muons / 1 GeV",
xscale=Makie.pseudolog10,
xticks=[1,10,100]
)
)
end
Columnar? Row-based? both!
Counting
length(tree), length(tree.nMuon), length(tree.Muon_pt)
(100000, 100000, 100000)
Selections
Vectorized style is a somewhat emphasized point in Python programming especially if one wants performance. However, numpy.sum()
is nothing more than a loop written in C.
Since Julia is not slow, from now on, we deviate from uproot tutorial in that we are allowed to use for-loop whenever appropriate which has several benefits:
More expressive
Less memory allocation
Since Julia is not slow, from now on, we deviate from uproot tutorial in that we are allowed to use for-loop whenever appropriate which has several benefits:
More expressive
Less memory allocation
Selections from 1D arrays
# allocate a big intermedate array, not very Julia
single_muon_mask = tree.nMuon .== 1
100000-element BitVector: 0 0 1 0 0 0 0 ⋮ 0 0 0 0 0 0
Counting with selections (uproot way)
sum(single_muon_mask)
13447
Applying a mask to an array
Just because you can doesn't mean you should, I can't think of a good reason to do this kind of mask and allocation other than pedagogy.
tree.Muon_pt[single_muon_mask]
# notice, @view should also work
13447-element Vector{SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true}}: [3.2753265] [3.837803] [16.145521] [13.342755] [8.5756445] [3.6105926] [11.0238] ⋮ [14.568481] [3.3424969] [4.893041] [8.18912] [13.272973] [9.484549]
length(tree.Muon_pt[single_muon_mask])
13447
Counting with selections (Julia way)
# minimal mem allocation -> faster
sum(==(1), tree.nMuon)
13447
Plotting with selections
begin
h_selection = Hist1D(flatten(tree.Muon_pt[single_muon_mask]), 0:100)
plot(h_selection;
figure=(resolution = (400, 300), ),
axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}",
ylabel="# of muons / 1 GeV",
yscale=Makie.pseudolog10, yticks = 10 .^ (1:4)
)
)
end
Selections from a jagged array the Julia way
mapreduce(+, tree.Muon_eta) do etas
count(x -> -2 < x < 2, etas)
end
204564
with_theme(ATLASTHEME) do
bins = range(-2.5, 2.5; length=51)
# Set up
fig = Figure(; resolution = (500, 700))
h_eta = Hist1D(; bins) # shot-hand for `Hist1D(; bins=bins)`
h_eta_sel = Hist1D(; bins)
for η in flatten(tree.Muon_eta)
push!(h_eta, η) # always fill upper histogran
abs(η) >= 2 && continue
push!(h_eta_sel, η) # fill lower histogram
end
# Plot upper axis
plot(fig[1,1], h_eta;
axis=(
title="No selection",
xlabel=L"\mathrm{Muon}~\eta",
ylabel="# of muons",
)
)
# Plot lower axis
plot(fig[2,1], h_eta_sel;
axis=(
title="With |η| < 2 selection",
xlabel=L"\mathrm{Muon}~\eta",
ylabel="# of muons",
)
)
fig
end
Comparing histograms
begin
h_pt1 = Hist1D(; bins = 0:2:50)
h_pt2 = Hist1D(; bins = 0:2:50)
for evt in tree
(; nMuon, Muon_pt, Muon_eta) = evt
nMuon != 1 && continue # single_muon_mask
eta_mask = @. abs(Muon_eta) < 2
push!.(h_pt1, Muon_pt[eta_mask])
push!.(h_pt2, Muon_pt[(~).(eta_mask)])
end
end
with_theme(ATLASTHEME) do
stairs(
h_pt1; label = L"|\eta|~<~2",
figure = (; resolution = (500, 400)),
axis = (ylabel = "Number of single muons / 2 GeV",)
)
stairs!(h_pt2; label = L"|\eta|~\geq~2")
axislegend()
current_figure()
end
with_theme(ATLASTHEME) do
stairs(
normalize(h_pt1); label = L"|\eta|~<~2",
figure = (; resolution = (500, 400)),
axis = (title="Normalized", ylabel = "Number of single muons / 2 GeV")
)
stairs!(normalize(h_pt2); label = L"|\eta|~\geq~2")
axislegend()
current_figure()
end
We want to emphasize again the fact that Julia doesn't slow down when doing row-based analysis, and we find the flexibility and scalability appealing.
We cite some timing from uproot python tutorial for comparison:
We cite some timing from uproot python tutorial for comparison:
Python row-based:
CPU times: user 4.78 s, sys: 77.2 ms, total: 4.86 s
Wall time: 4.88 s
Python columnar:
CPU times: user 5.18 ms, sys: 1 ms, total: 6.18 ms
Wall time: 4.87 ms
With Julia, we can easily find situation where row-based is faster than columnar:
using BenchmarkTools
@benchmark mapreduce(+, $tree.Muon_eta) do etas
count(x -> -2 < x < 2, etas)
end
BenchmarkTools.Trial: 684 samples with 1 evaluation. Range (min … max): 6.467 ms … 11.892 ms ┊ GC (min … max): 0.00% … 0.00% Time (median): 7.233 ms ┊ GC (median): 0.00% Time (mean ± σ): 7.303 ms ± 527.207 μs ┊ GC (mean ± σ): 0.00% ± 0.00% ▁▁▂█▅▆█▄▆ ▂▂▃▃▅▆▄▇▇▇███████████▇▅▄▅▃▃▃▃▃▃▂▂▁▂▂▂▃▃▁▂▂▂▁▁▃▂▁▂▁▂▂▁▂▂▁▁▁▂ ▃ 6.47 ms Histogram: frequency by time 9.38 ms < Memory estimate: 0 bytes, allocs estimate: 0.
Getting Physics-Relevant Information
using LorentzVectorHEP
begin
hist_ΔR = Hist1D(; bins = 0:0.05:6)
for evt in tree
evt.nMuon != 2 && continue #two_muons_mask
(; Muon_pt, Muon_eta, Muon_phi, Muon_mass) = evt
two_muons_p4 = LorentzVectorCyl.(Muon_pt, Muon_eta, Muon_phi, Muon_mass)
# we know:
# two_muons_p4::Vector{LorentzVectorCyl} and its length == 2
push!(hist_ΔR, deltar(two_muons_p4...))
end
plot(hist_ΔR;
figure = (; resolution = (500, 400)),
axis = (xlabel = "ΔR between muons", ylabel = "Number of two-muon events")
)
end
Opposite sign muons
with_theme(ATLASTHEME) do
log_bins = 10 .^ range(log10(0.1), log10(1000); length=200)
hist_inv = Hist1D(; bins = log_bins)
for evt in tree
evt.nMuon != 2 && continue #two_muons_mask
(; Muon_charge) = evt
Muon_charge[1] == Muon_charge[2] && continue # require oppo sign
(; Muon_pt, Muon_eta, Muon_phi, Muon_mass) = evt
two_muons_p4 = LorentzVectorCyl.(Muon_pt, Muon_eta, Muon_phi, Muon_mass)
# we know:
# two_muons_p4::Vector{LorentzVectorCyl} and its length == 2
push!(hist_inv, mass(sum(two_muons_p4)))
end
stairs(hist_inv;
figure = (; resolution = (600, 400)),
axis = (xlabel = "Dimuon invariant mass [GeV]",
xscale = Makie.pseudolog10,
xticks = [0.1, 1, 10, 100, 1000],
yscale = Makie.pseudolog10,
yticks = [1, 10, 100, 1000],
ylabel = "Number of dimuon events")
)
# text annotation
ps = Dict("Z"=>91.2, "J/Ψ"=>3.09, "Υ"=>9.4, "Ψ(2S)"=>3.686, "ϕ"=>1.019, "ρ"=>0.775, "η"=>0.52)
for (k,v) in ps
text!(v, lookup(hist_inv, v); text = k, align = (:center, :baseline))
end
current_figure()
end