UnROOT Tutorial · HSF_Julia

Opening Files

using UnROOT

file = ROOTFile(joinpath(pkg_dir, "unroot-tutorial-file.root"))

ROOTFile with 1 entry and 23 streamers.
/home/runner/work/HSF_Julia_Tutorial/HSF_Julia_Tutorial/unroot-tutorial-file.root
└─ Events (TTree)
   ├─ "nMuon"
   ├─ "Muon_pt"
   ├─ "Muon_eta"
   ├─ "Muon_phi"
   ├─ "Muon_mass"
   └─ "Muon_charge"

File contents

keys(file)

1-element Vector{String}:
 "Events"

typeof(LazyTree(file, "Events"))

LazyTree with 6 branches:
Muon_phi, nMuon, Muon_pt, Muon_eta, Muon_charge...

Accessing contents

tree = LazyTree(file, "Events");

# you can also use `names()` if you want a `Vector{String}`
propertynames(tree)

(:Muon_phi, :nMuon, :Muon_pt, :Muon_eta, :Muon_charge, :Muon_mass)

Trees, Branches, and Events

tree.nMuon

100000-element UnROOT.LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}: 
 2
 2
 1
 4
 4
 3
 2
 ⋮
 0
 3
 2
 3
 2
 3

# more DataFrames.jl syntax
tree[:, :Muon_pt]

100000-element UnROOT.LazyBranch{SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true}, UnROOT.Nooffsetjagg, ArraysOfArrays.VectorOfVectors{Float32, Vector{Float32}, Vector{Int32}, Vector{Tuple{}}}}: 
 Float32[10.763697, 15.736523]
 Float32[10.53849, 16.327097]
 Float32[3.2753265]
 Float32[11.429154, 17.634033, 9.624728, 3.5022252]
 Float32[3.2834418, 3.6440058, 32.911224, 23.721754]
 Float32[3.566528, 4.572504, 4.371863]
 Float32[57.6067, 53.04508]
 ⋮
 Float32[]
 Float32[30.298992, 9.889153, 6.6907263]
 Float32[46.36219, 43.90394]
 Float32[6.343258, 6.9803934, 5.0852466]
 Float32[3.3099499, 15.68049]
 Float32[11.444268, 3.082721, 4.9692106]

Events

tree.Muon_pt[begin] #or [1]

2-element view(::Vector{Float32}, 1:2) with eltype Float32:
 10.763697
 15.736523

# more DataFrames.jl syntax
tree[3, :Muon_pt]

1-element view(::Vector{Float32}, 5:5) with eltype Float32:
 3.2753265

tree[1]

UnROOT.LazyEvent at index 1 with 6 columns:
(Muon_phi = Float32[-0.034272723, 2.5426154], nMuon = 2, Muon_pt = Float32[10.763697, 15.736523], Muon_eta = Float32[1.0668273, -0.5637865], Muon_charge = Int32[-1, -1], Muon_mass = Float32[0.10565837, 0.10565837])

# to materialize it, don't do this in event loop
Tuple(first(tree))

(Float32[-0.034272723, 2.5426154], 2, Float32[10.763697, 15.736523], Float32[1.0668273, -0.5637865], Int32[-1, -1], Float32[0.10565837, 0.10565837])

Histograms

# We have Makie recipe for histograms, implemented in FHist.jl
using FHist, CairoMakie

plot(Hist1D(tree.nMuon), figure=(resolution = (400, 300),))

stairs(Hist1D(tree.nMuon, 0:10);
    figure=(resolution = (400, 300), ),
    axis=(xlabel="# of muons in event", ylabel="# of events")
)

Histogramming a jagged array

begin
    using LaTeXStrings
    using Base.Iterators: flatten #stdlib, save some typing
end

begin
    h = Hist1D(flatten(tree.Muon_pt), 0:100)
    plot(h;
        figure=(resolution = (400, 300), ),
        axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}", ylabel="# of muons / 1 GeV")
    )
end

Logarithmic scales

begin
    log_xs = 10 .^ range(log10(1), log10(100); length=100)
    h_logscape = Hist1D(flatten(tree.Muon_pt), log_xs)
    
    stairs(h_logscape;
            figure=(resolution = (400, 300), ),
            axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}", 
            ylabel="# of muons / 1 GeV",
            xscale=Makie.pseudolog10,
            xticks=[1,10,100]
            )
    )
end

Columnar? Row-based? both!

Counting

length(tree), length(tree.nMuon), length(tree.Muon_pt)

(100000, 100000, 100000)

Selections

Vectorized style can be traps

Vectorized style is a somewhat emphasized point in Python programming especially if one wants performance. However, numpy.sum() is nothing more than a loop written in C.

Since Julia is not slow, from now on, we deviate from uproot tutorial in that we are allowed to use for-loop whenever appropriate which has several benefits:

More expressive
Less memory allocation

Since Julia is not slow, from now on, we deviate from uproot tutorial in that we are allowed to use for-loop whenever appropriate which has several benefits:

More expressive
Less memory allocation

Selections from 1D arrays

# allocate a big intermedate array, not very Julia
single_muon_mask = tree.nMuon .== 1

100000-element BitVector:
 0
 0
 1
 0
 0
 0
 0
 ⋮
 0
 0
 0
 0
 0
 0

Counting with selections (uproot way)

sum(single_muon_mask)

Applying a mask to an array

Info

Just because you can doesn't mean you should, I can't think of a good reason to do this kind of mask and allocation other than pedagogy.

tree.Muon_pt[single_muon_mask]

# notice, @view should also work

13447-element Vector{SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true}}:
 [3.2753265]
 [3.837803]
 [16.145521]
 [13.342755]
 [8.5756445]
 [3.6105926]
 [11.0238]
 ⋮
 [14.568481]
 [3.3424969]
 [4.893041]
 [8.18912]
 [13.272973]
 [9.484549]

length(tree.Muon_pt[single_muon_mask])

Counting with selections (Julia way)

# minimal mem allocation -> faster
sum(==(1), tree.nMuon)

Plotting with selections

begin
    h_selection = Hist1D(flatten(tree.Muon_pt[single_muon_mask]), 0:100)
    plot(h_selection;
            figure=(resolution = (400, 300), ),
            axis=(xlabel=L"\mathrm{Muon}~p_{\mathrm{T}}~\mathrm{[GeV]}", 
            ylabel="# of muons / 1 GeV",
            yscale=Makie.pseudolog10, yticks = 10 .^ (1:4)
            )
    )
end

Selections from a jagged array the Julia way

mapreduce(+, tree.Muon_eta) do etas
    count(x -> -2 < x < 2, etas)
end

with_theme(ATLASTHEME) do 
    bins = range(-2.5, 2.5; length=51)
    # Set up
    fig = Figure(; resolution = (500, 700))
    h_eta = Hist1D(; bins) # shot-hand for `Hist1D(; bins=bins)`
    h_eta_sel = Hist1D(; bins)

    for η in flatten(tree.Muon_eta)
        push!(h_eta, η) # always fill upper histogran
        abs(η) >= 2 && continue
        push!(h_eta_sel, η) # fill lower histogram
    end

    # Plot upper axis
    plot(fig[1,1], h_eta;
            axis=(
                title="No selection", 
                xlabel=L"\mathrm{Muon}~\eta",
                ylabel="# of muons",
            )
        )
    # Plot lower axis
    plot(fig[2,1], h_eta_sel;
            axis=(
                title="With |η| < 2 selection", 
                xlabel=L"\mathrm{Muon}~\eta",
                ylabel="# of muons",
            )
        )
    fig
end

Comparing histograms

begin
        h_pt1 = Hist1D(; bins = 0:2:50)
        h_pt2 = Hist1D(; bins = 0:2:50)
        for evt in tree
            (; nMuon, Muon_pt, Muon_eta) = evt
            nMuon != 1 && continue # single_muon_mask
            eta_mask = @. abs(Muon_eta) < 2
            push!.(h_pt1, Muon_pt[eta_mask])
            push!.(h_pt2, Muon_pt[(~).(eta_mask)])
        end
        
end

with_theme(ATLASTHEME) do
    stairs(
        h_pt1; label = L"|\eta|~<~2",
        figure = (; resolution = (500, 400)),
        axis = (ylabel = "Number of single muons / 2 GeV",)
    )
    stairs!(h_pt2; label = L"|\eta|~\geq~2")

    axislegend()
    current_figure()
end

with_theme(ATLASTHEME) do
    stairs(
        normalize(h_pt1); label = L"|\eta|~<~2",
        figure = (; resolution = (500, 400)),
        axis = (title="Normalized", ylabel = "Number of single muons / 2 GeV")
    )
    stairs!(normalize(h_pt2); label = L"|\eta|~\geq~2")

    axislegend()
    current_figure()
end

Columnar vs. row-based analysis

We want to emphasize again the fact that Julia doesn't slow down when doing row-based analysis, and we find the flexibility and scalability appealing.

We cite some timing from uproot python tutorial for comparison:

Python row-based:

CPU times: user 4.78 s, sys: 77.2 ms, total: 4.86 s
Wall time: 4.88 s

Python columnar:

CPU times: user 5.18 ms, sys: 1 ms, total: 6.18 ms
Wall time: 4.87 ms

With Julia, we can easily find situation where row-based is faster than columnar:

using BenchmarkTools

@benchmark mapreduce(+, $tree.Muon_eta) do etas
    count(x -> -2 < x < 2, etas)
end

BenchmarkTools.Trial: 684 samples with 1 evaluation.
 Range (min … max):  6.467 ms …  11.892 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     7.233 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):   7.303 ms ± 527.207 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%

            ▁▁▂█▅▆█▄▆                                          
  ▂▂▃▃▅▆▄▇▇▇███████████▇▅▄▅▃▃▃▃▃▃▂▂▁▂▂▂▃▃▁▂▂▂▁▁▃▂▁▂▁▂▂▁▂▂▁▁▁▂ ▃
  6.47 ms         Histogram: frequency by time        9.38 ms <

 Memory estimate: 0 bytes, allocs estimate: 0.

Getting Physics-Relevant Information

using LorentzVectorHEP

begin 
    hist_ΔR = Hist1D(; bins = 0:0.05:6)
    for evt in tree
        evt.nMuon != 2 && continue #two_muons_mask
        (; Muon_pt, Muon_eta, Muon_phi, Muon_mass) = evt
        two_muons_p4 = LorentzVectorCyl.(Muon_pt, Muon_eta, Muon_phi, Muon_mass)
        # we know:
        # two_muons_p4::Vector{LorentzVectorCyl} and its length == 2

        push!(hist_ΔR, deltar(two_muons_p4...))
    end
    plot(hist_ΔR; 
        figure = (; resolution = (500, 400)),
        axis = (xlabel = "ΔR between muons", ylabel = "Number of two-muon events")
    )
end

Opposite sign muons

with_theme(ATLASTHEME) do
    log_bins = 10 .^ range(log10(0.1), log10(1000); length=200)
    hist_inv = Hist1D(; bins = log_bins)
    for evt in tree
        evt.nMuon != 2 && continue #two_muons_mask
        (; Muon_charge) = evt
        Muon_charge[1] == Muon_charge[2] && continue # require oppo sign
        (; Muon_pt, Muon_eta, Muon_phi, Muon_mass) = evt
        two_muons_p4 = LorentzVectorCyl.(Muon_pt, Muon_eta, Muon_phi, Muon_mass)
        # we know:
        # two_muons_p4::Vector{LorentzVectorCyl} and its length == 2

        push!(hist_inv, mass(sum(two_muons_p4)))
    end
    stairs(hist_inv; 
        figure = (; resolution = (600, 400)),
        axis = (xlabel = "Dimuon invariant mass [GeV]",
        xscale = Makie.pseudolog10,
        xticks = [0.1, 1, 10, 100, 1000],
        yscale = Makie.pseudolog10,
        yticks = [1, 10, 100, 1000],
        ylabel = "Number of dimuon events")
    )
    # text annotation
    ps = Dict("Z"=>91.2, "J/Ψ"=>3.09, "Υ"=>9.4, "Ψ(2S)"=>3.686, "ϕ"=>1.019, "ρ"=>0.775, "η"=>0.52)
    for (k,v) in ps
        text!(v, lookup(hist_inv, v); text = k, align = (:center, :baseline))
    end

    current_figure()
end