Skip to content

Commit

Permalink
Merge pull request #433 from lanl/jmm/nqt-o2
Browse files Browse the repository at this point in the history
  • Loading branch information
Yurlungur authored Nov 21, 2024
2 parents a3befc2 + 32abaad commit d7af402
Show file tree
Hide file tree
Showing 16 changed files with 551 additions and 175 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ jobs:
#..
make
make install
make test
ctest --output-on-failure
75 changes: 61 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,17 @@ option(SINGULARITY_BETTER_DEBUG_FLAGS "Better debug flags for singularity" ON)
option(SINGULARITY_HIDE_MORE_WARNINGS "hide more warnings" OFF)

# toggle code options
option(SINGULARITY_USE_SINGLE_LOGS
"Use single precision logs. Can harm accuracy." OFF)
option(SINGULARITY_USE_TRUE_LOG_GRIDDING
"Use grids that conform to log spacing." OFF)
# TODO(JMM): Should this automatically be activated when true log gridding is
# off?
cmake_dependent_option(
SINGULARITY_USE_HIGH_RISK_MATH
"Use integer aliased logs, may not be portable" OFF
"NOT SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
"Use grids that conform to log spacing." OFF)
cmake_dependent_option(SINGULARITY_USE_SINGLE_LOGS
"Use single precision logs. Only available for true log gridding. Can harm accuracy."
OFF "SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
option(SINGULARITY_NQT_ORDER_1
"In NQT logs, use first order. Faster but less accurate."
OFF)
option(SINGULARITY_NQT_PORTABLE
"In NQT logs, use portable, rather than bithacked implementation. Slower, but more likely to function on exotic architectures."
OFF)

# misc options
option(SINGULARITY_FORCE_SUBMODULE_MODE "Submodule mode" OFF)
Expand All @@ -126,6 +127,26 @@ set(SINGULARITY_PLUGINS "" CACHE STRING "List of paths to plugin directories")
set(SINGULARITY_VARIANT "singularity-eos/eos/default_variant.hpp" CACHE STRING
"The include path for the file containing the definition of singularity::EOS.")

# Detect ARM architecture
#
# SINGULARITY_ON_ARM defaults to ON when the target processor name starts with
# "arm" or "aarch64" and CUDA is not in use. The default is computed before the
# cache entry is declared: a second set(... CACHE ...) without FORCE would be a
# no-op once the entry exists, which would leave the value stuck at OFF. A
# value passed explicitly with -DSINGULARITY_ON_ARM=... is still respected.
set(singularity_on_arm_default OFF)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)" AND NOT SINGULARITY_USE_CUDA)
  message(STATUS
          "ARM architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
  set(singularity_on_arm_default ON)
endif()
set(SINGULARITY_ON_ARM ${singularity_on_arm_default} CACHE BOOL
    "We are running on an ARM system")

# Advise ARM CPU users that true log gridding may be preferable to fast logs.
if(SINGULARITY_ON_ARM AND NOT SINGULARITY_USE_TRUE_LOG_GRIDDING)
  message(WARNING
          "Fast logs not necessarily better on ARM CPU systems. "
          "You may wish to build with "
          "-DSINGULARITY_USE_TRUE_LOG_GRIDDING=ON.")
endif()

# ------------------------------------------------------------------------------#
# singularity-eos Library
# ------------------------------------------------------------------------------#
Expand Down Expand Up @@ -163,6 +184,15 @@ if(SINGULARITY_USE_FORTRAN)
include(CMakeDetermineFortranCompiler)
endif()

# Big endianness
#
# The integer-aliased fast-log implementation assumes little-endian numbers, so
# warn on big-endian targets unless the portable implementation has already
# been selected. test_big_endian() stores its result in IS_BIG_ENDIAN, which is
# the variable that must be tested here.
include(TestBigEndian)
test_big_endian(IS_BIG_ENDIAN)
if(IS_BIG_ENDIAN AND NOT SINGULARITY_NQT_PORTABLE)
  message(WARNING "Big endian detected! "
                  "Integer aliasing as currently implemented will not function. "
                  "Please set -DSINGULARITY_NQT_PORTABLE=ON.")
endif()

include(GNUInstallDirs)

if(SINGULARITY_BUILD_PYTHON)
Expand Down Expand Up @@ -254,17 +284,34 @@ if(SINGULARITY_BUILD_SESAME2SPINER)
add_subdirectory(sesame2spiner)
endif()

# Expose the project version to consumers of the interface target:
#   SINGULARITY_VERSION                  - full version as a quoted string macro
#   SINGULARITY_VERSION_{MAJOR,MINOR,PATCH} - individual version components
target_compile_definitions(
  singularity-eos_Interface
  INTERFACE
    SINGULARITY_VERSION=\"${PROJECT_VERSION}\"
    SINGULARITY_VERSION_MAJOR=${PROJECT_VERSION_MAJOR}
    SINGULARITY_VERSION_MINOR=${PROJECT_VERSION_MINOR}
    SINGULARITY_VERSION_PATCH=${PROJECT_VERSION_PATCH}
)

# defines
if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
endif()
if(SINGULARITY_USE_SINGLE_LOGS)
target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_USE_SINGLE_LOGS)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_SINGLE_LOGS)
endif()
if(SINGULARITY_USE_HIGH_RISK_MATH)
if(SINGULARITY_NQT_ORDER_1)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_HIGH_RISK_MATH)
INTERFACE SINGULARITY_NQT_ORDER_1)
endif()
if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
if(SINGULARITY_NQT_PORTABLE)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
INTERFACE SINGULARITY_NQT_PORTABLE)
endif()
if(SINGULARITY_TEST_SESAME)
target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_TEST_SESAME)
Expand Down
4 changes: 3 additions & 1 deletion doc/sphinx/src/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ The main CMake options to configure building are in the following table:
``SINGULARITY_BETTER_DEBUG_FLAGS`` ON Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule.
``SINGULARITY_HIDE_MORE_WARNINGS`` OFF Makes warnings less verbose. May interfere with in-tree builds as a submodule.
``SINGULARITY_FORCE_SUBMODULE_MODE`` OFF Force build in _submodule_ mode.
``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy).
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` OFF Use grids that conform to logarithmic spacing.
``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy).
``SINGULARITY_NQT_ORDER_1`` OFF For fast logs, use the less accurate but faster 1st-order version.
``SINGULARITY_NQT_PORTABLE`` OFF For fast logs, use the slower but endianness-independent implementation.
====================================== ======= ===========================================

More options are available to modify only if certain other options or
Expand Down
50 changes: 29 additions & 21 deletions doc/sphinx/src/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ number :math:`x` is represented as a mantissa and an exponent in base
x = m 2^e
for mantissa :math:`m` and exponent :math:`e`. The mantiss is
for mantissa :math:`m` and exponent :math:`e`. The mantissa is
guaranteed to be on the interval :math:`[1/2, 1)`. The standard
library of most low-level languages provides a performant and portable
routine to pick apart this representation, ``frexp``, which given a
Expand All @@ -667,34 +667,42 @@ of the mantissa plus the exponent:
\lg(x) = \lg(m) + e
Therefore, if we can find a fast, invertible approximation to
:math:`\lg(m)`, we will have achieved our goal. It turns out the
expression
:math:`\lg(m)`, we will have achieved our goal. The linear
interpolation of :math:`\lg(m)` on the given interval is

.. math::
2 (x - 1)
works pretty well, so we use that. (To convince yourself of this note
that for :math:`x=1/2` this expression returns -1 and for :math:`x=1`,
it returns 0, which are the correct values of :math:`\lg(x)` at the
bounds of the interval.) Thus our approximate, invertible expression
for :math:`\lg` is just
and the quadratic is

.. math::
2 (m - 1) + e
for the mantissa and exponent extracted via ``frexp``. This differs
from :math:`lg` by a maximum of about 0.1, which translates to at most
a 25 percent difference. As discussed above, however, the function
itself is an exact representation of itself and the difference from
:math:`lg` is acceptable.

To invert, we use the built in function that inverts ``frexp``,
``ldexp``, which combines the mantissa and exponent into the original
floating point representation.

This approach is described in more detail in our `short note`_ on the topic.
-\frac{4}{3} (m -2) (m - 1)
where the former produces a function that is piecewise :math:`C^1` and
everywhere continuous. The latter produces a function that is
everywhere :math:`C^1` and piecewise :math:`C^2`. Both functions are
exactly invertible. To invert, we use the built-in function
that inverts ``frexp``, ``ldexp``, which combines the mantissa and
exponent into the original floating point representation.

While these functions are not exactly logarithms, they do work for
building logarithmic grids. The smoothness of the transformation
mapping from linear to "not-quite-log" space does matter for
interpolation, however. Linear interpolation in "not-quite-log" space
converges at second order only in the :math:`L^1` norm for the linear
version of the approximate log. The quadratic version of the fast log
provides second-order convergence in all norms, however.

Finally, while ``frexp`` and ``ldexp`` are portable and performant,
they are less performant than hand-implemented, low-level methods that
leverage the bitwise structure of floating point numbers. These
"bithacked" or "integer aliased" implementations are what are used in
practice in the code.

This approach is described in more detail in our `short note`_ on the
topic.

.. _Short note: https://arxiv.org/abs/2206.08957

Expand Down
14 changes: 9 additions & 5 deletions doc/sphinx/src/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1736,12 +1736,17 @@ return a ``Real`` number.
.. warning::
As with the SpinerEOS models, the stellar collapse models use fast
logs. You can switch the logs to true logs with the
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option.
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option. This may be
desirable on ARM-based architectures (e.g., ``aarch64``), where
a hardware log intrinsic is available.


.. note::
A more performant implementation of fast logs is available, but it
might not be portable. Enable it with the
``SINGULARITY_USE_HIGH_RISK_MATH`` cmake option.
The default implementation of our fast logs assumes little endian
numbers. If you are on a big-endian machine, they will not work
properly. If you encounter a big-endian machine, please report it
to us in the issues and (for now) enable the portable
implementation of fast logs with ``-DSINGULARITY_NQT_PORTABLE=ON``.

.. _Stellar Collapse: https://stellarcollapse.org/equationofstate.html

Expand All @@ -1750,7 +1755,6 @@ return a ``Real`` number.
.. _median filter: https://en.wikipedia.org/wiki/Median_filter



Helmholtz EOS
``````````````

Expand Down
6 changes: 6 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ add_executable(get_sound_speed_press
target_link_libraries(get_sound_speed_press PRIVATE
singularity-eos::singularity-eos)

# The eos_grid example is only built when SINGULARITY_USE_SPINER_WITH_HDF5 is
# enabled.
if(SINGULARITY_USE_SPINER_WITH_HDF5)
  add_executable(eos_grid
    eos_grid.cpp)
  target_link_libraries(eos_grid
    PRIVATE singularity-eos::singularity-eos)
endif()

if(SINGULARITY_USE_EOSPAC AND SINGULARITY_USE_SPINER_WITH_HDF5)
add_executable(get_sesame_state
get_sesame_state.cpp)
Expand Down
Loading

0 comments on commit d7af402

Please sign in to comment.