Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Applying omp_cpu_trans to the files excluded from the omp_gpu_trans #2756

Open
kaanolgu opened this issue Oct 24, 2024 · 1 comment
Open
Labels
NG-ARCH Issues relevant to the GPU parallelisation of LFRic and other models expected to be used in NG-ARCH

Comments

@kaanolgu
Copy link
Collaborator

Would this combined omp_cpu_trans and omp_gpu_trans file work? I assume it will, but I haven't tested it yet.

from utils import (
    insert_explicit_loop_parallelism, normalise_loops, add_profiling,
    enhance_tree_information, OTHER_ISSUES, DONT_PARALLELISE)
from psyclone.psyir.nodes import (
    Loop, Routine, Directive, Assignment, OMPAtomicDirective)
from psyclone.psyir.transformations import OMPTargetTrans
from psyclone.transformations import (
    OMPLoopTrans, OMPDeclareTargetTrans, TransformationError)

# When True, trans() calls add_profiling() on the children of every routine
# before applying any other transformation.
PROFILING_ENABLED = False

# List of all files that psyclone will skip processing. It extends the
# OTHER_ISSUES list imported from utils with the entries below; a trailing
# comment gives the reason where one has been recorded.
# NOTE(review): "stpctl.f90" is listed here, yet trans() also has a dedicated
# GPU special case for that file name - confirm which of the two is intended.
FILES_TO_SKIP = OTHER_ISSUES + [
    "asminc.f90",
    "trosk.f90",    # TODO #1254
    "vremap.f90",   # Bulk assignment of a structure component
    "lib_mpp.f90",  # Compiler Error: Illegal substring expression
    "prtctl.f90",   # Compiler Error: Illegal substring expression
    "sbcblk.f90",   # Compiler Error: Vector expression used where scalar
                    # expression required
    "diadct.f90",   # Compiler Error: Wrong number of arguments in reshape
    "stpctl.f90",
    "lbcnfd.f90",
    "flread.f90",
    "sedini.f90",
    "diu_bulk.f90",  # Linking undefined reference
    "bdyini.f90",    # Linking undefined reference
    "trcrad.f90",
]

# List of files that will use CPU transformations instead of GPU. trans()
# compares these entries against psyir.name, so they must match that value
# exactly. The two names below are placeholders.
LIST_OF_CPU_TRANS = ["foo.f90", "bar.f90"]  # Example

def _offload_stpctl_loops(subroutine, omp_loop_trans, omp_target_trans):
    ''' Apply the hand-written GPU offloading used for "stpctl.f90".

    Every loop of the routine that is not already inside a directive gets
    the loop directive (forced) and is then enclosed in a target region.
    Loops for which the loop transformation raises TransformationError are
    left untouched.

    :param subroutine: the routine whose loops are offloaded.
    :type subroutine: :py:class:`psyclone.psyir.nodes.Routine`
    :param omp_loop_trans: transformation that inserts the loop directive.
    :param omp_target_trans: transformation that inserts the target region.

    '''
    for loop in subroutine.walk(Loop):
        if loop.ancestor(Directive):
            # Already covered by a directive inserted for an outer loop.
            continue
        try:
            omp_loop_trans.apply(loop, options={"force": True})
        except TransformationError:
            continue
        # NOTE(review): loop.parent.parent is presumably the loop directive
        # that was just inserted around the loop - confirm.
        omp_target_trans.apply(loop.parent.parent)

        # A loop whose only assignment writes to "zmax" has that statement
        # moved into an atomic directive, provided it is a valid atomic
        # statement.
        assigns = loop.walk(Assignment)
        if len(assigns) != 1 or assigns[0].lhs.symbol.name != "zmax":
            continue
        stmt = assigns[0]
        if OMPAtomicDirective.is_valid_atomic_statement(stmt):
            parent = stmt.parent
            atomic = OMPAtomicDirective()
            atomic.children[0].addchild(stmt.detach())
            parent.addchild(atomic)


def trans(psyir):
    ''' Add OpenMP Target and Loop directives to all loops, for GPU offloading,
    or apply CPU OpenMP threading directives depending on the file.

    Files whose name is in LIST_OF_CPU_TRANS get "paralleldo" loop
    directives with no enclosing region directive; all other files get
    "loop" directives enclosed in target regions. Files named in
    DONT_PARALLELISE are normalised but receive no directives. On the GPU
    path, files whose name starts with "obs_" are left completely
    unmodified.

    :param psyir: the PSyIR of the provided file.
    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`

    '''
    # These only depend on the file name, so evaluate them once instead of
    # once per routine.
    use_cpu = psyir.name in LIST_OF_CPU_TRANS
    parallelise = psyir.name not in DONT_PARALLELISE

    omp_loop_trans = OMPLoopTrans(omp_schedule="static")
    if use_cpu:
        # Apply CPU transformations: no region directive is passed on.
        region_trans = None
        omp_loop_trans.omp_directive = "paralleldo"
        print(f"Applying CPU transformations to file: {psyir.name}")
    else:
        # Apply GPU transformations
        region_trans = OMPTargetTrans()
        omp_loop_trans.omp_directive = "loop"
        print(f"Applying GPU transformations to file: {psyir.name}")

        # Skip the obs_ files on the GPU path. This must happen before the
        # routine loop: checking inside the loop would normalise (and
        # optionally profile) the first routine before bailing out, leaving
        # the file partially transformed.
        if psyir.name.startswith("obs_"):
            return

    for subroutine in psyir.walk(Routine):

        if PROFILING_ENABLED:
            add_profiling(subroutine.children)

        enhance_tree_information(subroutine)

        # The hoisting and intrinsic-loopification steps are only enabled
        # on the GPU path.
        normalise_loops(
            subroutine,
            hoist_local_arrays=not use_cpu,
            convert_array_notation=True,
            loopify_array_intrinsics=not use_cpu,
            convert_range_loops=True,
            hoist_expressions=not use_cpu,
        )

        if use_cpu:
            # CPU case
            if parallelise:
                insert_explicit_loop_parallelism(
                    subroutine,
                    region_directive_trans=region_trans,
                    loop_directive_trans=omp_loop_trans,
                    collapse=False,
                    privatise_arrays=psyir.name != "ldftra.f90",
                )
            continue

        # GPU case: "stpctl.f90" is handled by hand and never reaches the
        # generic parallelisation below.
        if psyir.name == "stpctl.f90":
            _offload_stpctl_loops(subroutine, omp_loop_trans, region_trans)
            continue

        if parallelise:
            insert_explicit_loop_parallelism(
                subroutine,
                region_directive_trans=region_trans,
                loop_directive_trans=omp_loop_trans,
                collapse=True,
            )
                
@kaanolgu kaanolgu added the NG-ARCH Issues relevant to the GPU parallelisation of LFRic and other models expected to be used in NG-ARCH label Oct 24, 2024
@kaanolgu
Copy link
Collaborator Author

@addy419 @sergisiso regarding the topic discussed in the Mattermost chat

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
NG-ARCH Issues relevant to the GPU parallelisation of LFRic and other models expected to be used in NG-ARCH
Projects
None yet
Development

No branches or pull requests

1 participant