// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
//
// FreeVirtualProcessorRoot.cpp
//
// Part of the ConcRT Resource Manager -- this header file contains the internal implementation for the free virtual
// processor root (represents a virtual processor as handed to a scheduler).
//
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#include "concrtinternal.h"
namespace Concurrency
{
namespace details
{
///
/// Constructs a new free virtual processor root.
///
///
/// The scheduler proxy this root is created for. A scheduler proxy holds RM data associated with an instance of
/// a scheduler.
///
///
/// The processor node that this root belongs to. The processor node is one among the nodes allocated to the
/// scheduler proxy.
///
///
/// The index into the array of cores for the processor node specified.
///
FreeVirtualProcessorRoot::FreeVirtualProcessorRoot(SchedulerProxy *pSchedulerProxy, SchedulerNode* pNode, unsigned int coreIndex)
: VirtualProcessorRoot(pSchedulerProxy, pNode, coreIndex),
m_pExecutingProxy(NULL),
m_pDeactivatedProxy(NULL)
{
}
///
/// Deletes the virtual processor.
///
void FreeVirtualProcessorRoot::DeleteThis()
{
//
// This comes in via a Remove() call on one of two threads:
//
// - The thread that is running the virtual processor root.
// - There can be no race. We just need to make sure that the thread on exit doesn't touch us after deletion.
//
// - An arbitrary thread.
// - We need to be careful that we aren't racing between that thread's getting out (SwitchOut followed by returning from
// the context's dispatch loop), and it trying to reset the vproc root in ResetOnIdle. We must spin until that has happened.
//
FreeThreadProxy *pCurrentProxy = NULL;
DWORD tlsSlot = GetSchedulerProxy()->GetResourceManager()->GetExecutionResourceTls();
void * tlsPointer = platform::__TlsGetValue(tlsSlot);
size_t tlsValue = (size_t) tlsPointer;
if (tlsPointer != NULL && ((tlsValue & TlsResourceBitMask) == TlsResourceInProxy))
pCurrentProxy = (FreeThreadProxy *) (tlsValue & ~TlsResourceInProxy);
if (pCurrentProxy != NULL && pCurrentProxy == m_pExecutingProxy)
{
pCurrentProxy->SetVirtualProcessorRoot(NULL);
}
else
{
//
// Spin wait until there isn't anything running atop this virtual processor root. Yes -- this means that someone had better be
// on the way out. If you call Remove on a virtual processor that's still running something, the resulting behavior is pretty much
// undefined anyway.
//
SpinUntilIdle();
}
delete this;
}
///
/// Called in order to reset this virtual processor root to a completely quiescent state (not running anything).
///
///
/// Indicates the state of the thread proxy that is making the call. The parameter is of type .
///
void FreeVirtualProcessorRoot::ResetOnIdle(SwitchingProxyState switchState)
{
FreeThreadProxy *pOriginalProxy = static_cast(m_pExecutingProxy);
LONG newVal = InterlockedDecrement(&m_activationFence);
if (newVal <= 0)
{
//
// The value could be -1 if we raced with the virtual processor root being removed on a different thread.
//
ASSERT(newVal >= -1);
//
// The fence going down to zero arbitrates between a possible reset/remove race.
//
if (newVal == 0)
Unsubscribe();
m_pExecutingProxy = NULL;
//
// *** READ THIS ***:
//
// It is imperative on this path that once m_pExecutingProxy has been set to NULL, nothing touches the this pointer. We are the race
// resolution between a client getting off a vproc and removing it. There can be a race between removal (DeleteThis) from outside and
// a SwitchOut (here) on the vproc.
//
if (switchState == Blocking)
{
pOriginalProxy->SuspendExecution();
}
}
else
{
Concurrency::IExecutionContext *pActivatedContext = AcquireActivatedContext();
ASSERT(newVal == 1 && pActivatedContext != NULL);
//
// This means we had a race between an Activate and an Idling (via either SwitchOut or return from dispatch loop). In either
// of these cases, we stashed away the context which was activated in m_pActivatedContext. This context now needs to run atop us.
//
FreeThreadProxy *pProxy = static_cast (pActivatedContext->GetProxy());
ASSERT(pProxy != NULL);
//
// While it is safe to run through an X->X context switch after the blocked flag is set, there is no point. If we raced a SwitchOut/Activate
// for the same proxy on the same vproc, it's a NOP.
//
if (pOriginalProxy != pProxy)
{
pOriginalProxy->SwitchTo(pActivatedContext, switchState);
}
}
}
///
/// Causes the scheduler to start running a thread proxy on the specified virtual processor root which will execute
/// the Dispatch method of the context supplied by pContext. Alternatively, it can be used to resume a
/// virtual processor root that was de-activated by a previous call to Deactivate.
///
///
/// The context which will be dispatched on a (potentially) new thread running atop this virtual processor root.
///
void FreeVirtualProcessorRoot::Activate(Concurrency::IExecutionContext *pContext)
{
if (pContext == NULL)
throw std::invalid_argument("pContext");
//
// If the context is being reused, it had better return a NULL thread proxy when we ask! This is part of the spec contract.
//
FreeThreadProxy * pProxy = static_cast (pContext->GetProxy());
if (pProxy == NULL)
{
pProxy = static_cast (GetSchedulerProxy()->GetNewThreadProxy(pContext));
}
//
// All calls to Activate after the first one can potentially race with the paired deactivate. This is allowed by the API, and we use the fence below
// to reduce kernel transitions in case of this race.
//
// We must also be careful because calls to activate can race with ResetOnIdle from either a SwitchOut() or a return from dispatch and we must
// be prepared to deal with this and the implications around trying to bind pContext.
//
LONG newVal = InterlockedIncrement(&m_activationFence);
if (newVal == 2)
{
ASSERT(m_pDeactivatedProxy == NULL);
//
// We received two activations in a row. According to the contract with the client, this is allowed, but we should expect a deactivation, a
// SwitchOut, or a return from dispatch loop soon after.
//
// Simply return instead of signalling the event. The deactivation will reduce the count back to 1. In addition, we're not responsible
// for changing the idle state on the core.
//
SetActivatedContext(pContext);
}
else
{
ASSERT(newVal == 1);
SpinUntilIdle();
ASSERT(m_pExecutingProxy == m_pDeactivatedProxy);
if (m_pExecutingProxy != NULL)
{
//
// The root already has an associated thread proxy. Check that the context provided is associated with
// the same proxy.
//
if (pProxy != m_pExecutingProxy)
{
//
// This is a fatal exception. We can potentially correct the state of the fence, but the scheduler is beyond confused about
// the spec. @TODO: Is it worth making some attempt to correct *our* state given that it's already messed up above us?
//
throw invalid_operation();
}
}
m_pDeactivatedProxy = NULL;
//
// An activated root increases the subscription level on the underlying core.
//
Subscribe();
//
// Affinitization sets this as the executing proxy for the virtual processor root.
//
Affinitize(pProxy);
ASSERT(m_pExecutingProxy == pProxy);
ASSERT(pProxy->GetVirtualProcessorRoot() != NULL);
ASSERT(pProxy->GetExecutionContext() != NULL);
pProxy->ResumeExecution();
}
}
///
/// Causes the thread proxy running atop this virtual processor root to temporarily stop dispatching pContext.
///
///
/// The context which should temporarily stop being dispatched by the thread proxy running atop this virtual processor root.
///
bool FreeVirtualProcessorRoot::Deactivate(Concurrency::IExecutionContext *pContext)
{
if (pContext == NULL)
throw std::invalid_argument("pContext");
if (m_pExecutingProxy == NULL)
throw invalid_operation();
FreeThreadProxy * pProxy = static_cast (pContext->GetProxy());
if (m_pExecutingProxy != pProxy)
{
throw invalid_operation();
}
LONG newVal = InterlockedDecrement(&m_activationFence);
if (newVal == 0)
{
//
// Reduce the subscription level on the core while the root is suspended. The count is used by dynamic resource management
// to tell which cores allocated to a scheduler are unused, so that they can be temporarily repurposed.
//
InterlockedExchangePointer(reinterpret_cast(&m_pDeactivatedProxy), m_pExecutingProxy);
Unsubscribe();
pProxy->SuspendExecution();
}
else
{
//
// There should be no Deactivate/Remove races.
//
ASSERT(newVal == 1);
Concurrency::IExecutionContext *pActivatedContext = AcquireActivatedContext();
//
// If we got here, it means while activated we saw an activation of pCtxX and a subsequent deactivation of pCtxY. These contexts
// must be equal to be spec legal.
//
ASSERT(pActivatedContext == pContext);
//
// The activation for this deactivation came in early, so we return early here without making a kernel transition.
//
}
return true;
}
///
/// Forces all data in the memory heirarchy of one processor to be visible to all other processors.
///
///
/// The context which is currently being dispatched by this root.
///
void FreeVirtualProcessorRoot::EnsureAllTasksVisible(Concurrency::IExecutionContext *pContext)
{
if (pContext == NULL)
throw std::invalid_argument("pContext");
if (m_pExecutingProxy == NULL)
throw invalid_operation();
FreeThreadProxy * pProxy = static_cast (pContext->GetProxy());
if (m_pExecutingProxy != pProxy)
{
throw invalid_operation();
}
GetSchedulerProxy()->GetResourceManager()->FlushStoreBuffers();
}
///
/// Called to affinitize the given thread proxy to this virtual processor.
///
///
/// The new thread proxy to run atop this virtual processor root.
///
void FreeVirtualProcessorRoot::Affinitize(FreeThreadProxy *pThreadProxy)
{
//
// Wait until the thread proxy is firmly blocked. This is essential to prevent vproc root orphanage
// if the thread proxy we're switching to is IN THE PROCESS of switching out to a different one. An example of how this
// could happen:
// 1] ctxA is running on vp1. It is in the process of blocking, and wants to switch to ctxB. This means ctxA's thread proxy
// tpA must affinitize ctxB's thread proxy tpB to its own vproc root, vproot1.
// 2] At the exact same time, ctxA is unblocked by ctxY and put onto a runnables collection in its scheduler. Meanwhile, ctxZ
// executing on vp2, has also decided to block. It picks ctxA off the runnables collection, and proceeds to switch to it.
// This means that ctxZ's thread proxy tpZ must affinitize ctxA's thread proxy tpA to ITS vproc root vproot2.
// 3] Now, if tpZ affinitizes tpA to vproot2 BEFORE tpA has had a chance to affinitize tpB to vproot1, tpB gets mistakenly
// affinitized to vproot2, and vproot1 is orphaned.
// In order to prevent this, tpZ MUST wait until AFTER tpA has finished its affinitization. This is indicated via the
// blocked flag. tpA will set its blocked flag to 1, after it has finished affintizing tpB to vproot1, at which point it is
// safe for tpZ to modify tpA's vproc root and change it from vproot1 to vproot2.
//
pThreadProxy->SpinUntilBlocked();
m_pExecutingProxy = pThreadProxy;
pThreadProxy->SetVirtualProcessorRoot(this);
HardwareAffinity newAffinity = GetSchedulerProxy()->GetNodeAffinity(GetNodeId());
pThreadProxy->SetAffinity(newAffinity);
}
} // namespace details
} // namespace Concurrency