RCU (Read-Copy Update) -2-

 

RCU Synchronization

synchronize_rcu()

kernel/rcu/tree_plugin.h

/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  Note, however, that
 * upon return from synchronize_rcu(), the caller might well be executing
 * concurrently with new RCU read-side critical sections that began while
 * synchronize_rcu() was waiting.  RCU read-side critical sections are
 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
 *
 * See the description of synchronize_sched() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu(void)
{
        rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
                           !lock_is_held(&rcu_lock_map) &&
                           !lock_is_held(&rcu_sched_lock_map),
                           "Illegal synchronize_rcu() in RCU read-side critical section");
        if (!rcu_scheduler_active)
                return;
        if (rcu_expedited)
                synchronize_rcu_expedited();
        else
                wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);

Waits until a grace period has elapsed.

  • if (!rcu_scheduler_active) return;
    • If the RCU scheduler is not active yet, give up and simply return from the function.
  • if (rcu_expedited) synchronize_rcu_expedited(); else wait_rcu_gp(call_rcu);
    • If the /sys/kernel/rcu_expedited value is set, the faster brute-force RCU grace period method is used instead of the normal one.
    • Otherwise, the normal RCU grace period method (wait_rcu_gp(call_rcu)) is used. A writer-side usage sketch follows this list.
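
Below is a minimal writer-side usage sketch, not taken from the kernel source: the names my_data, gp, my_read() and my_replace() are made up for illustration. It shows why a caller waits in synchronize_rcu(): only after the grace period may the old object be freed, because every reader that could still see it has finished.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical example -- all identifiers below are illustrative only. */
struct my_data {
        int val;
};

static struct my_data __rcu *gp;        /* RCU-protected global pointer */
static DEFINE_SPINLOCK(gp_lock);        /* serializes updaters */

/* Reader side: may run concurrently with my_replace(). */
static int my_read(void)
{
        struct my_data *p;
        int val = -1;

        rcu_read_lock();
        p = rcu_dereference(gp);
        if (p)
                val = p->val;
        rcu_read_unlock();
        return val;
}

/* Updater side: publish the new version, then wait for a grace period
 * with synchronize_rcu() before freeing the old one. */
static int my_replace(int new_val)
{
        struct my_data *new_p, *old_p;

        new_p = kmalloc(sizeof(*new_p), GFP_KERNEL);
        if (!new_p)
                return -ENOMEM;
        new_p->val = new_val;

        spin_lock(&gp_lock);
        old_p = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
        rcu_assign_pointer(gp, new_p);
        spin_unlock(&gp_lock);

        synchronize_rcu();      /* wait until all pre-existing readers finish */
        kfree(old_p);           /* no reader can still hold a reference */
        return 0;
}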

 

synchronize_rcu_expedited()

kernel/rcu/tree_plugin.h

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it.  The basic
 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
 * the ->blkd_tasks lists and wait for this list to drain.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code.
 * In fact, if you are using synchronize_rcu_expedited() in a loop,
 * please restructure your code to batch your updates, and then Use a
 * single synchronize_rcu() instead.
 */
void synchronize_rcu_expedited(void)
{
        unsigned long flags;
        struct rcu_node *rnp;
        struct rcu_state *rsp = &rcu_preempt_state;
        unsigned long snap;
        int trycount = 0;

        smp_mb(); /* Caller's modifications seen first by other CPUs. */
        snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
        smp_mb(); /* Above access cannot bleed into critical section. */

        /*
         * Block CPU-hotplug operations.  This means that any CPU-hotplug
         * operation that finds an rcu_node structure with tasks in the
         * process of being boosted will know that all tasks blocking
         * this expedited grace period will already be in the process of
         * being boosted.  This simplifies the process of moving tasks
         * from leaf to root rcu_node structures.
         */
        if (!try_get_online_cpus()) {
                /* CPU-hotplug operation in flight, fall back to normal GP. */
                wait_rcu_gp(call_rcu);
                return;
        }

        /*
         * Acquire lock, falling back to synchronize_rcu() if too many
         * lock-acquisition failures.  Of course, if someone does the
         * expedited grace period for us, just leave.
         */
        while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
                if (ULONG_CMP_LT(snap,
                    ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
                        put_online_cpus();
                        goto mb_ret; /* Others did our work for us. */
                }
                if (trycount++ < 10) {
                        udelay(trycount * num_online_cpus());
                } else {
                        put_online_cpus();
                        wait_rcu_gp(call_rcu);
                        return;
                }
        }
        if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
                put_online_cpus();
                goto unlock_mb_ret; /* Others did our work for us. */
        }
  • struct rcu_state *rsp = &rcu_preempt_state;
    • Assign the global rcu_preempt_state object to the rsp pointer so that the rcu tree nodes and related state can be accessed through it.
  • snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
    • Read sync_rcu_preempt_exp_count, add 1, and store the result in snap as this caller's "ticket".
  • if (!try_get_online_cpus()) { wait_rcu_gp(call_rcu); return; }
    • If the CPU-hotplug mutex cannot be acquired, fall back to the normal RCU grace period method.
    • Reference: Optimizing CPU hotplug locking | LWN.net
  • while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
    • If the rcu mutex cannot yet be acquired because of contention, keep looping.
  • if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { put_online_cpus(); goto mb_ret; }
    • If snap is now less than sync_rcu_preempt_exp_count, another caller has already done our work, so exit the function (see the comparison sketch after this list).
  • if (trycount++ < 10) { udelay(trycount * num_online_cpus()); } else { put_online_cpus(); wait_rcu_gp(call_rcu); return; }
    • For the first 10 attempts, delay briefly, in proportion to the attempt count. From the 10th attempt on, wait using the normal RCU grace period method and return.
  • if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { put_online_cpus(); goto unlock_mb_ret; }
    • After the mutex has been acquired, if snap is less than sync_rcu_preempt_exp_count, others did our work for us, so exit the function.
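
The early-exit checks above rely on a wrap-safe comparison of the snapshot against sync_rcu_preempt_exp_count. The following user-space sketch shows how ULONG_CMP_LT() behaves; the two macro definitions are reproduced from my reading of the kernel headers of this era, so verify them against your tree before relying on them.

/* Standalone demo of the snapshot check used by synchronize_rcu_expedited(). */
#include <limits.h>
#include <stdio.h>

/* Assumed to match the kernel's helpers (see include/linux/rcupdate.h). */
#define ULONG_CMP_GE(a, b)      (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)      (ULONG_MAX / 2 < (a) - (b))

int main(void)
{
        unsigned long count = 0, snap;

        snap = count + 1;                          /* ticket taken on entry */
        printf("%d\n", ULONG_CMP_LT(snap, count)); /* 0: no one did our work yet */

        count += 2;                                /* expedited GPs completed elsewhere */
        printf("%d\n", ULONG_CMP_LT(snap, count)); /* 1: others did our work for us */

        /* The subtraction-based compare stays correct across wrap-around. */
        count = ULONG_MAX;
        snap = count + 1;                          /* wraps to 0 */
        printf("%d\n", ULONG_CMP_LT(count, snap)); /* 1: ULONG_MAX is "before" 0 */
        return 0;
}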

 

        /* force all RCU readers onto ->blkd_tasks lists. */
        synchronize_sched_expedited();

        /* Initialize ->expmask for all non-leaf rcu_node structures. */
        rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
                rnp->expmask = rnp->qsmaskinit;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        }

        /* Snapshot current state of ->blkd_tasks lists. */
        rcu_for_each_leaf_node(rsp, rnp)
                sync_rcu_preempt_exp_init(rsp, rnp);
        if (NUM_RCU_NODES > 1)
                sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));

        put_online_cpus();

        /* Wait for snapshotted ->blkd_tasks lists to drain. */
        rnp = rcu_get_root(rsp);
        wait_event(sync_rcu_preempt_exp_wq,
                   sync_rcu_preempt_exp_done(rnp));

        /* Clean up and exit. */
        smp_mb(); /* ensure expedited GP seen before counter increment. */
        ACCESS_ONCE(sync_rcu_preempt_exp_count) =
                                        sync_rcu_preempt_exp_count + 1;
unlock_mb_ret:
        mutex_unlock(&sync_rcu_preempt_exp_mutex);
mb_ret:
        smp_mb(); /* ensure subsequent action seen after grace period. */
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  • synchronize_sched_expedited();
    • Force all RCU readers onto the ->blkd_tasks lists by expediting an RCU-sched grace period.
  • rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); rnp->expmask = rnp->qsmaskinit; raw_spin_unlock_irqrestore(&rnp->lock, flags); }
    • Loop over the non-leaf rcu nodes and initialize rnp->expmask from rnp->qsmaskinit.
  • rcu_for_each_leaf_node(rsp, rnp) sync_rcu_preempt_exp_init(rsp, rnp);
    • Loop over the leaf nodes and snapshot the current state of their ->blkd_tasks lists via sync_rcu_preempt_exp_init().
  • if (NUM_RCU_NODES > 1) sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
    • If the rcu node tree is built with two or more levels, initialize the root node in the same way.
  • rnp = rcu_get_root(rsp); wait_event(sync_rcu_preempt_exp_wq, sync_rcu_preempt_exp_done(rnp));
    • Sleep on sync_rcu_preempt_exp_wq until sync_rcu_preempt_exp_done() reports that nothing is still blocking the expedited grace period at the root node, i.e. until the snapshotted ->blkd_tasks lists have drained (see the wait-queue sketch after this list).
  • ACCESS_ONCE(sync_rcu_preempt_exp_count) = sync_rcu_preempt_exp_count + 1
    • Increment sync_rcu_preempt_exp_count by 1 so that later callers can see that this expedited grace period has completed.
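
The wait_event() call above uses the kernel's generic wait-queue API. Below is a minimal sketch of that pattern with made-up names (my_wq, my_done, waiter(), completer()); the RCU code uses sync_rcu_preempt_exp_wq and the sync_rcu_preempt_exp_done() predicate in the same way.

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);  /* analogous to sync_rcu_preempt_exp_wq */
static bool my_done;                    /* analogous to sync_rcu_preempt_exp_done() */

/* Waiter: sleeps until the condition becomes true.  wait_event()
 * re-evaluates the condition after every wake-up, so spurious
 * wake-ups are harmless. */
static void waiter(void)
{
        wait_event(my_wq, my_done);
}

/* Completer: makes the condition true, then wakes the waiter --
 * analogous to the last blocked task draining a ->blkd_tasks list. */
static void completer(void)
{
        my_done = true;
        wake_up(&my_wq);
}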

 

wait_rcu_gp()

kernel/rcu/update.c

void wait_rcu_gp(call_rcu_func_t crf)
{
        struct rcu_synchronize rcu;

        init_rcu_head_on_stack(&rcu.head);
        init_completion(&rcu.completion);
        /* Will wake me after RCU finished. */
        crf(&rcu.head, wakeme_after_rcu);
        /* Wait for it. */
        wait_for_completion(&rcu.completion);
        destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(wait_rcu_gp);

 

  • init_rcu_head_on_stack(&rcu.head);
    • Initializes the on-stack rcu_head for the debug-objects infrastructure.
  • init_completion(&rcu.completion);
    • Initializes the completion structure.
  • crf(&rcu.head, wakeme_after_rcu);
    • Invokes the function received through the crf argument, passing wakeme_after_rcu() as the callback.
      • e.g. call_rcu() is invoked
  • wait_for_completion(&rcu.completion);
    • Waits until the callback signals completion (a sketch of the same pattern follows this list).
  • destroy_rcu_head_on_stack(&rcu.head);
    • Removes the on-stack rcu.head from debug-objects tracking.
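
The same "wrap an asynchronous callback into a synchronous call" pattern appears elsewhere in the kernel. As a rough analogy, the sketch below does with a workqueue what wait_rcu_gp() does with call_rcu(): queue something asynchronous, then block on an on-stack completion until its callback runs. All names (my_work, my_work_fn, my_sync_call) are made up for illustration.

#include <linux/completion.h>
#include <linux/workqueue.h>

struct my_work {
        struct work_struct work;
        struct completion done;
};

static void my_work_fn(struct work_struct *work)
{
        struct my_work *mw = container_of(work, struct my_work, work);

        /* ... asynchronous part ... */
        complete(&mw->done);            /* like wakeme_after_rcu() */
}

static void my_sync_call(void)
{
        struct my_work mw;

        INIT_WORK_ONSTACK(&mw.work, my_work_fn);
        init_completion(&mw.done);
        schedule_work(&mw.work);        /* like crf(&rcu.head, wakeme_after_rcu) */
        wait_for_completion(&mw.done);  /* like wait_for_completion(&rcu.completion) */
        destroy_work_on_stack(&mw.work);
}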

 

wakeme_after_rcu()

kernel/rcu/update.c

/*
 * Awaken the corresponding synchronize_rcu() instance now that a
 * grace period has elapsed.
 */
static void wakeme_after_rcu(struct rcu_head  *head)
{
        struct rcu_synchronize *rcu;

        rcu = container_of(head, struct rcu_synchronize, head);
        complete(&rcu->completion);
}

Now that a grace period has elapsed, sends the completion signal to the rcu_synchronize instance that is waiting in wait_rcu_gp().

 

synchronize_sched_expedited()

kernel/rcu/tree.c

/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code.  In fact,
 * if you are using synchronize_sched_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_sched() instead.
 *
 * This implementation can be thought of as an application of ticket
 * locking to RCU, with sync_sched_expedited_started and
 * sync_sched_expedited_done taking on the roles of the halves
 * of the ticket-lock word.  Each task atomically increments
 * sync_sched_expedited_started upon entry, snapshotting the old value,
 * then attempts to stop all the CPUs.  If this succeeds, then each
 * CPU will have executed a context switch, resulting in an RCU-sched
 * grace period.  We are then done, so we use atomic_cmpxchg() to
 * update sync_sched_expedited_done to match our snapshot -- but
 * only if someone else has not already advanced past our snapshot.
 *
 * On the other hand, if try_stop_cpus() fails, we check the value
 * of sync_sched_expedited_done.  If it has advanced past our
 * initial snapshot, then someone else must have forced a grace period
 * some time after we took our snapshot.  In this case, our work is
 * done for us, and we can simply return.  Otherwise, we try again,
 * but keep our initial snapshot for purposes of checking for someone
 * doing our work for us.
 *
 * If we fail too many times in a row, we fall back to synchronize_sched().
 */
void synchronize_sched_expedited(void)
{
        cpumask_var_t cm;
        bool cma = false;
        int cpu;
        long firstsnap, s, snap;
        int trycount = 0;
        struct rcu_state *rsp = &rcu_sched_state;

        /*
         * If we are in danger of counter wrap, just do synchronize_sched().
         * By allowing sync_sched_expedited_started to advance no more than
         * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
         * that more than 3.5 billion CPUs would be required to force a
         * counter wrap on a 32-bit system.  Quite a few more CPUs would of
         * course be required on a 64-bit system.
         */
        if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
                         (ulong)atomic_long_read(&rsp->expedited_done) +
                         ULONG_MAX / 8)) {
                synchronize_sched();
                atomic_long_inc(&rsp->expedited_wrap);
                return;
        }

 

 

        /*
         * Take a ticket.  Note that atomic_inc_return() implies a
         * full memory barrier.
         */
        snap = atomic_long_inc_return(&rsp->expedited_start);
        firstsnap = snap;
        if (!try_get_online_cpus()) {
                /* CPU hotplug operation in flight, fall back to normal GP. */
                wait_rcu_gp(call_rcu_sched);
                atomic_long_inc(&rsp->expedited_normal);
                return;
        }
        WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));

        /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
        cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
        if (cma) {
                cpumask_copy(cm, cpu_online_mask);
                cpumask_clear_cpu(raw_smp_processor_id(), cm);
                for_each_cpu(cpu, cm) {
                        struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

                        if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
                                cpumask_clear_cpu(cpu, cm);
                }
                if (cpumask_weight(cm) == 0)
                        goto all_cpus_idle;
        }

 

 

        /*
         * Each pass through the following loop attempts to force a
         * context switch on each CPU.
         */
        while (try_stop_cpus(cma ? cm : cpu_online_mask,
                             synchronize_sched_expedited_cpu_stop,
                             NULL) == -EAGAIN) {
                put_online_cpus();
                atomic_long_inc(&rsp->expedited_tryfail);

                /* Check to see if someone else did our work for us. */
                s = atomic_long_read(&rsp->expedited_done);
                if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone1);
                        free_cpumask_var(cm);
                        return;
                }

                /* No joy, try again later.  Or just synchronize_sched(). */
                if (trycount++ < 10) {
                        udelay(trycount * num_online_cpus());
                } else {
                        wait_rcu_gp(call_rcu_sched);
                        atomic_long_inc(&rsp->expedited_normal);
                        free_cpumask_var(cm);
                        return;
                }

                /* Recheck to see if someone else did our work for us. */
                s = atomic_long_read(&rsp->expedited_done);
                if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone2);
                        free_cpumask_var(cm);
                        return;
                }

                /*
                 * Refetching sync_sched_expedited_started allows later
                 * callers to piggyback on our grace period.  We retry
                 * after they started, so our grace period works for them,
                 * and they started after our first try, so their grace
                 * period works for us.
                 */
                if (!try_get_online_cpus()) {
                        /* CPU hotplug operation in flight, use normal GP. */
                        wait_rcu_gp(call_rcu_sched);
                        atomic_long_inc(&rsp->expedited_normal);
                        free_cpumask_var(cm);
                        return;
                }
                snap = atomic_long_read(&rsp->expedited_start);
                smp_mb(); /* ensure read is before try_stop_cpus(). */
        }
        atomic_long_inc(&rsp->expedited_stoppedcpus);

 

 

all_cpus_idle:
        free_cpumask_var(cm);

        /*
         * Everyone up to our most recent fetch is covered by our grace
         * period.  Update the counter, but only if our work is still
         * relevant -- which it won't be if someone who started later
         * than we did already did their update.
         */
        do {
                atomic_long_inc(&rsp->expedited_done_tries);
                s = atomic_long_read(&rsp->expedited_done);
                if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_done_lost);
                        break;
                }
        } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
        atomic_long_inc(&rsp->expedited_done_exit);

        put_online_cpus();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);

 

 

 

Asynchronous Update

call_rcu()

kernel/rcu/tree_plugin.h

/*
 * Queue a preemptible-RCU callback for invocation after a grace period.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
        __call_rcu(head, func, &rcu_preempt_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);

Queues a preemptible-RCU callback so that it is invoked after a grace period has elapsed; unlike synchronize_rcu(), the caller does not block.
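
A typical use of this asynchronous path embeds a struct rcu_head in the protected object and frees the object from the callback. The sketch below is illustrative only (my_node, my_free_rcu and my_del are made-up names), and assumes the updater already holds whatever lock serializes writers.

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {
        struct list_head list;
        struct rcu_head rcu;            /* reserved for call_rcu() */
        int key;
};

static void my_free_rcu(struct rcu_head *head)
{
        /* Runs after a grace period; recover the enclosing object. */
        kfree(container_of(head, struct my_node, rcu));
}

static void my_del(struct my_node *node)
{
        list_del_rcu(&node->list);              /* unlink; readers may still see it */
        call_rcu(&node->rcu, my_free_rcu);      /* returns immediately, free is deferred */
}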

 

__call_rcu()

kernel/rcu/tree.c

/*
 * Helper function for call_rcu() and friends.  The cpu argument will
 * normally be -1, indicating "currently running CPU".  It may specify
 * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
 * is expected to specify a CPU.
 */
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
           struct rcu_state *rsp, int cpu, bool lazy)
{
        unsigned long flags;
        struct rcu_data *rdp;

        WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
        if (debug_rcu_head_queue(head)) {
                /* Probable double call_rcu(), so leak the callback. */
                ACCESS_ONCE(head->func) = rcu_leak_callback;
                WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
                return;
        }
        head->func = func;
        head->next = NULL;

        /*
         * Opportunistically note grace-period endings and beginnings.
         * Note that we might see a beginning right after we see an
         * end, but never vice versa, since this CPU has to pass through
         * a quiescent state betweentimes.
         */
        local_irq_save(flags);
        rdp = this_cpu_ptr(rsp->rda);

        /* Add the callback to our list. */
        if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
                int offline;

                if (cpu != -1)
                        rdp = per_cpu_ptr(rsp->rda, cpu);
                offline = !__call_rcu_nocb(rdp, head, lazy, flags);
                WARN_ON_ONCE(offline);
                /* _call_rcu() is illegal on offline CPU; leak the callback. */
                local_irq_restore(flags);
                return;
        }
        ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
        if (lazy)
                rdp->qlen_lazy++;
        else
                rcu_idle_count_callbacks_posted();
        smp_mb();  /* Count before adding callback for rcu_barrier(). */
        *rdp->nxttail[RCU_NEXT_TAIL] = head;
        rdp->nxttail[RCU_NEXT_TAIL] = &head->next;

        if (__is_kfree_rcu_offset((unsigned long)func))
                trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
                                         rdp->qlen_lazy, rdp->qlen);
        else
                trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);

        /* Go handle any RCU core processing required. */
        __call_rcu_core(rsp, rdp, head, flags);
        local_irq_restore(flags);
}

Helper function for call_rcu() and friends: queues a preemptible-RCU callback for invocation after a grace period has elapsed. The cpu argument is normally -1, which means the currently running CPU.

  • if (debug_rcu_head_queue(head)) { ... }
    • While debugging with the CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel option, a probable double call_rcu() on the same head is detected here; a warning is printed, the callback is deliberately leaked, and the function returns.
  • head->func = func; head->next = NULL;
    • Store the callback function received as an argument in the rcu head and set next to NULL.
  • rdp = this_cpu_ptr(rsp->rda);
    • Get the rcu data for the current cpu (see the per-cpu sketch after this list).
  • if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) { ... }
    • In the unlikely case that the normal callback list is unusable, or a specific cpu was requested (cpu != -1), hand the callback to the no-CBs path via __call_rcu_nocb() and return.
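
The rdp = this_cpu_ptr(rsp->rda) step above is the usual per-cpu access pattern: rsp->rda points at per-cpu rcu_data, and this_cpu_ptr() resolves to the running CPU's copy. The sketch below shows the same pattern with a hypothetical per-cpu counter; as in __call_rcu(), interrupts are disabled around the access so the task cannot migrate to another CPU in between.

#include <linux/irqflags.h>
#include <linux/percpu.h>

/* Hypothetical per-cpu statistic -- names are illustrative only. */
struct my_pcpu_stat {
        unsigned long enqueued;
};

static DEFINE_PER_CPU(struct my_pcpu_stat, my_stats);

static void my_count_enqueue(void)
{
        struct my_pcpu_stat *sp;
        unsigned long flags;

        local_irq_save(flags);          /* stay on this CPU's copy */
        sp = this_cpu_ptr(&my_stats);
        sp->enqueued++;
        local_irq_restore(flags);
}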

 

__call_rcu_core()

kernel/rcu/tree.c

/*
 * Handle any core-RCU processing required by a call_rcu() invocation.
 */
static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
                            struct rcu_head *head, unsigned long flags)
{
        bool needwake;

        /*
         * If called from an extended quiescent state, invoke the RCU
         * core in order to force a re-evaluation of RCU's idleness.
         */
        if (!rcu_is_watching() && cpu_online(smp_processor_id()))
                invoke_rcu_core();

        /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
        if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
                return;

        /*
         * Force the grace period if too many callbacks or too long waiting.
         * Enforce hysteresis, and don't invoke force_quiescent_state()
         * if some other CPU has recently done so.  Also, don't bother
         * invoking force_quiescent_state() if the newly enqueued callback
         * is the only one waiting for a grace period to complete.
         */
        if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {

                /* Are we ignoring a completed grace period? */
                note_gp_changes(rsp, rdp);

                /* Start a new grace period if one not already started. */
                if (!rcu_gp_in_progress(rsp)) {
                        struct rcu_node *rnp_root = rcu_get_root(rsp);

                        raw_spin_lock(&rnp_root->lock);
                        smp_mb__after_unlock_lock();
                        needwake = rcu_start_gp(rsp);
                        raw_spin_unlock(&rnp_root->lock);
                        if (needwake)
                                rcu_gp_kthread_wake(rsp);
                } else {
                        /* Give the grace period a kick. */
                        rdp->blimit = LONG_MAX;
                        if (rsp->n_force_qs == rdp->n_force_qs_snap &&
                            *rdp->nxttail[RCU_DONE_TAIL] != head)
                                force_quiescent_state(rsp);
                        rdp->n_force_qs_snap = rsp->n_force_qs;
                        rdp->qlen_last_fqs_check = rdp->qlen;
                }
        }
}

 

RCU NO-CB

 

__call_rcu_nocb()

kernel/rcu/tree_plugin.h

/*
 * This is a helper for __call_rcu(), which invokes this when the normal
 * callback queue is inoperable.  If this is not a no-CBs CPU, this
 * function returns failure back to __call_rcu(), which can complain
 * appropriately.
 *
 * Otherwise, this function queues the callback where the corresponding
 * "rcuo" kthread can find it.
 */
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
                            bool lazy, unsigned long flags)
{

        if (!rcu_is_nocb_cpu(rdp->cpu))
                return false;
        __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
        if (__is_kfree_rcu_offset((unsigned long)rhp->func))
                trace_rcu_kfree_callback(rdp->rsp->name, rhp,
                                         (unsigned long)rhp->func,
                                         -atomic_long_read(&rdp->nocb_q_count_lazy),
                                         -atomic_long_read(&rdp->nocb_q_count));
        else
                trace_rcu_callback(rdp->rsp->name, rhp,
                                   -atomic_long_read(&rdp->nocb_q_count_lazy),
                                   -atomic_long_read(&rdp->nocb_q_count));

        /*
         * If called from an extended quiescent state with interrupts
         * disabled, invoke the RCU core in order to allow the idle-entry
         * deferred-wakeup check to function.
         */
        if (irqs_disabled_flags(flags) &&
            !rcu_is_watching() &&
            cpu_online(smp_processor_id()))
                invoke_rcu_core();

        return true;
}

 

 

 

 

RCU NO-CB Configuration

rcu_is_nocb_cpu()

kernel/rcu/tree_plugin.h

/* Is the specified CPU a no-CBs CPU? */
bool rcu_is_nocb_cpu(int cpu)
{
        if (have_rcu_nocb_mask)
                return cpumask_test_cpu(cpu, rcu_nocb_mask);
        return false;
}

Returns whether the requested cpu is configured as a no-CBs cpu (its rcu callbacks are handled by rcuo kthreads).

  • The have_rcu_nocb_mask variable is set to true once rcu_nocb_setup() has run and allocated the rcu_nocb_mask cpumask.
  • When the CONFIG_RCU_NOCB_CPU_ALL kernel option is used, true is always returned so that callbacks always run in the rcu kthreads.
  • When neither CONFIG_RCU_NOCB_CPU_ALL nor CONFIG_RCU_NOCB_CPU is used, false is always returned so that callbacks are processed in the usual way.

 

rcu_nocb_setup()

kernel/rcu/tree_plugin.h

/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
static int __init rcu_nocb_setup(char *str)
{
        alloc_bootmem_cpumask_var(&rcu_nocb_mask);
        have_rcu_nocb_mask = true;
        cpulist_parse(str, rcu_nocb_mask);
        return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);

Parses the cpu list given by the "rcu_nocbs=" kernel parameter. The cpus configured this way have their rcu callback processing handled by rcuo kthreads; a sketch of the same boot-parameter pattern follows below.

  • e.g. rcu_nocbs=3-6,8-10
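
The sketch below mirrors the boot-parameter handling shown above for a hypothetical "myfeature_cpus=" parameter; every identifier other than the kernel APIs (__setup(), alloc_bootmem_cpumask_var(), cpulist_parse(), cpumask_test_cpu()) is made up for illustration.

#include <linux/cpumask.h>
#include <linux/init.h>

static cpumask_var_t myfeature_mask;
static bool have_myfeature_mask;

/* Parse "myfeature_cpus=3-6,8-10" from the kernel command line,
 * the same way rcu_nocb_setup() parses "rcu_nocbs=". */
static int __init myfeature_setup(char *str)
{
        alloc_bootmem_cpumask_var(&myfeature_mask);
        have_myfeature_mask = true;
        cpulist_parse(str, myfeature_mask);
        return 1;
}
__setup("myfeature_cpus=", myfeature_setup);

/* Membership test, analogous to rcu_is_nocb_cpu(). */
static bool myfeature_cpu(int cpu)
{
        if (have_myfeature_mask)
                return cpumask_test_cpu(cpu, myfeature_mask);
        return false;
}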

 
