Examples - Tutorials

[1]:
import numpy as np
from time import perf_counter
from sympy import symbols, print_latex, MatrixSymbol

from sparkdatachallenge import (
    check_input,
    generate_add_triu,
    generate_mul_triu,
    solution_brute1,
    solution_brute2,
    solution_math,
    solution_math2
)

from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
[2]:
x,y = symbols('x y')

Generating latex

[3]:
print_latex(x*y>=x+y)
x y \geq x + y
[4]:
print_latex(y * (x-1) >= x)
y \left(x - 1\right) \geq x
[5]:
print_latex(y >= x/(x-1))
y \geq \frac{x}{x - 1}

Math

Some math:

\begin{align} x y &\geq x + y \\ y \left(x - 1\right) &\geq x \\ y \begin{cases} \geq \frac{x}{x - 1},\qquad x-1 > 0 \\ \leq \frac{x}{x - 1},\qquad x-1 < 0 \\ \end{cases} \end{align}
[6]:
# coord
x0 = np.linspace(0,0.9,100)
x1 = np.linspace(1.05,8,1000)
x  = np.hstack([x0,x1])

fig = plt.figure(figsize=(10,8))
plt.plot(x0, x0 / (x0-1))
plt.plot(x1, x1 / (x1-1),linewidth=4)

ax = plt.gca()

ax.add_patch(Rectangle(xy=(0,-10),width=1,height=30, fill=True, alpha=.2, color='red'))
# ax.add_patch(Rectangle(xy=(2,-10),width=6,height=30, fill=True, alpha=.2, color='green'))
plt.fill_between(x1,x1/(x1-1),20,color='green',alpha=.3)
plt.fill_between(x1,x1/(x1-1),-20,color='red',alpha=.2)
ax.axhline(2)
ax.axvline(2)
ax.axhline(1)
ax.axhline(0, c='black')
ax.axvline(0,c='red')
plt.xlabel(r"$x$",fontsize=16)
plt.ylabel(r"$y$",fontsize=16)
ax.text(1.4,5,r"$\mathbf{\frac{x}{x-1}}$",fontsize=24,c='black')
plt.ylim(-2,6)
plt.grid()
plt.show()
../_images/notebook_Tutorial_9_0.png

The plot above shows the possibilities, except for the zero combinations (case 1).

Cases:

  1. \(x=0 \Rightarrow y=0\) number of pairs : \(\frac{n(n-1)}{2}\)

  2. \(0<x<1 \Rightarrow\) no solution

  3. \(x=1 \overset{\text{eq.2}}{\Longrightarrow} y*0 >=1 \Rightarrow\) no solution

  4. \(1<x<2 \Rightarrow y >= \frac{x}{x-1}\)

  5. \(x=2 \Rightarrow y >=2\)

  6. \(x>=2 \Rightarrow y >= x/(x-1) \Rightarrow y >=2\) using non-decreasing sorted list all pairs are multiplicative => number of pairs is \(\frac{n(n-1)}{2}\)

Simple test 1

[3]:
A = np.array([0, 1, 2 , 2, 3, 5])
B = np.array([500_000,500_000,0,0,0,20_000])
[4]:
solution_brute1(A,B, verbose=True)
[(1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5), (4, 5)]
[4]:
8
[5]:
solution_brute2(A,B, verbose=True)
[(1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5), (4, 5)]
[5]:
8
[6]:
solution_math(A,B)
[6]:
8
[7]:
solution_math2(A,B)
[7]:
8

Simple test 2

[8]:
A = np.hstack([np.array([0]),A])
B = np.hstack([np.array([0]),B])
[9]:
solution_brute1(A,B, verbose=True)
[(2, 5), (2, 6), (3, 4), (3, 5), (3, 6), (4, 5), (4, 6), (5, 6)]
[9]:
8
[10]:
solution_brute2(A,B, verbose=True)
[(2, 5), (2, 6), (3, 4), (3, 5), (3, 6), (4, 5), (4, 6), (5, 6)]
[10]:
8
[11]:
solution_math(A,B)
[11]:
8
[12]:
solution_math2(A,B)
[12]:
8
[13]:
A = np.hstack([np.array([0]),A])
B = np.hstack([np.array([0]),B])
[14]:
solution_brute1(A,B, verbose=True)
[(0, 1), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (5, 6), (5, 7), (6, 7)]
[14]:
9
[15]:
solution_brute2(A,B, verbose=True)
[(0, 1), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (5, 6), (5, 7), (6, 7)]
[15]:
9
[16]:
solution_math(A,B)
[16]:
9
[17]:
solution_math2(A,B)
[17]:
9

Performance test

[18]:
for p in range(0,6):
    tic = perf_counter()
    A=np.array([1,3]*int(10**p / 2))
    B=np.array([500_000,0]*int(10**p / 2))
    print(solution_math(A,B))
    print("Pow {} took {}s".format(p, perf_counter()-tic))
0
Pow 0 took 0.0003568940155673772s
35
Pow 1 took 0.00016395500279031694s
3725
Pow 2 took 0.00045986700570210814s
374750
Pow 3 took 0.0034787720069289207s
37497500
Pow 4 took 0.060309346998110414s
1000000000
Pow 5 took 3.537817439995706s
[ ]:
def compare(A,B,P,Q,scale=1_000_000):
    # use associativity on integer and decimal part
    prodi = A[P] * A[Q]  + ( A[P] * B[Q] ) // scale + ( A[Q] * B[P] ) // scale
    prodd = (( A[P] * B[Q] ) % scale) * scale  + (( A[Q] * B[P] ) % scale ) * scale + (B[P]*B[Q])
    print(prodi, prodd)

    sumi = A[P] + A[Q] + (B[P]+B[Q]) // scale
    sumd = ((B[P]+B[Q]) % scale)*scale

    print(sumi,sumd)

    if prodi > sumi:
        return True
    elif prodi == sumi:
        if prodd >= sumd:
            return True
    return False

compare([0,1,3],[0,400_000,500_000],1,2)
[33]:
def solution_math2(
    A: np.array, B: np.array, threshold: int = 1_000_000_000, scale: int = 1_000_000
) -> int:
    """Math based method. See tutorial/examples in docs for more details.add()

    Parameters
    ----------
    A : np.array
        integer part of the decimal numbers
    B : np.array
        decimal part of the decimal numbers
    threshold : int, optional
        threshold value for the number of pairs, by default 1_000_000_000
    scale : int, optional
        scale factor for the decimals, by default 1_000_000

    Returns
    -------
    int
        returns number of mul pairs or the threshold value
    """
    C: np.array = np.sort(A + B / scale)

    # init count
    count = 0
    # x == 0 => y ==0 => count  C[i] = 0.0
    nzero = C[C == 0.0].shape[0]

    # calculate the number of zero - zero pairs and update count
    if nzero > 1:
        count = nzero * (nzero - 1) / 2

    if count > threshold:
        return threshold
    # 0 < x < 1 => no solution

    # x==1 => no solution

    #  1 < x < 2 => y >= x / (x-1)
    # inequality is always satisfied for x, y >= 2
    # for 1<x<2  we need y>=x/(x-1)
    # get indices of A == 1
    one_idx = np.argwhere(divmod(C,1)[0].astype(int)==1)[:,0]

    for idx in one_idx:
        # A == 1
        # A.B / (A.B - 1) = 1.B / (1.B - 1) = 1.B / 0.B = 1 / 0.B + 1
        f = scale / B[idx] * scale # max accuracy in B

        fscaled = int(f) + scale
        fscaled += 0 if np.ceil(f) else 1 # taking into account the last digit

        count += C[scale * C >= fscaled].shape[0]


    if count > threshold:
        return threshold

    # case x>=2 and y>=2
    k = C[C >= 2.0].shape[0]

    count += k * (k - 1) / 2

    return int(count)

[2]:
divmod(1.4,1)
[2]:
(1.0, 0.3999999999999999)
[34]:
A = np.array([0,1,3])
B = np.array([0,400_000,500_000])
scale = 1_000_000
C = A + B / scale
[10]:
divmod(C,1)[0]
[10]:
array([0., 1., 3.])
[32]:
np.argwhere(divmod(C,1)[0].astype(int)==1)[:,0]
[32]:
array([1, 2])
[35]:
solution_math(A,B)
[35]:
0
[36]:
solution_math2(A,B)
[36]:
1
[ ]: